Branch: refs/heads/master Author: Enrico Tröger enrico.troeger@uvena.de Committer: Enrico Tröger enrico.troeger@uvena.de Date: Sat, 06 Jul 2019 14:33:28 UTC Commit: 4ae1f41c23f828b5e08d7930a193550631e5181b https://github.com/geany/infrastructure/commit/4ae1f41c23f828b5e08d7930a193550631e5181b
Log Message: ----------- Add irclog.py script
This is used to render IRC logs as HTML on https://irc.geany.org/logs/.
Modified Paths: -------------- scripts/irclog.py
# Modified: scripts/irclog.py (file added by this commit).
# NOTE: the listing below is the script from the flattened "+"-prefixed diff,
# re-indented and with review fixes applied; it is not byte-identical to the
# 384-line diff that was committed.
#
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# IRC Log parser
#
# Copyright 2010 Enrico Tröger <enrico(dot)troeger(at)uvena(dot)de>
# License: GPLv2
#
"""Render a plain-text IRC log file as an XHTML table."""

from html.entities import entitydefs
from time import ctime
import datetime
import re
import sys


CHANNEL = '#geany'
TITLE = 'IRC Logs for {} @ irc.freenode.net'.format(CHANNEL)

MSG_TYPE_NORMAL = 1
MSG_TYPE_STATUS = 2
MSG_TYPE_ACTION = 3


# 2010-01-31T14:09:29 <eht16> logs and stats are currently broken(i.e. not up2date), ...
# 2010-01-31T14:09:34 <ct|kyle> I did not think that would still work
# 2010-01-31T14:09:45 <ct|kyle> I figured that
# 2010-01-31T14:09:52 <eht16> I wrote a Geany plugin :)
REGEXP_MSG = re.compile(
    r'^(\d+)-([\d\w]+)-(\d+)[ T](\d+):(\d+):(\d+) <(\S+)> (.*)')

# 2010-01-31T02:25:40 *** SweetGeany has joined #geany
# 2010-01-31T02:25:48 *** SweetGeany has left #geany
# 2010-01-31T02:26:57 *** SweetGeany has joined #geany
# 2010-01-31T03:25:54 *** _dmaphy_ has joined #geany
# 2010-01-31T03:27:36 *** dmaphy has quit IRC
# 2010-01-31T03:27:43 *** _dmaphy_ is now known as dmaphy
# The asterisks are literal and must be escaped; an unescaped "***" is not
# even a valid regular expression ("multiple repeat").
REGEXP_STATUS = re.compile(
    r'(\d+)-([\d\w]+)-(\d+)[ T](\d+):(\d+):(\d+) \*\*\* (\S+) ?(.*)')

# 2010-01-31T14:17:18 * eht16 just noticed, 'make distcheck' is broken for Geany
REGEXP_ACTION = re.compile(
    r'(\d+)-([\d\w]+)-(\d+)[ T](\d+):(\d+):(\d+) \* (\S+) ?(.*)')

# Patterns in the order they are tried for every log line.
_LINE_PATTERNS = (
    (REGEXP_MSG, MSG_TYPE_NORMAL),
    (REGEXP_STATUS, MSG_TYPE_STATUS),
    (REGEXP_ACTION, MSG_TYPE_ACTION),
)


HTML_TEMPLATE = '''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">

<head>
    <title>{title}</title>
    <meta http-equiv="content-type" content="text/html;charset=utf-8" />
    <meta name="generator" content="Geany 1.25" />
    <link rel='icon' href='/favicon.ico' type='image/x-icon' />
    <link rel='shortcut icon' href='/favicon.ico' />
    <style type="text/css">
        body {{
            font-size: 90%;
        }}
        h1, h2, h3 {{
            color: #70709f; clear: left;
        }}

        .nick1 {{
            color: #00AA33;
        }}
        .nick2 {{
            color: #AA0000;
        }}
        .nick3 {{
            color: #005500;
        }}
        .nick4 {{
            color: #FF0077;
        }}
        .nick5 {{
            color: blue;
        }}
        .nick6 {{
            color: #8B008B;
        }}
        .nick7 {{
            color: #50507f;
        }}
        .nick8 {{
            color: #00008B;
        }}
        .nick9 {{
            color: #222222;
        }}

        tr, td {{
            vertical-align: top;
        }}

        img {{
            border: 0px;
        }}

        .status {{
            font-size: 80%;
            color: gray;
        }}

        .action {{
            font-weight: bold;
        }}

        #log {{
            border-collapse: collapse;
            width: 100%;
            border: 1px solid #efefef;
        }}

        #log tr {{
            border-top: 1px solid #efefef;
        }}
        #log tr.head {{
            border-top-style: none;
        }}
        #log tr.dark {{
            background-color: #dfdfdf;
        }}

        #log td, th {{
            font-family: Consolas, "Lucida Console", "Courier New", monospace;
            padding: 0.2em 0.4em;
        }}

        #log th {{
            border-bottom: 1px solid #C0C0C0;
        }}

        #log .nick {{
            text-align: right;
            border-right: 1px solid #C0C0C0;
        }}
        #log .msg {{
            border-right: 1px solid #efefef; line-height: 1.3em;
        }}
        #log .time {{
            border-left: 1px solid #efefef;
        }}
    </style>
</head>

<body>
    <h1>{title}</h1>

    <table id="log" style="clear:both">
        <tr class="head">
            <th>Time</th>
            <th>Nick</th>
            <th>Message</th>
        </tr>
        {content}
    </table>
    <p> </p>
    <p>Stats generated by <a href="https://github.com/geany/infrastructure/blob/master/scripts/irclog.py">irclog.py</a>
    on {ctime} (times in UTC{date})</p>
    <p>
        <a href="https://validator.w3.org/check?uri=referer">
            <img src="https://i18n.geany.org/button-xhtml.png" alt="Valid XHTML 1.0 Strict" />
        </a>

        <a href="https://www.geany.org">
            <img src="https://i18n.geany.org/button-geany.png" alt="Geany" />
        </a>
    </p>
</body>
</html>
'''


class LogEntry:
    """One parsed line of the IRC log."""

    # ----------------------------------------------------------------------
    def __init__(self, date=None, nick=None, msg=None,
                 msg_type=MSG_TYPE_NORMAL):
        # All arguments are optional so that ``LogEntry()`` keeps working.
        self.date = date          # datetime.datetime of the line
        self.nick = nick          # nick name the line belongs to
        self.msg = msg            # text after the nick
        self.msg_type = msg_type  # one of the MSG_TYPE_* constants

    # ----------------------------------------------------------------------
    def __str__(self):
        return 'LogEntry: {}: {}: {}'.format(self.date, self.nick, self.msg)


########################################################################
class IrcLogFormatter:
    """Parse an IRC log file and write it out as an HTML page.

    ``output_filename`` may be ``'-'`` to print the page to stdout.
    """

    # ----------------------------------------------------------------------
    def __init__(self, input_filename, output_filename):
        self._input_filename = input_filename
        self._output_filename = output_filename
        self._log_lines = None
        self._last_nick_index = 1
        self._nick_color_index = dict()
        self._escape_table = None
        self._setup_entity_definitions()

    # ----------------------------------------------------------------------
    def _setup_entity_definitions(self):
        """Build the ``str.translate`` table used by ``_html_escape``."""
        table = dict()
        for name, char in entitydefs.items():
            if len(char) == 1:
                table[ord(char)] = '&{};'.format(name)
        # ASCII char #1, SOH, sent by some clients for ACTIONs
        table[0x01] = ' '
        # The log writes the Euro sign as \x80; render it as the entity.
        table[0x80] = '&euro;'
        self._escape_table = table

    # ----------------------------------------------------------------------
    def format(self):
        """Read the input log and write (or print) the rendered page."""
        self._get_contents(self._input_filename)
        to_stdout = self._output_filename == '-'

        output = HTML_TEMPLATE.format(
            title=TITLE,
            content=self._create_table(),
            date='' if to_stdout else ', will be generated every 24 hours',
            ctime=ctime())

        if to_stdout:
            print(output)
        else:
            # The template declares charset=utf-8, so write exactly that
            # instead of whatever the locale default happens to be.
            with open(self._output_filename, 'w', encoding='utf-8') as output_file:
                output_file.write(output)

    # ----------------------------------------------------------------------
    @staticmethod
    def _parse_line(line):
        """Return a ``LogEntry`` for *line*, or ``None`` if it is not
        recognised (no pattern matches or the timestamp is invalid)."""
        for pattern, msg_type in _LINE_PATTERNS:
            match = pattern.match(line)
            if match is None:
                continue
            groups = match.groups()
            try:
                date = _create_datetime_from_tuple(groups)
            except ValueError:
                # e.g. a non-numeric month accepted by [\d\w]+ or day 32
                return None
            return LogEntry(date, groups[6], groups[7].strip(), msg_type)
        return None

    # ----------------------------------------------------------------------
    def _get_contents(self, file_name):
        """Parse *file_name* into ``self._log_lines``.

        Lines that cannot be parsed are reported on stderr and skipped.
        """
        # errors='replace': one badly encoded line must not abort the run.
        with open(file_name, encoding='utf-8', errors='replace') as file_handle:
            self._log_lines = list()
            for line in file_handle:
                entry = self._parse_line(line)
                if entry is None:
                    print('Failed: {}'.format(line), file=sys.stderr)
                else:
                    self._log_lines.append(entry)

    # ----------------------------------------------------------------------
    def _create_table(self):
        """Return the ``<tr>`` rows for all parsed entries as one string.

        The stored entries are not modified, so the method can be called
        repeatedly with the same result.
        """
        row_template = '''
        <tr{}>
            <td class="time">{}</td>
            <td class="nick nick{}">{}</td>
            <td class="msg">{}</td>
        </tr>'''
        rows = []
        row_index = 0
        last_date = None

        for entry in self._log_lines:
            if not entry.msg:
                continue

            # Date header; compare full dates, not just the day of month,
            # so gaps of exactly one month still get a header.
            current_date = entry.date.date()
            if last_date != current_date:
                rows.append('<tr><th colspan="3">{}</th></tr>'.format(
                    entry.date.strftime('%Y-%m-%d')))
                last_date = current_date

            row_index += 1
            row_alt = ' class="dark"' if row_index % 2 == 0 else ''
            nick = entry.nick
            msg = entry.msg

            # Status and action lines show the nick as part of the message.
            if entry.msg_type == MSG_TYPE_STATUS:
                row_alt = ' class="status"'
                msg = '{} {}'.format(entry.nick, entry.msg)
                nick = ''
            elif entry.msg_type == MSG_TYPE_ACTION:
                row_alt = ' class="action"'
                msg = '{} {}'.format(entry.nick, entry.msg)
                nick = ''

            rows.append(row_template.format(
                row_alt,
                entry.date.strftime('%H:%M'),
                self._get_color_index(nick),
                self._html_escape(nick),
                self._html_escape(msg)))

        return ''.join(rows)

    # ----------------------------------------------------------------------
    def _get_color_index(self, nick):
        """Return the CSS colour index (1..9) for *nick*, assigning the
        next one in round-robin order on first sight."""
        if nick not in self._nick_color_index:
            if self._last_nick_index > 9:
                self._last_nick_index = 1

            self._nick_color_index[nick] = self._last_nick_index
            self._last_nick_index += 1

        return self._nick_color_index[nick]

    # ----------------------------------------------------------------------
    def _html_escape(self, text):
        """Return *text* with markup characters replaced by named entities.

        IRC text is untrusted and is always escaped, including text that
        already looks like an entity. Translation is a single pass, so the
        ampersands of the inserted entities are not escaped again.
        """
        return text.translate(self._escape_table)


# ----------------------------------------------------------------------
def _create_datetime_from_tuple(values):
    """Build a datetime from the first six items of *values*
    (year, month, day, hour, minute, second as strings or ints).

    Raises ValueError if an item is not numeric or out of range.
    """
    return datetime.datetime(*(int(value) for value in values[:6]))


# ----------------------------------------------------------------------
def _try_to_decode(value):
    """Return *value* encoded as UTF-8 bytes, falling back to Latin-1 and
    finally to ``repr(value)``.

    Despite its name this helper encodes; it is not called anywhere in
    this script.
    """
    try:
        return value.encode('utf-8')
    except UnicodeError:
        try:
            return value.encode('latin1')
        except UnicodeError:
            return repr(value)


# ----------------------------------------------------------------------
def main():
    """Command line entry point: ``irclog.py <input-file> <output-file>``."""
    if len(sys.argv) < 3:
        print('Usage: irclog.py <input-file> <output-file>', file=sys.stderr)
        # sys.exit instead of the site-provided exit() builtin
        sys.exit(1)

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]

    irc_log_formatter = IrcLogFormatter(input_filename, output_filename)
    irc_log_formatter.format()


if __name__ == '__main__':
    main()
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).