[geany/infrastructure] 4ae1f4: Add irclog.py script

Enrico Tröger git-noreply at xxxxx
Sat Jul 6 14:33:28 UTC 2019


Branch:      refs/heads/master
Author:      Enrico Tröger <enrico.troeger at uvena.de>
Committer:   Enrico Tröger <enrico.troeger at uvena.de>
Date:        Sat, 06 Jul 2019 14:33:28 UTC
Commit:      4ae1f41c23f828b5e08d7930a193550631e5181b
             https://github.com/geany/infrastructure/commit/4ae1f41c23f828b5e08d7930a193550631e5181b

Log Message:
-----------
Add irclog.py script

This is used to render IRC logs as HTML on https://irc.geany.org/logs/.


Modified Paths:
--------------
    scripts/irclog.py

Modified: scripts/irclog.py
384 lines changed, 384 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,384 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# IRC Log parser
+#
+# Copyright 2010 Enrico Tröger <enrico(dot)troeger(at)uvena(dot)de>
+# License: GPLv2
+#
+
+from html.entities import entitydefs
+from time import ctime
+import datetime
+import re
+import sys
+
+
+# Channel whose logs are rendered and the page heading derived from it.
+CHANNEL = '#geany'
+TITLE = 'IRC Logs for ' + CHANNEL + ' @ irc.freenode.net'
+
+# Kinds of log lines: regular message, join/part/quit style notice, "/me" action.
+MSG_TYPE_NORMAL, MSG_TYPE_STATUS, MSG_TYPE_ACTION = 1, 2, 3
+
+
+# Timestamp prefix shared by every line format: six captured groups (year,
+# month, day, hour, minute, second) followed by two spaces.
+_TIMESTAMP = r'(\d+)-([\d\w]+)-(\d+)[ T](\d+):(\d+):(\d+)  '
+
+# Regular messages, captured as (nick, text):
+# 2010-01-31T14:09:29  <eht16> logs and stats are currently broken(i.e. not up2date), ...
+# 2010-01-31T14:09:34  <ct|kyle> I did not think that would still work
+# 2010-01-31T14:09:45  <ct|kyle> I figured that
+# 2010-01-31T14:09:52  <eht16> I wrote a Geany plugin :)
+REGEXP_MSG = re.compile(r'^' + _TIMESTAMP + r'<(\S+)> (.*)')
+
+# Status notices, captured as (nick, rest of the notice):
+# 2010-01-31T02:25:40  *** SweetGeany has joined #geany
+# 2010-01-31T02:25:48  *** SweetGeany has left #geany
+# 2010-01-31T02:26:57  *** SweetGeany has joined #geany
+# 2010-01-31T03:25:54  *** _dmaphy_ has joined #geany
+# 2010-01-31T03:27:36  *** dmaphy has quit IRC
+# 2010-01-31T03:27:43  *** _dmaphy_ is now known as dmaphy
+REGEXP_STATUS = re.compile(_TIMESTAMP + r'\*\*\* (\S+) ?(.*)')
+
+# Actions, captured as (nick, action text):
+# 2010-01-31T14:17:18  * eht16 just noticed, 'make distcheck' is broken for Geany
+REGEXP_ACTION = re.compile(_TIMESTAMP + r'\* (\S+) ?(.*)')
+
+
+# Page skeleton, filled in with str.format(). Placeholders: {title} (used for
+# both <title> and <h1>), {content} (the table rows), {ctime} (generation
+# time) and {date} (optional suffix of the footer note). Literal CSS braces
+# are doubled so that str.format() leaves them alone. Spacers are written as
+# explicit &nbsp; entities because plain whitespace collapses in HTML.
+HTML_TEMPLATE = '''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+
+<head>
+    <title>{title}</title>
+    <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+    <meta name="generator" content="Geany 1.25" />
+    <link rel='icon' href='/favicon.ico' type='image/x-icon' />
+    <link rel='shortcut icon' href='/favicon.ico' />
+    <style type="text/css">
+        body {{
+            font-size: 90%;
+        }}
+        h1, h2, h3 {{
+            color: #70709f;  clear: left;
+        }}
+
+        .nick1 {{
+            color: #00AA33;
+        }}
+        .nick2 {{
+            color: #AA0000;
+        }}
+        .nick3 {{
+            color: #005500;
+        }}
+        .nick4 {{
+            color: #FF0077;
+        }}
+        .nick5 {{
+            color: blue;
+        }}
+        .nick6 {{
+            color: #8B008B;
+        }}
+        .nick7 {{
+            color: #50507f;
+        }}
+        .nick8 {{
+            color: #00008B;
+        }}
+        .nick9 {{
+            color: #222222;
+        }}
+
+        tr, td {{
+            vertical-align: top;
+        }}
+
+        img {{
+            border: 0px;
+        }}
+
+        .status {{
+            font-size: 80%;
+            color: gray;
+        }}
+
+        .action {{
+            font-weight: bold;
+        }}
+
+        #log {{
+            border-collapse: collapse;
+            width: 100%;
+            border: 1px solid #efefef;
+        }}
+
+        #log tr {{
+            border-top: 1px solid #efefef;
+        }}
+        #log tr.head {{
+            border-top-style: none;
+        }}
+        #log tr.dark {{
+            background-color: #dfdfdf;
+        }}
+
+        #log td, th {{
+            font-family: Consolas, "Lucida Console", "Courier New", monospace;
+            padding: 0.2em 0.4em;
+        }}
+
+        #log th {{
+            border-bottom: 1px solid #C0C0C0;
+        }}
+
+        #log .nick {{
+            text-align: right;
+            border-right: 1px solid #C0C0C0;
+        }}
+        #log .msg {{
+            border-right: 1px solid #efefef; line-height: 1.3em;
+        }}
+        #log .time {{
+            border-left:  1px solid #efefef;
+        }}
+    </style>
+</head>
+
+<body>
+    <h1>{title}</h1>
+
+    <table id="log" style="clear:both">
+    <tr class="head">
+        <th>Time</th>
+        <th>Nick</th>
+        <th>Message</th>
+    </tr>
+    {content}
+    </table>
+    <p>&nbsp;</p>
+    <p>Stats generated by <a href="https://github.com/geany/infrastructure/blob/master/scripts/irclog.py">irclog.py</a>
+        on {ctime} (times in UTC{date})</p>
+    <p>
+        <a href="https://validator.w3.org/check?uri=referer">
+        <img src="https://i18n.geany.org/button-xhtml.png" alt="Valid XHTML 1.0 Strict" />
+        </a>
+        &nbsp;&nbsp;&nbsp;
+        <a href="https://www.geany.org">
+        <img src="https://i18n.geany.org/button-geany.png" alt="Geany" />
+        </a>
+    </p>
+</body>
+</html>
+'''
+
+
+class LogEntry:
+    """One parsed log line: timestamp, nick, message text and message type."""
+
+    # ----------------------------------------------------------------------
+    def __init__(self):
+        # everything is filled in by the parser; a fresh entry is an empty
+        # message of the normal type
+        self.date, self.nick, self.msg = None, None, None
+        self.msg_type = MSG_TYPE_NORMAL
+
+    # ----------------------------------------------------------------------
+    def __str__(self):
+        fields = ('LogEntry', self.date, self.nick, self.msg)
+        return ': '.join(str(field) for field in fields)
+
+
+########################################################################
+class IrcLogFormatter:
+    """Render a plain-text IRC log file as an XHTML page.
+
+    The input file is parsed line by line into LogEntry objects with the
+    module-level REGEXP_* patterns. The entries are then turned into table
+    rows and written through HTML_TEMPLATE, either to the output file or,
+    if the output filename is '-', to stdout.
+    """
+
+    # ----------------------------------------------------------------------
+    def __init__(self, input_filename, output_filename):
+        self._input_filename = input_filename
+        self._output_filename = output_filename
+        self._log_lines = []
+        self._last_nick_index = 1
+        self._entitydefs_inverted = {}
+        self._badchars_regex = None
+        self._nick_color_index = {}
+        self._setup_entity_definitions()
+
+    # ----------------------------------------------------------------------
+    def _setup_entity_definitions(self):
+        """Build the character -> entity name table and the regex matching those characters."""
+        # entitydefs maps entity names to the characters they stand for
+        self._entitydefs_inverted = {char: name for name, char in entitydefs.items()}
+        # escape each alternative so it is always matched literally
+        self._badchars_regex = re.compile(
+            '|'.join(re.escape(char) for char in self._entitydefs_inverted))
+
+    # ----------------------------------------------------------------------
+    def format(self):
+        """Parse the input file and write the rendered page to the output target."""
+        self._get_contents(self._input_filename)
+        content = self._create_table()
+
+        to_stdout = self._output_filename == '-'
+        date = '' if to_stdout else ', will be generated every 24 hours'
+
+        output = HTML_TEMPLATE.format(
+            title=TITLE,
+            content=content,
+            date=date,
+            ctime=ctime())
+
+        if to_stdout:
+            print(output)
+        else:
+            # the page declares charset=utf-8, so write it as UTF-8 regardless of the locale
+            with open(self._output_filename, 'w', encoding='utf-8') as output_file:
+                output_file.write(output)
+
+    # ----------------------------------------------------------------------
+    def _get_contents(self, file_name):
+        """Parse file_name into self._log_lines; unparsable lines are reported on stderr."""
+        # tried in this order, the first matching pattern decides the message type
+        patterns = (
+            (REGEXP_MSG, MSG_TYPE_NORMAL),
+            (REGEXP_STATUS, MSG_TYPE_STATUS),
+            (REGEXP_ACTION, MSG_TYPE_ACTION),
+        )
+        self._log_lines = []
+        # undecodable bytes are replaced instead of aborting the whole run
+        with open(file_name, encoding='utf-8', errors='replace') as file_handle:
+            for line in file_handle:
+                entry = self._parse_line(line, patterns)
+                if entry is None:
+                    print('Failed: {}'.format(line), file=sys.stderr)
+                else:
+                    self._log_lines.append(entry)
+
+    # ----------------------------------------------------------------------
+    def _parse_line(self, line, patterns):
+        """Return a LogEntry for line, or None if it cannot be parsed.
+
+        patterns is a sequence of (compiled regex, message type) pairs.
+        """
+        for regexp, msg_type in patterns:
+            match = regexp.match(line)
+            if not match:
+                continue
+
+            groups = match.groups()
+            try:
+                date = _create_datetime_from_tuple(groups)
+            except ValueError:
+                # non-numeric month or impossible date: treat as unparsable
+                return None
+
+            entry = LogEntry()
+            entry.date = date
+            entry.nick = groups[6]
+            # only normal messages get surrounding whitespace stripped
+            entry.msg = groups[7].strip() if msg_type == MSG_TYPE_NORMAL else groups[7]
+            entry.msg_type = msg_type
+            return entry
+
+        return None
+
+    # ----------------------------------------------------------------------
+    def _create_table(self):
+        """Return all table rows (date headers and messages) as one HTML string."""
+        rows = []
+        row_index = 0
+        last_date = None
+
+        for entry in self._log_lines:
+            if not entry.msg:
+                continue
+
+            # Date header; the full date is compared so that the same day of
+            # month in another month or year still starts a new section
+            current_date = entry.date.date()
+            if current_date != last_date:
+                rows.append('<tr><th colspan="3">{}</th></tr>'.format(
+                    current_date.strftime('%Y-%m-%d')))
+                last_date = current_date
+
+            row_index += 1
+            row_alt = ' class="dark"' if row_index % 2 == 0 else ''
+
+            # work on local copies so the LogEntry objects stay untouched
+            nick, msg = entry.nick, entry.msg
+            if entry.msg_type == MSG_TYPE_STATUS:
+                row_alt = ' class="status"'
+                msg = '{} {}'.format(nick, msg)
+                nick = ''
+            elif entry.msg_type == MSG_TYPE_ACTION:
+                row_alt = ' class="action"'
+                msg = '{} {}'.format(nick, msg)
+                nick = ''
+
+            rows.append('''
+            <tr{}>
+                <td class="time">{}</td>
+                <td class="nick nick{}">{}</td>
+                <td class="msg">{}</td>
+            </tr>'''.format(
+                row_alt,
+                entry.date.strftime('%H:%M'),
+                self._get_color_index(nick),
+                self._html_escape(nick),
+                self._html_escape(msg)))
+
+        return ''.join(rows)
+
+    # ----------------------------------------------------------------------
+    def _get_color_index(self, nick):
+        """Return the colour index (1..9) for nick, assigning the next one on first use."""
+        if nick not in self._nick_color_index:
+            # only nine .nickN classes exist, so start over after the ninth
+            if self._last_nick_index > 9:
+                self._last_nick_index = 1
+
+            self._nick_color_index[nick] = self._last_nick_index
+            self._last_nick_index += 1
+
+        return self._nick_color_index[nick]
+
+    # ----------------------------------------------------------------------
+    def _html_escape(self, text):
+        """Return text with every character that has a named entity replaced by it.
+
+        This covers the markup characters '&', '<', '>' and '"' as well as
+        the other characters listed in entitydefs (e.g. 'ß' becomes
+        '&szlig;'). The replacement is done in a single pass, so ampersands
+        introduced by an entity are never escaped a second time. Text that
+        already looks like an entity is escaped like anything else: log
+        content is plain text and must never reach the page as markup.
+        """
+        text = text.replace('\x01', ' ')  # ASCII char #1, SOH, sent by some clients for ACTIONs
+        text = text.replace('\x80', '€')  # turned into &euro; by the substitution below
+        return self._badchars_regex.sub(self._entity_for_match, text)
+
+    # ----------------------------------------------------------------------
+    def _entity_for_match(self, match):
+        """Return the '&name;' entity for the single character matched by _badchars_regex."""
+        return '&{};'.format(self._entitydefs_inverted[match.group(0)])
+
+
+# ----------------------------------------------------------------------
+def _create_datetime_from_tuple(values):
+    """Build a datetime from the first six items of values.
+
+    The items are converted with int() and taken, in order, as year, month,
+    day, hour, minute and second; any further items are ignored.
+    """
+    year, month, day, hour, minute, second = (int(values[i]) for i in range(6))
+    return datetime.datetime(year, month, day, hour, minute, second)
+
+
+# ----------------------------------------------------------------------
+def _try_to_decode(value):
+    """Return value encoded as UTF-8, else as Latin-1, else its repr().
+
+    Despite the name, this calls value.encode(); the codecs are tried in
+    order and the first one that does not raise UnicodeError wins.
+    """
+    for codec in ('utf-8', 'latin1'):
+        try:
+            return value.encode(codec)
+        except UnicodeError:
+            continue
+    return repr(value)
+
+
+# ----------------------------------------------------------------------
+def main():
+    """Command line entry point: irclog.py <input-file> <output-file>.
+
+    Prints a usage note to stderr and exits with status 1 if fewer than two
+    arguments are given. Otherwise the input file is rendered to the output
+    file ('-' writes the page to stdout).
+    """
+    if len(sys.argv) < 3:
+        print('Usage: irclog.py <input-file> <output-file>', file=sys.stderr)
+        # sys.exit() rather than exit(): the latter is a helper added by the
+        # site module and is not guaranteed to exist (e.g. with python -S)
+        sys.exit(1)
+
+    input_filename, output_filename = sys.argv[1:3]
+
+    irc_log_formatter = IrcLogFormatter(input_filename, output_filename)
+    irc_log_formatter.format()
+
+
+if __name__ == '__main__':
+    main()



--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).


More information about the Commits mailing list