[geany/infrastructure] 69bea0: Add various GIT related scripts

Enrico Tröger git-noreply at xxxxx
Sun Jul 15 11:09:03 UTC 2012


Branch:      refs/heads/master
Author:      Enrico Tröger <enrico.troeger at uvena.de>
Committer:   Enrico Tröger <enrico.troeger at uvena.de>
Date:        Sun, 15 Jul 2012 11:09:03
Commit:      69bea047664f108a68b3879333598dafcf668c71
             https://github.com/geany/infrastructure/commit/69bea047664f108a68b3879333598dafcf668c71

Log Message:
-----------
Add various GIT related scripts

This contains the most important GIT commit hook to update
the GIT mirror repositories and send commit mails to the mailing
list.


Modified Paths:
--------------
    scripts/git_hooks/garbage_collect_repositories.sh
    scripts/git_hooks/github_commit_mail.py
    scripts/git_hooks/post_commit_hook.py
    scripts/git_hooks/update_repositories.py

Modified: scripts/git_hooks/garbage_collect_repositories.sh
17 lines changed, 17 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Geany GIT mirror repository cleaner (executed once a week as cronjob)
+#
+# Runs "git gc" as user www-data inside every directory below REPO_HOME.
+
+
+REPO_HOME=/srv/www/git.geany.org/repos
+
+
+# bail out instead of garbage collecting whatever the current directory happens to be
+cd "$REPO_HOME" || exit 1
+
+# iterate over a glob instead of parsing the output of ls: only directories match and
+# names containing whitespace survive
+for repo in */
+do
+    # the subshell keeps the working directory of the loop untouched, so a failing cd
+    # skips just this entry instead of running "git gc" and "cd .." in the wrong place
+    (
+        cd "$repo" || exit
+        sudo -u www-data git gc
+    )
+done


Modified: scripts/git_hooks/github_commit_mail.py
299 lines changed, 299 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Author:  Enrico Tröger
+# License: GPLv2
+#
+'''
+Github Post-Receive commit hook
+'''
+
+
+from datetime import datetime, timedelta
+from email.mime.text import MIMEText
+from email.Header import Header
+from email.utils import formatdate, formataddr
+from json import loads
+from smtplib import SMTP
+from time import mktime
+import logging
+import sys
+import urllib2
+# Python likes to encode MIME messages with base64, I prefer plain text (#issue12552)
+# utf-8 is re-registered with a header encoding only (charset.SHORTEST); no body encoding
+# argument is passed, which is what affects the utf-8 message bodies created further below.
+from email import charset
+charset.add_charset('utf-8', charset.SHORTEST)
+
+
+# timeout in seconds for requests to the Github API
+HTTP_REQUEST_TIMEOUT = 30
+# log level used when this script is run from the command line
+LOG_LEVEL = logging.DEBUG
+
+# envelope sender and address part of the From header of the generated mails
+EMAIL_SENDER = u'git-noreply at geany.org'
+# SMTP server used to deliver the mails
+EMAIL_HOST = u'localhost'
+# all templates are filled with the dictionary built by CommitMailGenerator._adapt_commit_info()
+EMAIL_SUBJECT_TEMPLATE = u'[%(user)s/%(repository)s] %(short_hash)s: %(short_commit_message)s'
+EMAIL_BODY_TEMPLATE = u'''Branch:      %(branch)s
+Author:      %(author)s
+Committer:   %(committer)s
+Date:        %(commit_date_formatted)s
+Commit:      %(commit)s
+             %(commit_url)s
+
+Log Message:
+-----------
+%(commit_message)s
+
+
+Modified Paths:
+--------------
+%(modified_files_list)s
+
+%(modified_files_diffs)s
+--------------
+This E-Mail was brought to you by github_commit_mail.py (Source: TBD).
+'''
+# rendered once per modified file; "changes" is the number of changed lines of that single
+# file as reported by the Github API, so it must not be labelled as a number of files
+EMAIL_DIFF_TEMPLATE = u'''Modified: %(filename)s
+%(changes)s lines changed, %(additions)s insertions(+), %(deletions)s deletions(-)
+===================================================================
+%(patch)s
+
+
+'''
+
+EMAIL_RECIPIENT_MAP = {
+    # repository: email address
+    # geany
+    'geany/geany': 'geany-commits at uvena.de',
+    'geany/talks': 'geany-commits at uvena.de',
+    'geany/infrastructure': 'geany-commits at uvena.de',
+    # plugins
+    'geany/geany-plugins': 'geany-plugins-commits at uvena.de',
+    'geany/plugins.geany.org': 'geany-plugins-commits at uvena.de',
+    # newsletter
+    'geany/newsletter': 'geany-newsletter-commits at uvena.de',
+}
+
+
+########################################################################
+class CommitMailGenerator(object):
+    """Generate and send one notification mail per commit of a Github repository.
+
+    For every commit hash the details are queried from the Github API, rendered with the
+    EMAIL_*_TEMPLATE constants and sent via SMTP to the address configured for the
+    repository in EMAIL_RECIPIENT_MAP.
+    """
+
+    # upper bound (in characters) for the diff part of a single mail
+    _MAX_DIFF_LENGTH = 100000
+
+    #----------------------------------------------------------------------
+    def __init__(self, user, repository, branch, commits, logger):
+        """Store the description of the push to process.
+
+        :param user: owner of the repository on Github
+        :param repository: name of the repository
+        :param branch: ref the commits belong to, used verbatim in the mail body
+        :param commits: iterable of commit hashes
+        :param logger: logging.Logger used for debug output and error reports
+        """
+        self._user = user
+        self._repository = repository
+        self._branch = branch
+        self._commits = commits
+        self._logger = logger
+
+    #----------------------------------------------------------------------
+    def generate_commit_mails(self):
+        """Send a mail for each commit; a failing commit does not stop the others."""
+        for commit in self._commits:
+            self._try_to_generate_commit_mail(commit)
+
+    #----------------------------------------------------------------------
+    def _try_to_generate_commit_mail(self, commit):
+        """Process a single commit and log (instead of raise) any error."""
+        try:
+            self._generate_commit_mail(commit)
+        except Exception as e:
+            self._logger.error('An error occurred while processing commit %s: %s' %
+                (commit, e), exc_info=True)
+
+    #----------------------------------------------------------------------
+    def _generate_commit_mail(self, commit):
+        """Query, convert and mail the information of a single commit."""
+        full_commit_info = self._query_commit_info(commit)
+        commit_info = self._adapt_commit_info(full_commit_info)
+        self._send_mail(commit_info)
+
+    #----------------------------------------------------------------------
+    def _query_commit_info(self, commit):
+        """Return the decoded JSON response of the Github API for ``commit``."""
+        url_parameters = dict(user=self._user,
+                              repository=self._repository,
+                              commit=commit)
+        url = u'https://api.github.com/repos/%(user)s/%(repository)s/commits/%(commit)s' % \
+            url_parameters
+        handle = urllib2.urlopen(url, timeout=HTTP_REQUEST_TIMEOUT)
+        try:
+            self._log_rate_limit(handle)
+            response_json = handle.read()
+        finally:
+            # several commits are processed in one run, do not pile up open connections
+            handle.close()
+        # parse response
+        return loads(response_json)
+
+    #----------------------------------------------------------------------
+    def _log_rate_limit(self, urllib_handle):
+        """Log the rate limit headers of an API response at debug level."""
+        headers = urllib_handle.info()
+        # this is purely informational: a missing header must not abort the mail generation,
+        # so use get() which yields None instead of raising a KeyError
+        rate_limit_remaining = headers.get('X-RateLimit-Remaining')
+        rate_limit = headers.get('X-RateLimit-Limit')
+        length = headers.get('Content-Length')
+        self._logger.debug(u'Github rate limits: %s/%s (%s bytes received)' %
+            (rate_limit_remaining, rate_limit, length))
+
+    #----------------------------------------------------------------------
+    def _adapt_commit_info(self, full_commit_info):
+        """Flatten an API response into the dictionary the mail templates are filled with."""
+        # commit_datetime is a naive datetime in UTC, timegm() converts it to a timestamp
+        # without applying the local timezone of the server (which mktime() would do)
+        from calendar import timegm
+
+        commit_details = full_commit_info['commit']
+        commit = full_commit_info['sha']
+        commit_datetime = self._parse_commit_date(commit_details['committer']['date'])
+        commit_message = commit_details['message']
+
+        return dict(user=self._user,
+                    repository=self._repository,
+                    commit=commit,
+                    commit_url=self._generate_commit_url(commit),
+                    branch=self._branch,
+                    author=self._get_name(full_commit_info, 'author'),
+                    author_name=commit_details['author']['name'],
+                    committer=self._get_name(full_commit_info, 'committer'),
+                    committer_name=commit_details['committer']['name'],
+                    commit_date=timegm(commit_datetime.timetuple()),
+                    commit_date_formatted=commit_datetime.strftime('%a, %d %b %Y %H:%M:%S'),
+                    commit_message=commit_message,
+                    short_commit_message=self._get_short_commit_message(commit_message),
+                    short_hash=commit[:6],
+                    modified_files_list=self._generate_modified_files_list(full_commit_info),
+                    modified_files_diffs=self._generate_modified_files_diffs(full_commit_info))
+
+    #----------------------------------------------------------------------
+    def _generate_commit_url(self, commit):
+        """Return the URL of the commit page on github.com."""
+        url_parameters = dict(user=self._user,
+                              repository=self._repository,
+                              commit=commit)
+        return u'https://github.com/%(user)s/%(repository)s/commit/%(commit)s' % url_parameters
+
+    #----------------------------------------------------------------------
+    def _get_name(self, full_commit_info, name):
+        """Return "Name <email>" of the person stored under ``name`` (author or committer)."""
+        person = full_commit_info['commit'][name]
+        return u'%s <%s>' % (person['name'], person['email'])
+
+    #----------------------------------------------------------------------
+    def _parse_commit_date(self, date_raw):
+        """Convert an ISO 8601 timestamp of the API into a naive datetime in UTC.
+
+        Accepted are timestamps with a numeric offset (2012-01-08T04:44:21-08:00) and with
+        the UTC designator (2012-01-08T12:44:21Z).
+
+        :raises ValueError: if the date part cannot be parsed
+        """
+        date_format = '%Y-%m-%dT%H:%M:%S'
+        if date_raw.endswith('Z'):
+            # "Z" marks UTC, there is no offset to cut off or to apply
+            return datetime.strptime(date_raw[:-1], date_format)
+
+        # unfortunately, Python's strptime cannot parse numeric timezone offsets (anymore since 2.6)
+        # so we need to do it on our own, example date: 2012-01-08T04:44:21-08:00
+        date = datetime.strptime(date_raw[:-6], date_format)
+        # handle timezone information
+        timezone_offset = date_raw[-6:].replace(':', '')
+        try:
+            offset = int(timezone_offset)
+        except ValueError:
+            self._logger.warning(
+                u'Error on parsing timezone information "%s" (%s)' % (timezone_offset, date_raw))
+            offset = 0
+
+        # the offset reads (+/-)HHMM: 530 means 5 hours and 30 minutes, not 5.3 hours
+        hours, minutes = divmod(abs(offset), 100)
+        delta = timedelta(hours=hours, minutes=minutes)
+        if offset < 0:
+            delta = -delta
+        return date - delta
+
+    #----------------------------------------------------------------------
+    def _get_short_commit_message(self, short_commit_message):
+        """Return the first line of a commit message, empty for an empty message."""
+        lines = short_commit_message.splitlines()
+        return lines[0] if lines else u''
+
+    #----------------------------------------------------------------------
+    def _generate_modified_files_list(self, full_commit_info):
+        """Return the names of the modified files, one per line, indented by four spaces."""
+        modified_files = [modified_file['filename'] for modified_file in full_commit_info['files']]
+        return u'    %s' % u'\n    '.join(modified_files)
+
+    #----------------------------------------------------------------------
+    def _generate_modified_files_diffs(self, full_commit_info):
+        """Return the rendered diffs of all modified files, cut at _MAX_DIFF_LENGTH characters."""
+        rendered_diffs = []
+        for modified_file in full_commit_info['files']:
+            parameters = dict(filename=modified_file['filename'],
+                              changes=modified_file['changes'],
+                              additions=modified_file['additions'],
+                              deletions=modified_file['deletions'],
+                              patch=self._get_diff_if_available(modified_file))
+            rendered_diffs.append(EMAIL_DIFF_TEMPLATE % parameters)
+        diffs = u''.join(rendered_diffs)
+        # shrink diffs to at most ~ 100KB and tell the reader, but only if something was cut
+        if len(diffs) > self._MAX_DIFF_LENGTH:
+            diffs = diffs[:self._MAX_DIFF_LENGTH]
+            diffs += u'@@ Diff output truncated at %d characters. @@\n\n' % self._MAX_DIFF_LENGTH
+        return diffs
+
+    #----------------------------------------------------------------------
+    def _get_diff_if_available(self, modified_file):
+        """Return the patch of a modified file or a placeholder if the entry has none."""
+        return modified_file.get('patch', u'No diff available, check online')
+
+    #----------------------------------------------------------------------
+    def _send_mail(self, commit_info):
+        """Render the mail for ``commit_info`` and hand it over to the SMTP server."""
+        author_name = commit_info['author_name'].encode('utf-8')
+        author_name = str(Header(author_name, 'UTF-8'))
+        content = EMAIL_BODY_TEMPLATE % commit_info
+        msg = MIMEText(content, 'plain', 'utf-8')
+
+        msg['Subject'] = EMAIL_SUBJECT_TEMPLATE % commit_info
+        # the mail is shown as written by the commit author but sent from EMAIL_SENDER
+        msg['From'] = formataddr((author_name, EMAIL_SENDER))
+        msg['To'] = self._get_email_recipient()
+        msg['Date'] = formatdate(commit_info['commit_date'])
+
+        smtp_conn = SMTP(EMAIL_HOST)
+        try:
+            smtp_conn.sendmail(EMAIL_SENDER, msg['To'].split(','), msg.as_string())
+            smtp_conn.quit()
+        finally:
+            # no-op after a successful quit(), releases the socket if sending failed
+            smtp_conn.close()
+
+    #----------------------------------------------------------------------
+    def _get_email_recipient(self):
+        """Return the recipient address configured for the repository.
+
+        :raises KeyError: if the repository is not listed in EMAIL_RECIPIENT_MAP
+        """
+        repository = u'%s/%s' % (self._user, self._repository)
+        # no error handling on purpose, this should bail out if repository is not in the map
+        return EMAIL_RECIPIENT_MAP[repository]
+
+
+########################################################################
+class CommandLineArgumentError(Exception):
+    """Signals too few command line arguments; its text is the usage message."""
+
+    #----------------------------------------------------------------------
+    def __str__(self):
+        script_name = sys.argv[0]
+        return 'Usage: %s <user> <repository> <branch> <commit> ...' % (script_name,)
+
+
+#----------------------------------------------------------------------
+def setup_logging():
+    """Set up basic logging and return the logger of this script, set to LOG_LEVEL."""
+    logging.basicConfig()
+    hook_logger = logging.getLogger('github_commit_mail_hook')
+    hook_logger.setLevel(LOG_LEVEL)
+    return hook_logger
+
+
+#----------------------------------------------------------------------
+def parse_command_line_arguments():
+    """Split sys.argv into user, repository, branch and the list of commit hashes.
+
+    :raises CommandLineArgumentError: if not at least one commit hash is given
+    """
+    arguments = sys.argv[1:]
+    # three fixed arguments plus at least one commit hash
+    if len(arguments) < 4:
+        raise CommandLineArgumentError()
+
+    user, repository, branch = arguments[:3]
+    return user, repository, branch, arguments[3:]
+
+
+#----------------------------------------------------------------------
+def main():
+    """Command line entry point: send commit mails for the commits given as arguments.
+
+    A usage error is printed to stderr, any other error is logged as a warning.
+    """
+    logger = setup_logging()
+    try:
+        user, repository, branch, commits = parse_command_line_arguments()
+        mail_generator = CommitMailGenerator(user, repository, branch, commits, logger)
+        mail_generator.generate_commit_mails()
+    except CommandLineArgumentError as usage_error:
+        sys.stderr.write('%s\n' % usage_error)
+    except Exception as error:
+        logger.warn(u'An error occurred: %s' % error, exc_info=True)
+    logging.shutdown()
+
+
+# only run when executed as a script, post_commit_hook.py imports this module
+if __name__ == '__main__':
+    main()
+
+
+# python /misc/github_commit_mail.py geany geany refs/heads/master 85b5e08c471c505b59218b1a94df9b95a01cca06 eb04c514bab87af60f01ae3c8e9ee1d3fd9bccf8 ca922e0ddc8022283ec3c1f49aaa15ab7c5ba213 aa96bc2cbfab0a8033d0ed600541c2d2e0c767bb


Modified: scripts/git_hooks/post_commit_hook.py
134 lines changed, 134 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Author:  Enrico Tröger
+# License: GPLv2
+#
+'''
+Geany Github Post-Receive commit hook
+
+This script actually does two things:
+- trigger an update of the corresponding GIT mirror repository
+- send a commit mail to the mailing list
+'''
+
+
+from cgi import FieldStorage
+from json import loads
+from os import unlink
+from os.path import exists
+from subprocess import Popen, PIPE
+import github_commit_mail
+import logging
+import logging.handlers
+
+
+# file the logger of this script writes to
+LOG_FILENAME = u'/var/log/git_mirror.log'
+# names of the repositories for which main() triggers a mirror update
+VALID_UPDATE_REPOSITORIES = ('geany', 'geany-plugins', 'infrastructure', 'newsletter', 'talks')
+# path of a mirror repository, filled with the repository name
+REPOSITORY_BASE_PATH = u'/srv/www/git.geany.org/repos/%s.git'
+# marker files, both filled with the path of the repository:
+# the lock file exists while an update is running, the notify file asks for a later update
+UPDATE_LOCK_FILE = u'%s/.update_lock'
+UPDATE_NOTIFY_FILE = u'%s/.update_required'
+# recipients of the mails sent by the logging mail handler
+# extend on demand
+LOG_EMAIL_ADDRESSES = ['enrico at geany.org']
+
+
+#----------------------------------------------------------------------
+def setup_logging():
+    """Create the logger of this script.
+
+    Everything from DEBUG upwards goes to LOG_FILENAME, records from WARNING upwards
+    are additionally mailed to LOG_EMAIL_ADDRESSES.
+    """
+    hook_logger = logging.getLogger('post_commit_hook')
+    hook_logger.setLevel(logging.DEBUG)
+
+    # file
+    to_file = logging.FileHandler(LOG_FILENAME)
+    to_file.setLevel(logging.DEBUG)
+    to_file.setFormatter(
+        logging.Formatter('%(asctime)s %(name)s: %(levelname)s: %(message)s'))
+    hook_logger.addHandler(to_file)
+
+    # mail
+    to_mail = logging.handlers.SMTPHandler(
+        mailhost=u'localhost',
+        fromaddr=u'git-noreply at geany.org',
+        toaddrs=LOG_EMAIL_ADDRESSES,
+        subject=u'Error on git_post_commit')
+    to_mail.setLevel(logging.WARNING)
+    hook_logger.addHandler(to_mail)
+
+    return hook_logger
+
+
+#----------------------------------------------------------------------
+def handle_repository_update(repository):
+    """Update the mirror of ``repository`` or leave a notify if it is currently locked.
+
+    :param repository: name of the repository, inserted into REPOSITORY_BASE_PATH
+    """
+    repository_path = REPOSITORY_BASE_PATH % repository
+    lock_file_path = UPDATE_LOCK_FILE % repository_path
+    # this is not exactly safe against race-conditions but should be good enough
+    if exists(lock_file_path):
+        # if there is currently an update process running, simply mark the repository to be updated
+        # again later, a cronjob will pick it
+        update_notify_path = UPDATE_NOTIFY_FILE % repository_path
+        with open(update_notify_path, 'w') as update_notify:
+            update_notify.write('1')
+        logger.info(u'Not updating repository %s because it is locked, leaving a notify' % repository)
+        return
+
+    lock_file = open(lock_file_path, 'w')
+    try:
+        # update the repository
+        logger.info(u'Updating repository %s' % repository)
+        run_command(repository_path, ('git', 'remote', 'update'))
+        run_command(repository_path, ('git', 'update-server-info'))
+    finally:
+        # remove lockfile, even if the update failed: with a stale lock file every later
+        # call would only leave a notify instead of updating
+        lock_file.close()
+        unlink(lock_file_path)
+
+
+#----------------------------------------------------------------------
+def process_commit_mails(content):
+    """Send a commit mail for every commit listed in the decoded payload ``content``."""
+    repository_info = content['repository']
+    user = repository_info['owner']['name']
+    repository = repository_info['name']
+    # we just use the ref here for simplicity
+    branch = content['ref']
+    # get a list of commit hashes
+    commits = [commit['id'] for commit in content['commits']]
+
+    mail_generator = github_commit_mail.CommitMailGenerator(
+        user, repository, branch, commits, logger)
+    mail_generator.generate_commit_mails()
+
+
+#----------------------------------------------------------------------
+def run_command(repository_path, command):
+    """Run ``command`` inside ``repository_path`` and log its output at debug level.
+
+    :param repository_path: working directory of the started process
+    :param command: sequence of program name and arguments, handed to Popen as is
+    """
+    process = Popen(command, cwd=repository_path, stdout=PIPE, stderr=PIPE)
+    stdout, stderr = process.communicate()
+    output = u''
+    # the pipes deliver raw bytes: decode them explicitly, formatting non-ASCII bytes into
+    # a unicode string would otherwise raise a UnicodeDecodeError
+    if stdout:
+        output = u'%s\nStdout:\n%s' % (output, stdout.decode('utf-8', 'replace'))
+    if stderr:
+        output = u'%s\nStderr:\n%s' % (output, stderr.decode('utf-8', 'replace'))
+    logger.debug(u'Command "%s": %s' % (' '.join(command), output))
+
+
+#----------------------------------------------------------------------
+def main():
+    """Handle one request: update the mirror repository and send the commit mails."""
+    # parse query string
+    arguments = FieldStorage(keep_blank_values=True)
+
+    payload = arguments.getvalue('payload')
+    content = loads(payload)
+    # nothing to do for payloads without commits
+    if 'commits' not in content:
+        return
+
+    from pprint import pformat
+    logger.info(pformat(content))
+
+    repository = content['repository']['name']
+    if repository in VALID_UPDATE_REPOSITORIES:
+        handle_repository_update(repository)
+
+    # mails are sent regardless of whether the repository is mirrored
+    process_commit_mails(content)
+
+
+# module-level logger, used as a global by the functions above
+logger = setup_logging()
+try:
+    main()
+except Exception, e:
+    # log instead of crash so that the response below is still written; warnings also
+    # reach the mail handler configured in setup_logging()
+    logger.warn(u'An error occurred: %s' % e, exc_info=True)
+
+
+# the response is the same header-only reply whether or not main() failed
+print 'Content-type: text/html'
+print
+
+
+# flush and close the logging handlers
+logging.shutdown()


Modified: scripts/git_hooks/update_repositories.py
97 lines changed, 97 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Author:  Enrico Tröger
+# License: GPLv2
+#
+'''
+Geany GIT mirror repository updater
+
+This script is called automatically every 5 minutes as cronjob or can be run manually
+to update all of the Geany GIT Github mirror repositories which were marked by the
+post_commit_hook script to be out-of-date.
+'''
+
+
+from os import listdir, unlink
+from os.path import exists, join
+from subprocess import Popen, PIPE
+import logging
+
+
+# file the logger of this script writes to (in addition to its stream handler)
+LOG_FILENAME = u'/var/log/git_mirror.log'
+# directory whose entries are treated as repositories by main()
+REPOSITORY_BASE_PATH = u'/srv/www/git.geany.org/repos/'
+# marker files, both filled with the path of the repository:
+# the lock file exists while an update is running, the notify file asks for an update
+UPDATE_LOCK_FILE = u'%s/.update_lock'
+UPDATE_NOTIFY_FILE = u'%s/.update_required'
+
+
+#----------------------------------------------------------------------
+def setup_logging():
+    """Create the logger of this script, writing to a stream handler and to LOG_FILENAME."""
+    updater_logger = logging.getLogger('update_repositories')
+    updater_logger.setLevel(logging.DEBUG)
+    # both handlers log everything and use the same line format
+    log_format = logging.Formatter('%(asctime)s %(name)s: %(levelname)s: %(message)s')
+
+    to_stream = logging.StreamHandler()
+    to_stream.setLevel(logging.DEBUG)
+    to_stream.setFormatter(log_format)
+    updater_logger.addHandler(to_stream)
+
+    to_file = logging.FileHandler(LOG_FILENAME)
+    to_file.setLevel(logging.DEBUG)
+    to_file.setFormatter(log_format)
+    updater_logger.addHandler(to_file)
+
+    return updater_logger
+
+
+#----------------------------------------------------------------------
+def handle_repository_update(repository):
+    """Update ``repository`` if it is marked as out-of-date and not currently locked.
+
+    :param repository: name of the repository directory below REPOSITORY_BASE_PATH
+    """
+    repository_path = join(REPOSITORY_BASE_PATH, repository)
+    lock_file_path = UPDATE_LOCK_FILE % repository_path
+    update_notify_path = UPDATE_NOTIFY_FILE % repository_path
+    # this is not exactly safe against race-conditions but should be good enough
+    if exists(lock_file_path):
+        return
+    if not exists(update_notify_path):
+        return
+
+    # the with block closes the notify file on every path, also if no update is needed
+    with open(update_notify_path, 'r+') as update_notify_file:
+        need_update = update_notify_file.read() == '1'
+        if not need_update:
+            return
+
+        lock_file = open(lock_file_path, 'w')
+        try:
+            # update the repository
+            logger.info(u'Updating repository %s' % repository)
+            run_command(repository_path, ('sudo', '-u', 'www-data', 'git', 'remote', 'update'))
+            run_command(repository_path, ('sudo', '-u', 'www-data', 'git', 'update-server-info'))
+        finally:
+            # remove lockfile, even if the update failed: with a stale lock file every
+            # later call would return early without updating
+            lock_file.close()
+            unlink(lock_file_path)
+        # unmark update notify, only reached if the update did not raise so that a failed
+        # update is tried again on the next run
+        update_notify_file.truncate(0)
+
+
+#----------------------------------------------------------------------
+def run_command(repository_path, command):
+    """Run ``command`` inside ``repository_path`` and log its output at debug level.
+
+    :param repository_path: working directory of the started process
+    :param command: sequence of program name and arguments, handed to Popen as is
+    """
+    process = Popen(command, cwd=repository_path, stdout=PIPE, stderr=PIPE)
+    stdout, stderr = process.communicate()
+    output = u''
+    # the pipes deliver raw bytes: decode them explicitly, formatting non-ASCII bytes into
+    # a unicode string would otherwise raise a UnicodeDecodeError
+    if stdout:
+        output = u'%s\nStdout:\n%s' % (output, stdout.decode('utf-8', 'replace'))
+    if stderr:
+        output = u'%s\nStderr:\n%s' % (output, stderr.decode('utf-8', 'replace'))
+    logger.debug(u'Command "%s": %s' % (' '.join(command), output))
+
+
+#----------------------------------------------------------------------
+def main():
+    """Check every entry of REPOSITORY_BASE_PATH for a pending update."""
+    for repository in listdir(REPOSITORY_BASE_PATH):
+        handle_repository_update(repository)
+
+
+# module-level logger, used as a global by the functions above
+logger = setup_logging()
+try:
+    main()
+except Exception, e:
+    # log the error together with its traceback instead of letting the script crash
+    logger.warn(u'An error occurred: %s' % e, exc_info=True)
+
+# flush and close the logging handlers
+logging.shutdown()


@@ Diff output truncated at 100000 characters. @@


--------------
This E-Mail was brought to you by github_commit_mail.py (Source: TBD).



More information about the Commits mailing list