obmc-scripts: Add maintainers

maintainers is a python library and collection of scripts for
parsing and generating OpenBMC MAINTAINERS files. The culmination of the
effort is the `obmc-gerrit` git wrapper that automatically adds people
listed in the per-repository MAINTAINERS file as reviewers on changes
pushed to Gerrit.

Change-Id: I4a3c3efc5899b80a65836c1ad948ec1153dd6796
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
diff --git a/amboar/obmc-scripts/maintainers/README.md b/amboar/obmc-scripts/maintainers/README.md
new file mode 100644
index 0000000..14434f7
--- /dev/null
+++ b/amboar/obmc-scripts/maintainers/README.md
@@ -0,0 +1,69 @@
+A Collection of Python Tools to Manipulate MAINTAINERS Files
+============================================================
+
+OpenBMC defines its own style of MAINTAINERS file that is almost, but not
+entirely, like the Linux kernel's MAINTAINERS file.
+
+Historically the MAINTAINERS file was kept in the openbmc/docs repository and
+described the maintainers for all repositories under the OpenBMC GitHub
+organisation. Due to its separation from the repositories it was describing,
+openbmc/docs:MAINTAINERS was both incomplete and out-of-date.
+
+These scripts were developed to resolve the unmaintained state of MAINTAINERS by
+distributing the information into each associated repository.
+
+General Use Stuff
+=================
+
+`obmc-gerrit` is a helper script for pushing changes to Gerrit. For a
+repository with an OpenBMC-compatible MAINTAINERS file at its top level,
+`obmc-gerrit` will parse the MAINTAINERS file and mangle the `git push`
+`REFSPEC` such that the maintainers and reviewers listed for the repository are
+automatically added to the changes pushed:
+
+```
+$ obmc-gerrit push gerrit HEAD:refs/for/master
+```
+
+Installation
+------------
+
+obmc-gerrit requires Python3. If this is not available on your system (!), see
+the virtualenv section below.
+
+To install obmc-gerrit:
+
+```
+$ pip3 install --user -r requirements.txt
+$ python3 setup.py install --user
+```
+
+I don't have Python3
+--------------------
+
+Well, hopefully you have `virtualenv`. If you do, then you can run the
+following commands:
+
+```
+$ virtualenv --python=python3 .venv
+$ . .venv/bin/activate
+$ pip install -r requirements.txt
+$ python setup.py install
+```
+
+To exit the virtualenv, run `deactivate`. To run `obmc-gerrit` you will need to
+ensure you have activated your virtualenv first.
+
+MAINTAINERS Library
+===================
+
+`maintainers.py` is the core library that handles parsing and assembling
+MAINTAINERS files. An AST can be obtained with `parse_block()`, and the content
+of a MAINTAINERS file can be obtained by passing an AST to `assemble_block()`.
+
+Once-off Thingos
+================
+
+`split_maintainers.py` is the script used to split the monolithic MAINTAINERS
+file in openbmc/docs into per-repository MAINTAINERS files and post the patches
+to Gerrit. Do not use this; it is captured for posterity.
diff --git a/amboar/obmc-scripts/maintainers/obmc-gerrit b/amboar/obmc-scripts/maintainers/obmc-gerrit
new file mode 100755
index 0000000..a52abbc
--- /dev/null
+++ b/amboar/obmc-scripts/maintainers/obmc-gerrit
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2018 IBM Corp.
+#
+# Push changes to Gerrit, automatically adding reviewers to the patches by
+# parsing the OpenBMC-style MAINTAINERS file in the root of the repository (if
+# it exists).
+
+from obmc import maintainers
+from typing import cast, List, Optional
+import argparse
+import os
+import sh
+import sys
+
+git = sh.git.bake()
+
+def get_reviewers(root: Optional[str]=None, mname: str='MAINTAINERS') -> List[str]:
+    """Return maintainer, then reviewer, addresses; [] if file or tags absent."""
+    reviewers: List[str] = list()
+    if not root:
+        root = git('rev-parse', '--show-toplevel').strip()
+    mfile = os.path.join(root, mname)
+    if not os.path.exists(mfile):
+        # The MAINTAINERS file is optional: push without adding reviewers
+        return reviewers
+    with open(mfile, 'r') as mstream:
+        maintainers.trash_preamble(mstream)
+        block = maintainers.parse_block(mstream)
+    if not block:
+        return reviewers
+    for tag in (maintainers.LineType.MAINTAINER, maintainers.LineType.REVIEWER):
+        idents = cast(List[maintainers.Identity], block.get(tag, []))
+        reviewers.extend(i.email.address for i in idents)
+    return reviewers
+
+def gerrit_refspec_args(reviewers: Optional[List[str]]=None) -> str:
+    """Format addresses as Gerrit push options ('r=a,r=b'); '' when none."""
+    if not reviewers:
+        return ""
+    return ",".join("r={}".format(addr) for addr in reviewers)
+
+def decorate_refspec(refspec: str) -> str:
+    """Append the MAINTAINERS reviewers to `refspec` as Gerrit push options."""
+    gargs = gerrit_refspec_args(get_reviewers())
+    sep = ',' if '%' in refspec else '%'  # extend options already present
+    return (refspec + sep + gargs) if gargs else refspec  # no dangling '%'
+
+def do_push(args: argparse.Namespace) -> None:
+    git.push(args.remote, decorate_refspec(args.refspec),
+             _in=sys.stdin, _out=sys.stdout, _err=sys.stderr)
+
+parser = argparse.ArgumentParser()
+subbies = parser.add_subparsers(dest='subcommand')
+subbies.required = True
+push = subbies.add_parser("push", help="Push changes to Gerrit with reviewers")
+push.add_argument("remote")
+push.add_argument("refspec")
+push.set_defaults(func=do_push)
+
+args = parser.parse_args()
+args.func(args)
diff --git a/amboar/obmc-scripts/maintainers/obmc/__init__.py b/amboar/obmc-scripts/maintainers/obmc/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/amboar/obmc-scripts/maintainers/obmc/__init__.py
diff --git a/amboar/obmc-scripts/maintainers/obmc/maintainers.py b/amboar/obmc-scripts/maintainers/obmc/maintainers.py
new file mode 100755
index 0000000..d18d676
--- /dev/null
+++ b/amboar/obmc-scripts/maintainers/obmc/maintainers.py
@@ -0,0 +1,233 @@
+#!/usr/bin/python3
+#
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2018 IBM Corp.
+
+import argparse
+import sys
+from collections import namedtuple, OrderedDict
+from enum import Enum, unique
+from typing import (Dict, NamedTuple, Iterator, Sequence, Union, Optional,
+                    List, cast, IO)
+from pprint import pprint
+
+@unique
+class LineType(Enum):
+    REPO = 1
+    MAINTAINER = 2
+    REVIEWER = 3
+    FORKED = 4
+    COMMENT = 5
+
+@unique
+class ParseState(Enum):
+    BEGIN = 1
+    BLOCK = 2
+
+Email = NamedTuple("Email", [("name", str), ("address", str)])
+Identity = NamedTuple("Identity", [("email", Email), ("irc", Optional[str])])
+Entry = NamedTuple("Entry", [("type", LineType), ("content", str)])
+
+def parse_line(line: str) -> Optional[Entry]:
+    """Classify one MAINTAINERS line; None if it is blank or unrecognised."""
+    sline = line.strip()
+    if not sline:
+        return None
+    if sline == "MAINTAINERS":
+        return Entry(LineType.REPO, sline)
+    # The tag is read from the raw line, so tagged lines must not be indented
+    tag = line[:2]
+    if '@' in tag:
+        return Entry(LineType.REPO, sline[1:].split(":")[0].strip())
+    elif tag == 'M:':
+        # Drop only the tag, as for F:, so a later ':' can't truncate the entry
+        return Entry(LineType.MAINTAINER, sline[2:].strip())
+    elif tag == 'R:':
+        return Entry(LineType.REVIEWER, sline[2:].strip())
+    elif tag == 'F:':
+        return Entry(LineType.FORKED, sline[2:].strip())
+    elif '#' in tag:
+        return Entry(LineType.COMMENT, line)
+    return None
+
+D = Union[str, List[Identity], List[str]]
+
+def parse_repo(content: str) -> str:
+    return content
+
+def parse_forked(content: str) -> str:
+    return content
+
+def parse_irc(src: Iterator[str]) -> Optional[str]:
+    irc = ""
+    for c in src:
+        if c == '#':
+            return None
+        if c == '<':
+            break
+    else:
+        return None
+
+    for c in src:
+        if c in '!#':
+            return irc.strip()
+        irc += c
+
+    raise ValueError("Unterminated IRC handle")
+
+def parse_address(src: Iterator[str]) -> str:
+    addr = ""
+    for c in src:
+        if c in '>#':
+            return addr.strip()
+        addr += c
+    raise ValueError("Unterminated email address")
+
+def parse_name(src: Iterator[str]) -> str:
+    name = ""
+    for c in src:
+        if c in '<#':
+            return name.strip()
+        name += c
+    raise ValueError("Unterminated name")
+
+def parse_email(src: Iterator[str]) -> Email:
+    name = parse_name(src)
+    address = parse_address(src)
+    return Email(name, address)
+
+def parse_identity(content: str) -> Identity:
+    ci = iter(content)
+    email = parse_email(ci)
+    irc = parse_irc(ci)
+    return Identity(email, irc)
+
+B = Dict[LineType, D]
+
+def parse_block(src: Iterator[str]) -> Optional[B]:
+    state = ParseState.BEGIN
+    repo: Dict[LineType, D] = OrderedDict()
+    for line in src:
+        try:
+            entry = parse_line(line)
+            if state == ParseState.BEGIN and not entry:
+                continue
+            elif state == ParseState.BEGIN and entry:
+                state = ParseState.BLOCK
+            elif state == ParseState.BLOCK and not entry:
+                return repo
+
+            assert entry
+
+            if entry.type == LineType.REPO:
+                repo[entry.type] = parse_repo(entry.content)
+            elif entry.type in { LineType.MAINTAINER, LineType.REVIEWER }:
+                if not entry.type in repo:
+                    repo[entry.type] = cast(List[Identity], list())
+                cast(list, repo[entry.type]).append(parse_identity(entry.content))
+            elif entry.type == LineType.FORKED:
+                repo[entry.type] = parse_forked(entry.content)
+            elif entry.type == LineType.COMMENT:
+                if not entry.type in repo:
+                    repo[entry.type] = cast(List[str], list())
+                cast(list, repo[entry.type]).append(entry.content)
+        except ValueError as e:
+            print("Failed to parse line '{}': {}".format(line.strip(), e))
+
+    if not repo:
+        return None
+
+    return repo
+
+def trash_preamble(src: Iterator[str]) -> None:
+    s = 0
+    for line in src:
+        sline = line.strip()
+        if "START OF MAINTAINERS LIST" == sline:
+            s = 1
+        if s == 1 and sline == "-------------------------":
+            break
+
+def parse_maintainers(src: Iterator[str]) -> Dict[D, B]:
+    maintainers: Dict[D, B] = OrderedDict()
+    trash_preamble(src)
+    while True:
+        repo: B = parse_block(src)
+        if not repo:
+            break
+        maintainers[repo[LineType.REPO]] = repo
+    return maintainers
+
+def assemble_name(name: str, dst: IO[str]) -> None:
+    dst.write(name)
+
+def assemble_address(address: str, dst: IO[str]) -> None:
+    dst.write("<")
+    dst.write(address)
+    dst.write(">")
+
+def assemble_email(email: Email, dst: IO[str]) -> None:
+    assemble_name(email.name, dst)
+    dst.write(" ")
+    assemble_address(email.address, dst)
+
+def assemble_irc(irc: Optional[str], dst: IO[str]) -> None:
+    if irc:
+        dst.write(" ")
+        dst.write("<")
+        dst.write(irc)
+        dst.write("!>")
+
+def assemble_identity(identity: Identity, dst: IO[str]) -> None:
+    assemble_email(identity.email, dst)
+    assemble_irc(identity.irc, dst)
+
+def assemble_maintainers(identities: List[Identity], dst: IO[str]) -> None:
+    for i in identities:
+        dst.write("M:  ")
+        assemble_identity(i, dst)
+        dst.write("\n")
+
+def assemble_reviewers(identities: List[Identity], dst: IO[str]) -> None:
+    for i in identities:
+        dst.write("R:  ")
+        assemble_identity(i, dst)
+        dst.write("\n")
+
+def assemble_forked(content: str, dst: IO[str]) -> None:
+    if content:
+        dst.write("F:  ")
+        dst.write(content)
+        dst.write("\n")
+
+def assemble_comment(content: List[str], dst: IO[str]) -> None:
+    dst.write("".join(content))
+
+def assemble_block(block: B, default: B, dst: IO[str]) -> None:
+    if LineType.COMMENT in block:
+        assemble_comment(cast(List[str], block[LineType.COMMENT]), dst)
+    if LineType.MAINTAINER in block:
+        maintainers = block[LineType.MAINTAINER]
+    else:
+        maintainers = default[LineType.MAINTAINER]
+    assemble_maintainers(cast(List[Identity], maintainers), dst)
+    if LineType.REVIEWER in block:
+        assemble_reviewers(cast(List[Identity], block[LineType.REVIEWER]), dst)
+    if LineType.FORKED in block:
+        assemble_forked(cast(str, block[LineType.FORKED]), dst)
+
+def main() -> None:  # CLI: parse a MAINTAINERS file and re-emit every block
+    parser = argparse.ArgumentParser()
+    parser.add_argument("maintainers", type=argparse.FileType('r'),
+                        nargs='?', default=sys.stdin)  # '?' enables default
+    parser.add_argument("output", type=argparse.FileType('w'),
+                        nargs='?', default=sys.stdout)
+    args = parser.parse_args()
+    blocks = parse_maintainers(args.maintainers)
+    for block in blocks.values():
+        print(block[LineType.REPO], file=args.output)  # heading joins its block
+        assemble_block(block, blocks['MAINTAINERS'], args.output)
+        print(file=args.output)
+
+if __name__ == "__main__":
+    main()
diff --git a/amboar/obmc-scripts/maintainers/requirements.txt b/amboar/obmc-scripts/maintainers/requirements.txt
new file mode 100644
index 0000000..579ac31
--- /dev/null
+++ b/amboar/obmc-scripts/maintainers/requirements.txt
@@ -0,0 +1,2 @@
+requests
+sh
diff --git a/amboar/obmc-scripts/maintainers/setup.py b/amboar/obmc-scripts/maintainers/setup.py
new file mode 100644
index 0000000..ab8b585
--- /dev/null
+++ b/amboar/obmc-scripts/maintainers/setup.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+from distutils.core import setup
+
+setup(name='obmc-gerrit',
+      version='0.1',
+      description='OpenBMC Gerrit wrapper',
+      author='Andrew Jeffery',
+      author_email='andrew@aj.id.au',
+      url='https://github.com/openbmc/openbmc-tools',
+      packages=['obmc'],
+      requires=['requests', 'sh'],
+      scripts=['obmc-gerrit'],
+     )
diff --git a/amboar/obmc-scripts/maintainers/split_maintainers.py b/amboar/obmc-scripts/maintainers/split_maintainers.py
new file mode 100755
index 0000000..bf0a143
--- /dev/null
+++ b/amboar/obmc-scripts/maintainers/split_maintainers.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2018 IBM Corp.
+
+import argparse
+import sh
+import os
+from obmc import maintainers
+from pprint import pprint
+import requests
+import json
+from typing import List, Dict, Union, cast, Iterator
+import sys
+import itertools
+
+git = sh.git.bake()
+
+mailmap = {
+    'andrewg@us.ibm.com' : 'geissonator@yahoo.com',
+}
+
+def gerrit_url(name: str, user: str) -> str:
+    return "ssh://{}@gerrit.openbmc-project.xyz:29418/openbmc/{}".format(user, name)
+
+def gerrit_push_args(reviewers: Iterator[maintainers.Identity]) -> str:
+    addrs = (i.email.address for i in reviewers)
+    maddrs = (mailmap[a] if a in mailmap else a for a in addrs)
+    return ','.join("r={}".format(ma) for ma in maddrs)
+
+def gerrit_push(name: str, user: str, reviewers: Iterator[maintainers.Identity]) -> None:
+    refspec = 'HEAD:refs/for/master/maintainers%{}'.format(gerrit_push_args(reviewers))
+    git.push(gerrit_url(name, user), refspec)
+
+def org_repos_url(name) -> str:
+    return "https://api.github.com/users/{}/repos?per_page=100".format(name)
+
+V = Union[Dict[str, str], str]
+E = Dict[str, V]
+R = List[E]
+
+def org_repos(name: str) -> R:
+    r = requests.get(org_repos_url(name))
+    if not r.ok:
+        raise ValueError("Bad organisation name")
+    return json.loads(r.text or r.content)
+
+def git_reset_upstream(name: str) -> None:
+    """Hard-reset the checkout in `name` to its freshly fetched origin/master."""
+    # _cwd runs git inside the checkout, so this process never changes
+    # directory and cannot be left stranded there when a git command fails
+    git.fetch("origin", _cwd=name)
+    git.reset("--hard", "origin/master", _cwd=name)
+
+def ensure_org_repo(name: str, user: str) -> str:
+    if os.path.exists(os.path.join(name, ".git")):
+        # git_reset_upstream(name)
+        pass
+    else:
+        git.clone(gerrit_url(name, user), name)
+    scp_src = "{}@gerrit.openbmc-project.xyz:hooks/commit-msg".format(user)
+    scp_dst = "{}/.git/hooks/".format(name)
+    sh.scp("-p", "-P", 29418, scp_src, scp_dst)
+    return name
+
+def repo_url(name: str) -> str:
+    return "https://github.com/openbmc/{}.git".format(name)
+
+def ensure_repo(name: str) -> str:
+    if os.path.exists(os.path.join(name, ".git")):
+        # git_reset_upstream(name)
+        pass
+    else:
+        git.clone(repo_url(name), name)
+    return name
+
+preamble_text = """\
+How to use this list:
+    Find the most specific section entry (described below) that matches where
+    your change lives and add the reviewers (R) and maintainers (M) as
+    reviewers. You can use the same method to track down who knows a particular
+    code base best.
+
+    Your change/query may span multiple entries; that is okay.
+
+    If you do not find an entry that describes your request at all, someone
+    forgot to update this list; please at least file an issue or send an email
+    to a maintainer, but preferably you should just update this document.
+
+Description of section entries:
+
+    Section entries are structured according to the following scheme:
+
+    X:  NAME <EMAIL_USERNAME@DOMAIN> <IRC_USERNAME!>
+    X:  ...
+    .
+    .
+    .
+
+    Where REPO_NAME is the name of the repository within the OpenBMC GitHub
+    organization; FILE_PATH is a file path within the repository, possibly with
+    wildcards; X is a tag of one of the following types:
+
+    M:  Denotes maintainer; has fields NAME <EMAIL_USERNAME@DOMAIN> <IRC_USERNAME!>;
+        if omitted from an entry, assume one of the maintainers from the
+        MAINTAINERS entry.
+    R:  Denotes reviewer; has fields NAME <EMAIL_USERNAME@DOMAIN> <IRC_USERNAME!>;
+        these people are to be added as reviewers for a change matching the repo
+        path.
+    F:  Denotes forked from an external repository; has fields URL.
+
+    Line comments are to be denoted "# SOME COMMENT" (typical shell style
+    comment); it is important to follow the correct syntax and semantics as we
+    may want to use automated tools with this file in the future.
+
+    A change cannot be added to an OpenBMC repository without a MAINTAINER's
+    approval; thus, a MAINTAINER should always be listed as a reviewer.
+
+START OF MAINTAINERS LIST
+-------------------------
+
+"""
+
+def generate_maintainers_change(name: str, block: maintainers.B,
+        default: maintainers.B, user: str) -> None:
+    cwd = os.getcwd()
+    os.chdir(name)
+    mpath = "MAINTAINERS"
+    try:
+        if os.path.exists(mpath):
+            print("{} already exists, skipping".format(mpath))
+            return
+        with open(mpath, 'w') as m:
+            m.write(preamble_text)
+            maintainers.assemble_block(block, default, m)
+        git.add(mpath)
+        git.commit("-s", "-m", "Add {} file".format(mpath), _out=sys.stdout)
+        with open(mpath, 'r') as m:
+            maintainers.trash_preamble(m)
+            block = maintainers.parse_block(m)
+            pprint(block)
+            audience = cast(List[maintainers.Identity],
+                    block[maintainers.LineType.MAINTAINER][:])
+            if maintainers.LineType.REVIEWER in block:
+                reviewers = cast(List[maintainers.Identity],
+                        block[maintainers.LineType.REVIEWER])
+                audience.extend(reviewers)
+            gerrit_push(name, user, iter(audience))
+    finally:
+        os.chdir(cwd)
+
+def main() -> None:
+    """Split docs/MAINTAINERS into per-repository files and push each one."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--organisation", type=str, default="openbmc")
+    parser.add_argument("--user", type=str, default="amboar")
+    args = parser.parse_args()
+    ensure_repo("docs")
+    with open('docs/MAINTAINERS', 'r') as mfile:
+        mast = maintainers.parse_maintainers(mfile)
+    default = mast['MAINTAINERS']
+    # Don't leak the generic comment into the repo-specific MAINTAINERS file
+    default.pop(maintainers.LineType.COMMENT, None)  # tolerate no comment
+
+    for e in org_repos(args.organisation):
+        print("Ensuring MAINTAINERS for {}".format(e['name']))
+        name = cast(str, e['name'])
+        try:
+            ensure_org_repo(name, args.user)
+            block = mast[name] if name in mast else default
+            if not maintainers.LineType.FORKED in block:
+                generate_maintainers_change(name, block, default, args.user)
+        except sh.ErrorReturnCode_128:
+            print("{} has not been imported into Gerrit, skipping".format(name))
+        print()
+
+if __name__ == "__main__":
+    main()