tof-voters: add commit analysis subcommand

Signed-off-by: Patrick Williams <patrick@stwcx.xyz>
Change-Id: I6e56202c014a52d16773415edf59d4e6a7ecdf59
diff --git a/tof-voters/libvoters/acceptable.py b/tof-voters/libvoters/acceptable.py
new file mode 100644
index 0000000..2e350e1
--- /dev/null
+++ b/tof-voters/libvoters/acceptable.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python3
+"""Acceptance filters for projects and files."""
+import re
+
+
+def project(name: str) -> bool:
+    """Return True unless the project `name` is on a deny list.
+
+    A project is rejected when its name equals an entry of the exact
+    deny list, or when any deny pattern matches at the start of the name
+    (`re.match` semantics, so patterns are not anchored at the end).
+    """
+    denied_exact = ["openbmc/s2600wf-misc"]
+    denied_patterns = [
+        ".*-oem",
+        "openbmc/ibm-.*",
+        "openbmc/intel-.*",
+        "openbmc/openpower-.*",
+        "openbmc/google-.*",
+        "openbmc/meta-.*",
+    ]
+
+    if name in denied_exact:
+        return False
+
+    return not any(re.match(pattern, name) for pattern in denied_patterns)
+
+
+def file(proj: str, filename: str) -> bool:
+    """Return True unless `filename` in project `proj` is denied.
+
+    The path is rejected when it equals an entry of the exact deny list,
+    or when a pattern registered for `proj` or under the "all" key
+    matches at the start of the path.
+    """
+    denied_exact = ["/COMMIT_MSG"]
+    denied_patterns = {
+        "all": [
+            ".*/google/",
+            ".*/ibm/",
+            ".*/intel/",
+            "MAINTAINERS",
+            "OWNERS",
+            "ibm-.*",
+            "ibm_.*",
+        ],
+        "openbmc/entity-manager": ["configurations/.*"],
+        "openbmc/libmctp": ["docs/bindings/vendor-.*"],
+        "openbmc/openbmc": ["meta-(?!phosphor).*", "poky/.*"],
+        "openbmc/openbmc-test-automation": ["oem/.*", "openpower/.*"],
+        "openbmc/phosphor-debug-collector": [
+            "dump-extensions/.*",
+            "tools/dreport.d/ibm.d/.*",
+        ],
+        "openbmc/phosphor-fan-presence": [".*/config_files/.*"],
+        "openbmc/phosphor-power": [".*/config_files/.*"],
+        "openbmc/phosphor-led-manager": ["configs/.*"],
+        "openbmc/phosphor-logging": [".*/openpower-pels/.*"],
+        "openbmc/webui-vue": [
+            "src/env/.*",
+        ],
+    }
+
+    if filename in denied_exact:
+        return False
+
+    # Project-specific patterns are tried before the shared "all" ones.
+    patterns = denied_patterns.get(proj, []) + denied_patterns.get("all", [])
+    return not any(re.match(pattern, filename) for pattern in patterns)
diff --git a/tof-voters/libvoters/entry_point.py b/tof-voters/libvoters/entry_point.py
index e36d06e..87ed67b 100644
--- a/tof-voters/libvoters/entry_point.py
+++ b/tof-voters/libvoters/entry_point.py
@@ -4,7 +4,7 @@
 from importlib import import_module
 from typing import List
 
-subcommands = ["dump-gerrit"]
+subcommands = ["analyze-commits", "dump-gerrit"]
 
 
 def main() -> int:
diff --git a/tof-voters/libvoters/subcmd/analyze-commits.py b/tof-voters/libvoters/subcmd/analyze-commits.py
new file mode 100644
index 0000000..4717991
--- /dev/null
+++ b/tof-voters/libvoters/subcmd/analyze-commits.py
@@ -0,0 +1,128 @@
+#!/usr/bin/python3
+"""Subcommand that credits change owners for qualifying merged changes."""
+
+import argparse
+import json
+import libvoters.acceptable as acceptable
+import os
+import re
+from collections import defaultdict
+from libvoters.time import timestamp, TimeOfDay
+from typing import Any, Dict
+
+
+class subcmd:
+    """Implementation of the `analyze-commits` subcommand."""
+
+    def __init__(self, parser: argparse._SubParsersAction) -> None:
+        """Register the subcommand and its options on `parser`."""
+        p = parser.add_parser(
+            "analyze-commits", help="Determine points for commits"
+        )
+
+        p.add_argument(
+            "--before",
+            "-b",
+            help="Before timestamp (YYYY-MM-DD)",
+            required=True,
+        )
+        p.add_argument(
+            "--after",
+            "-a",
+            help="After timestamp (YYYY-MM-DD)",
+            required=True,
+        )
+
+        # Make this instance reachable as `args.cmd` after parsing.
+        p.set_defaults(cmd=self)
+
+    def run(self, args: argparse.Namespace) -> int:
+        """Scan the change dumps in `args.dir` and write `commits.json`.
+
+        A change is credited to its owner when its status is MERGED, its
+        merge timestamp lies between the end of the `--after` day and the
+        start of the `--before` day (inclusive), its project is acceptable,
+        and the acceptable files of its highest-numbered patch set total at
+        least 10 inserted plus deleted lines.
+
+        NOTE(review): `args.dir` is not registered by this subcommand;
+        presumably it comes from a parent parser -- confirm.
+
+        Returns 0. Raises RuntimeError when a MERGED change has no
+        timestamped "Change has been successfully ..." comment.
+        """
+        before = timestamp(args.before, TimeOfDay.AM)
+        after = timestamp(args.after, TimeOfDay.PM)
+
+        changes_per_user: Dict[str, list[int]] = defaultdict(list)
+
+        for entry in sorted(os.listdir(args.dir)):
+            path = os.path.join(args.dir, entry)
+            if not os.path.isfile(path):
+                continue
+
+            # Match the whole name so leftovers such as "123.json.bak" are
+            # not parsed and credited a second time.
+            if not re.fullmatch(r"[0-9]*\.json", entry):
+                continue
+
+            with open(path, "r") as dump:
+                data = json.load(dump)
+
+            if data["status"] != "MERGED":
+                continue
+
+            # The last matching comment wins if there are several.
+            merged_at = 0
+            for c in data["comments"]:
+                if "timestamp" not in c:
+                    continue
+                if "message" in c and re.match(
+                    "Change has been successfully .*", c["message"]
+                ):
+                    merged_at = c["timestamp"]
+
+            if merged_at == 0:
+                raise RuntimeError(f"Missing merge timestamp on {entry}")
+
+            if merged_at > before or merged_at < after:
+                continue
+
+            project = data["project"]
+            id_number = data["number"]
+            user = data["owner"]["username"]
+
+            if not acceptable.project(project):
+                print("Rejected project:", project, id_number)
+                continue
+
+            # Only the highest-numbered patch set is scored.
+            latest_patchset: Dict[str, Any] = sorted(
+                data["patchSets"], key=lambda x: x["number"]
+            )[-1]
+
+            changes = 0
+            touched_files = []
+            for file_data in latest_patchset["files"]:
+                if not acceptable.file(project, file_data["file"]):
+                    continue
+                # abs() tolerates a deletions count reported as negative.
+                changes += int(file_data["insertions"]) + abs(
+                    int(file_data["deletions"])
+                )
+                touched_files.append(file_data["file"])
+
+            if changes < 10:
+                print("Rejected for limited changes:", project, id_number)
+                continue
+
+            print(project, id_number, user)
+            for touched in touched_files:
+                print(f"    {touched}")
+
+            changes_per_user[user].append(id_number)
+
+        with open(os.path.join(args.dir, "commits.json"), "w") as outfile:
+            outfile.write(json.dumps(changes_per_user, indent=4))
+
+        return 0
diff --git a/tof-voters/libvoters/time.py b/tof-voters/libvoters/time.py
new file mode 100644
index 0000000..efcee29
--- /dev/null
+++ b/tof-voters/libvoters/time.py
@@ -0,0 +1,23 @@
+#!/usr/bin/python3
+"""Convert calendar dates to UTC epoch timestamps."""
+from enum import Enum
+from datetime import datetime, timezone
+
+
+class TimeOfDay(Enum):
+    """Selects which end of a day `timestamp` resolves a date to."""
+
+    AM = 0  # 00:00:00
+    PM = 1  # 23:59:59
+
+
+def timestamp(date: str, time: TimeOfDay) -> int:
+    """Return the UTC epoch seconds for `date` at the chosen time of day.
+
+    `date` is a "YEAR-MONTH-DAY" string of integers; `TimeOfDay.AM` maps
+    to 00:00:00 and any other value to 23:59:59 of that day.
+    """
+    year, month, day = [int(part) for part in date.split("-")]
+    clock = (0, 0, 0) if time == TimeOfDay.AM else (23, 59, 59)
+    moment = datetime(year, month, day, *clock, tzinfo=timezone.utc)
+    return int(moment.timestamp())