Rewrite DCO check in Python 40/61840/18
authorDW Talton <dtalton@contractor.linuxfoundation.org>
Wed, 25 Sep 2019 17:38:57 +0000 (10:38 -0700)
committerDW Talton <dtalton@contractor.linuxfoundation.org>
Mon, 28 Oct 2019 22:56:47 +0000 (15:56 -0700)
Move to Python for more control and readability

Issue: RELENG-2374
Signed-off-by: DW Talton <dtalton@contractor.linuxfoundation.org>
Change-Id: If3c60718579fcb8f966c36fa212d23360c637864

lftools/cli/dco.py
lftools/shell/__init__.py [new file with mode: 0644]
lftools/shell/dco.py [new file with mode: 0644]
setup.cfg
shell/dco [deleted file]

index 7bd0cb4..4b1ec5e 100644 (file)
@@ -9,11 +9,12 @@
 ##############################################################################
 """Script to check a git repository for commits missing DCO."""
 
-import subprocess
 import sys
 
 import click
 
+from lftools.shell import dco as dco_checker
+
 
 @click.group()
 @click.pass_context
@@ -36,7 +37,7 @@ def check(ctx, repo_path):
     """
     if not repo_path:
         repo_path = "."
-    status = subprocess.call(['dco', 'check', repo_path])
+    status = dco_checker.check(repo_path)
     sys.exit(status)
 
 
@@ -44,7 +45,7 @@ def check(ctx, repo_path):
 @click.argument('repo-path', required=False)
 @click.pass_context
 def match(ctx, repo_path):
-    """Check repository for commits whose DCO does not match the commit author's email.
+    """Check for commits whose DCO does not match the commit author's email.
 
     This check will exclude merge commits and empty commits.
     It operates in your current working directory which has to
@@ -54,7 +55,7 @@ def match(ctx, repo_path):
     """
     if not repo_path:
         repo_path = "."
-    status = subprocess.call(['dco', 'match', repo_path])
+    status = dco_checker.match(repo_path)
     sys.exit(status)
 
 
diff --git a/lftools/shell/__init__.py b/lftools/shell/__init__.py
new file mode 100644 (file)
index 0000000..b75d42d
--- /dev/null
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: EPL-1.0
+##############################################################################
+# Copyright (c) 2019 The Linux Foundation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+##############################################################################
+"""Placeholder docstring for PyDocStyleBear."""
diff --git a/lftools/shell/dco.py b/lftools/shell/dco.py
new file mode 100644 (file)
index 0000000..dc9b96d
--- /dev/null
@@ -0,0 +1,114 @@
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: EPL-1.0
+##############################################################################
+# Copyright (c) 2019 The Linux Foundation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+##############################################################################
+"""Functions for DCO check tasks."""
+
+import logging
+from os import chdir
+from os import getcwd
+import re
+import subprocess  # nosec
+
+log = logging.getLogger(__name__)
+
+
+def get_branches(path=None, invert=False):
+    """Collect "<hash> <author-email>" entries for commits on remote branches.
+
+    Changes the process working directory to ``path`` and runs ``git log``
+    for every branch listed by ``git branch -r`` except ``origin/HEAD``.
+
+    :param path: repository to inspect; defaults to the working directory
+        at call time.
+    :param invert: when true, select commits whose message does NOT contain
+        "Signed-off-by" (``--invert-grep``); otherwise select commits whose
+        message contains it.
+    :returns: list of unique "<hash> <author-email>" strings in the order
+        git reported them, or False when no commit was selected.
+    :raises SystemExit: with status 1 when a git command fails or no remote
+        branch is found.
+    """
+    if path is None:
+        # Resolved per call; a getcwd() default is frozen at import time.
+        path = getcwd()
+    chdir(path)
+    try:
+        # Argument lists (no shell) keep branch names from being
+        # interpreted as shell syntax.
+        listing = subprocess.check_output(  # nosec
+            ['git', 'branch', '-r'])\
+            .decode(encoding="UTF-8")
+        branches = [line.strip() for line in listing.splitlines()
+                    if line.strip() and 'origin/HEAD' not in line]
+        if not branches:
+            # Nothing to inspect: fail rather than report a clean repository.
+            log.error("No remote branches found in {}".format(path))
+            raise SystemExit(1)
+        log_args = ['--no-merges', '--pretty=%H %ae',
+                    '--grep', 'Signed-off-by']
+        if invert:
+            log_args.append('--invert-grep')
+        hashlist = []
+        for branch in branches:
+            # The trailing '--' marks the branch as a revision, not a path.
+            output = subprocess.check_output(  # nosec
+                ['git', 'log', branch] + log_args + ['--'])\
+                .decode(encoding="UTF-8")
+            # Skip the blank entry left by each trailing newline.
+            hashlist.extend(line for line in output.split('\n') if line)
+        # A commit reachable from several branches is reported once.
+        hashlist = list(dict.fromkeys(hashlist))
+        return hashlist if hashlist else False
+    except subprocess.CalledProcessError as e:
+        log.error(e)
+        raise SystemExit(1)
+
+
+def check(path=None):
+    """Check repository for commits missing DCO.
+
+    Logs the hash of every non-merge commit on a remote branch whose
+    message has no "Signed-off-by" text.
+
+    :param path: repository to inspect; defaults to the working directory
+        at call time.
+    :returns: 0 when the selected entries contain no commit hash.
+    :raises SystemExit: with status 0 when no commit is selected, or status
+        1 when commits are missing a sign-off or a git command fails.
+    """
+    if path is None:
+        # Resolved per call; a getcwd() default is frozen at import time.
+        path = getcwd()
+    try:
+        # get_branches() changes into ``path`` itself; changing directory
+        # here as well would resolve a relative path twice and fail.
+        hashes = get_branches(path, invert=True)
+        if not hashes:
+            raise SystemExit(0)
+        # Entries are "<hash> <author-email>"; blank entries are skipped.
+        missing = [commit.split(' ')[0] for commit in hashes if commit]
+        if missing:
+            # de-dupe the commit list
+            for commit in dict.fromkeys(missing):
+                log.info("{}".format(commit))
+            raise SystemExit(1)
+        return 0
+    except subprocess.CalledProcessError as e:
+        log.error(e)
+        raise SystemExit(1)
+
+
+def match(path=None):
+    """Check for commits where DCO does not match the commit author's email.
+
+    For every non-merge commit on a remote branch whose message contains
+    "Signed-off-by", compare the author email with the addresses given in
+    angle brackets on the "Signed-off-by:" lines, and log each commit whose
+    author email is not among them.
+
+    :param path: repository to inspect; defaults to the working directory
+        at call time.
+    :raises SystemExit: with status 0 when every commit matches (or none is
+        selected), or status 1 on a mismatch or a failing git command.
+    """
+    if path is None:
+        # Resolved per call; a getcwd() default is frozen at import time.
+        path = getcwd()
+    # Only addresses on "Signed-off-by:" lines count as a sign-off; other
+    # bracketed text in the message (e.g. Co-authored-by) does not.
+    sob_email_regex = re.compile(r'Signed-off-by:[^<\n]*<([^<>\n]*)>')
+    try:
+        # get_branches() changes into ``path``; the git call below relies on
+        # that, and a second chdir here would break relative paths.
+        hashes = get_branches(path)
+        exit_code = 0
+        if not hashes:
+            raise SystemExit(exit_code)
+        # dict.fromkeys() drops repeats of commits present on several
+        # branches while keeping their order.
+        for commit in dict.fromkeys(hashes):
+            # Entries are "<hash> <author-email>", so the author email
+            # needs no extra git call.
+            commit_id, _, commit_author_email = commit.partition(' ')
+            if not commit_id:
+                continue
+            commit_author_email = commit_author_email.strip()
+            commit_log_message = subprocess.check_output(  # nosec
+                ['git', 'log', '--format=%B', '-n', '1', commit_id])\
+                .decode(encoding="UTF-8", errors="replace")
+            sob_results = sob_email_regex.findall(commit_log_message)
+
+            if commit_author_email not in sob_results:
+                log.info("For commit ID {}: \n\tCommitter is {}"
+                         "\n\tbut commit is signed off by {}\n"
+                         .format(commit_id,
+                                 commit_author_email,
+                                 sob_results))
+                exit_code = 1
+        raise SystemExit(exit_code)
+    except subprocess.CalledProcessError as e:
+        log.error(e)
+        raise SystemExit(1)
index 8d02002..a18574c 100644 (file)
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,7 +34,6 @@ openstack =
 [files]
 packages = lftools
 scripts =
-    shell/dco
     shell/deploy
     shell/gerrit_create
     shell/inactivecommitters
diff --git a/shell/dco b/shell/dco
deleted file mode 100755 (executable)
index c8992cb..0000000
--- a/shell/dco
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: EPL-1.0
-##############################################################################
-# Copyright (c) 2018 The Linux Foundation and others.
-#
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Eclipse Public License v1.0
-# which accompanies this distribution, and is available at
-# http://www.eclipse.org/legal/epl-v10.html
-##############################################################################
-
-# This script determines if a git repo contains commits missing DCO.
-# It operates in your current working directory which must be a git repo.
-# Alternatively you can pass it a path to a git repo.
-
-function dcocheck {
-  REPO_PATH="$1"
-  cd "$REPO_PATH" || exit 1
-
-  status=0
-  while IFS= read -r -a line; do
-      # shellcheck disable=SC2128
-      my_array+=( "$line" )
-      done < <( git branch -r | grep -v origin/HEAD )
-  for branch in "${my_array[@]}"
-  do
-      status=1
-      branch=$(echo "$branch" | xargs)
-      echo "Checking commits in branch $branch for commits missing DCO..."
-      git log "$branch" --no-merges --pretty="%H %ae" --grep 'Signed-off-by' --invert-grep -- | \
-          while read -r results; do
-            commit_hash="$(echo "$results" | cut -d' ' -f1)"
-            >&2 echo "$commit_hash is missing Signed-off-by line."
-          done
-  done
-  exit $status
-}
-
-function dcomatch {
-  REPO_PATH="$1"
-  cd "$REPO_PATH" || exit 1
-
-  status=0
-  while IFS= read -r -a line; do
-      # shellcheck disable=SC2128
-      my_array+=( "$line" )
-      done < <( git branch -r | grep -v origin/HEAD )
-  for branch in "${my_array[@]}"
-  do
-      status=1
-      branch=$(echo "$branch" | xargs)
-      git log "$branch" --no-merges --pretty="%H %ae" --grep 'Signed-off-by' -- | \
-          while read -r results; do
-            commit_hash="$(echo "$results" | cut -d' ' -f1)"
-            author_email="$(echo "$results" | cut -d' ' -f2)"
-            sob="$(git show --quiet "$commit_hash" | grep -oP '(?=Signed\-off\-by: )[\s\S]*[\<](.*)[\>]')"
-            if [[ "$sob" != *"$author_email"* ]] ; then
-              >&2 echo "$commit_hash author is $author_email and DCO is $sob"
-              if [ "$sob" = "" ] ; then
-                >&2 echo "NOTE: If DCO is empty, then the commit is likely signed with a name but no email address"
-              fi
-            fi
-          done
-  done
-  exit $status
-}
-
-case "$1" in
-  match) dcomatch "$2" ;;
-  check) dcocheck "$2" ;;
-esac