From: Jonathan Corbet Date: Fri, 6 Apr 2012 22:00:04 +0000 (-0600) Subject: Add version tracking support and an "unknown hackers" report X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=1e293bc90a2a0d53340c96e74c9391049f4e7165;p=people%2Flarsk%2Fxenproject-org-gitdm.git Add version tracking support and an "unknown hackers" report Version tracking was used to see who had contributed to the most kernel releases; not sure it's a long-term-useful feature. The unknown hackers report helps when trying to improve the database. Signed-off-by: Jonathan Corbet --- diff --git a/gitdm b/gitdm index 1367ebd..25ffafb 100755 --- a/gitdm +++ b/gitdm @@ -5,8 +5,8 @@ # # This code is part of the LWN git data miner. # -# Copyright 2007-11 Eklektix, Inc. -# Copyright 2007-11 Jonathan Corbet +# Copyright 2007-12 Eklektix, Inc. +# Copyright 2007-12 Jonathan Corbet # Copyright 2011 Germán Póo-Caamaño # # This file may be distributed under the terms of the GNU General @@ -43,6 +43,7 @@ DirName = '' Aggregate = 'month' Numstat = 0 ReportByFileType = 0 +ReportUnknowns = False # # Options: @@ -60,6 +61,7 @@ ReportByFileType = 0 # -r pattern Restrict to files matching pattern # -s Ignore author SOB lines # -u Map unknown employers to '(Unknown)' +# -U Dump unknown hackers in report # -x file.csv Export raw statistics as CSV # -w Aggregrate the raw statistics by weeks instead of months # -y Aggregrate the raw statistics by years instead of months @@ -69,9 +71,9 @@ def ParseOpts (): global MapUnknown, DevReports global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat - global ReportByFileType + global ReportByFileType, ReportUnknowns - opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stuwx:yz') + opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stUuwx:yz') for opt in opts: if opt[0] == '-a': AkpmOverLt = 1 @@ -102,6 +104,8 @@ def ParseOpts (): ReportByFileType = 1 elif opt[0] == '-u': MapUnknown = 1 + elif opt[0] == '-U': + ReportUnknowns = True elif opt[0] == '-x': CSVFile = open (opt[1], 'w') print "open output file " + opt[1] + "\n" @@ -492,6 +496,8 @@ if CSVFile: if DevReports: reports.DevReports (hlist, TotalChanged, CSCount, TotalRemoved) +if ReportUnknowns: + reports.ReportUnknowns(hlist, CSCount) reports.EmplReports (elist, TotalChanged, CSCount) if ReportByFileType and Numstat: diff --git a/patterns.py b/patterns.py index 803e532..4d4a347 100644 --- a/patterns.py +++ b/patterns.py @@ -21,7 +21,8 @@ import re _pemail = r'\s+"?([^<"]+)"?\s<([^>]+)>' # just email addr + name patterns = { - 'commit': re.compile (r'^commit ([0-9a-f ]+)$'), + 'tagcommit': re.compile (r'^commit ([\da-f]+) .*tag: (v[23]\.\d(\.\d\d?)?)'), + 'commit': re.compile (r'^commit ([0-9a-f ]+)'), 'author': re.compile (r'^Author:' + _pemail + '$'), 'signed-off-by': re.compile (r'^\s+Signed-off-by:' + _pemail + '.*$'), 'merge': re.compile (r'^Merge:.*$'), diff --git a/reports.py b/reports.py index 9b8cce9..bc1e18c 100644 --- a/reports.py +++ b/reports.py @@ -3,8 +3,8 @@ # # This code is part of the LWN git data miner. # -# Copyright 2007-11 Eklektix, Inc. -# Copyright 2007-11 Jonathan Corbet +# Copyright 2007-12 Eklektix, Inc. +# Copyright 2007-12 Jonathan Corbet # # This file may be distributed under the terms of the GNU General # Public License, version 2. @@ -58,6 +58,10 @@ TRow = ''' %s%d%.1f%% ''' +TRowStr = ''' +%s%d%s +''' + def ReportLine (text, count, pct): global HTMLclass if count == 0: @@ -67,6 +71,15 @@ def ReportLine (text, count, pct): HTMLfile.write (TRow % (HClasses[HTMLclass], text, count, pct)) HTMLclass ^= 1 +def ReportLineStr (text, count, extra): + global HTMLclass + if count == 0: + return + Outfile.write ('%-25s %4d %s\n' % (text, count, extra)) + if HTMLfile: + HTMLfile.write (TRowStr % (HClasses[HTMLclass], text, count, extra)) + HTMLclass ^= 1 + def EndReport (): if HTMLfile: HTMLfile.write ('\n\n') @@ -284,6 +297,34 @@ def ReportByRepCreds (hlist): break EndReport () +# +# Versions. +# +def CompareVersionCounts (h1, h2): + if h1.versions and h2.versions: + return len (h2.versions) - len (h1.versions) + if h2.versions: + return 1 + if h1.versions: + return -1 + return 0 + +def MissedVersions (hv, allv): + missed = [v for v in allv if v not in hv] + missed.reverse () + return ' '.join (missed) + +def ReportVersions (hlist): + hlist.sort (CompareVersionCounts) + BeginReport ('Developers represented in the most kernel versions') + count = 0 + allversions = hlist[0].versions + for h in hlist: + ReportLineStr (h.name, len (h.versions), MissedVersions (h.versions, allversions)) + count += 1 + if count >= ListCount: + break + EndReport () def CompareESOBs (e1, e2): @@ -341,6 +382,33 @@ def EmplReports (elist, totalchanged, cscount): ReportByESOBs (elist) ReportByEHackers (elist) +# +# Who are the unknown hackers? +# +def IsUnknown(h): + empl = h.employer[0][0][1].name + return h.email[0] == empl or empl == '(Unknown)' + +def ReportUnknowns(hlist, cscount): + # + # Trim the list to just the unknowns; try to work properly whether + # mapping to (Unknown) is happening or not. + # + ulist = [ h for h in hlist if IsUnknown(h) ] + ulist.sort(ComparePCount) + count = 0 + BeginReport('Developers with unknown affiliation') + for h in ulist: + pcount = len(h.patches) + if pcount > 0: + ReportLine(h.name, pcount, (pcount*100.0)/cscount) + count += 1 + if count >= ListCount: + break + EndReport() + + + def ReportByFileType (hacker_list): total = {} total_by_hacker = {}