#!/usr/bin/python
# Provenance: added by commit e1f4b9b6193842861b9daca43298793aa74e6fd7
# ("Added hgstat.py", larsk, 2012-08-15) as hgstat/hgstat.py.txt.
'''
Summarise a Mercurial history per author and per e-mail domain.

Usage:

    hg history -v | hgstat.py

The verbose log is read from stdin.  Every changeset that is not a merge is
passed to ``hg export <rev> | diffstat`` and its inserted/deleted line counts
are credited to the first ``Signed-off-by:`` address found in the changeset
text.  The report then lists raw per-address totals, totals after folding
known aliases together (see ``alias_list``), totals per e-mail domain, and the
number of real/merge changesets per week.

The code avoids the ``print`` statement/function and other version-specific
constructs so it runs unchanged on Python 2.7 and Python 3.
'''

import email.utils
import re
import subprocess
import sys
import time

# Each entry lists the addresses of one person; the first address is the
# canonical one that all the others are folded into.
alias_list = [
    ['steven@xensource.com', 'steve@xensource.com'],
    ['steven.smith@xensource.com', 'sos22@cam.ac.uk', 'sos22@cam.ac.uk.',
     'sos22@cl.cam.ac.uk', 'sos22@cl.cam.ac.uk.', 'steven.smith@cl.cam.ac.uk',
     'ssmith@xensource.com'],
    ['sean.dague@ibm.com', 'sean@dague.net'],
    ['rusty.russel@ibm.com', 'rusty@rustcorp.com.au'],
    ['jyoung5@us.ibm.com', 'jerone@gmail.com'],
    ['gerd.knorr@novell.com', 'kraxel@suse.de', 'kraxel@bytesex.org'],
    ['christian@xensource.com', 'christian.limpach@cl.cam.ac.uk', 'c@pin.lu',
     'cl349@cl.cam.ac.uk', 'limpach@cl.cam.ac.uk', 'cl@netbsd.org',
     'christian.limpach@xensource.com'],
    ['ian@xensource.com', 'ian@xensoure.com', 'ian.pratt@cl.cam.ac.uk',
     'iab@xensource.com'],
    ['kmacy@fsmware.com', 'kmacy@netapp.com'],
    ['keir@xensource.com', 'keir.fraser@cl.cam.ac.uk'],
    ['kurt.garloff@novell.com', 'garloff@suse.de'],
    ['mark.williamson@xensource.com', 'mark.williamson@cl.cam.ac.uk'],
    ['andrew.warfield@xensource.com', 'andrew.warfield@cl.cam.ac.uk',
     'akw27@cl.cam.ac.uk'],
    ['xin.b.li@intel.com', 'xin.bi.li@intel.com', 'xin..b.li@intel.com'],
    ['vincent@xensource.com', 'vincent@snarc.org'],
    ['bthomas@virtualiron.com', 'bjthomas3@gmail.com'],
    ['herbert@redhat.com', 'herbert@gondor.apana.org.au'],
    ['gdunlap@xensource.com', 'dunlapg@umich.edu'],
    ['leendert@us.ibm.com', 'leendert@watson.ibm.com'],
    ['muli@il.ibm.com', 'mulix@mulix.org'],
    ['ian.campbell@xensource.com', 'ian.campbell@xesource.com'],
    ['hollisb@us.ibm.com', 'hollis@us.ibm.com'],
]

# Width of one bucket of the "checkins over time" table, in seconds (a week).
date_step = 24 * 3600 * 7

# Author used when a changeset carries no Signed-off-by address.
UNKNOWN_AUTHOR = 'unknown@unknown.com'

SEPARATOR = '############################################################################'

# Header lines of `hg history -v`.  The amount of padding after the field
# name is not relied upon, hence `\s+`.
_CHANGESET_RE = re.compile(r'^changeset:\s+(\d+):([0-9a-f]+)$')
_USER_RE = re.compile(r'^user:\s+')
_PARENT_RE = re.compile(r'^parent:\s+')
_DATE_RE = re.compile(r'^date:\s+(.*)$')

# Applied to lower-cased lines only, so the character classes need no A-Z.
_EMAIL_RE = re.compile(r'([a-z0-9._+]+@[a-z0-9._+]+)')
_SIGNED_OFF_RE = re.compile(r'^signed-off-by:.*?([a-z0-9._+]+@[a-z0-9._+]+)')

# Last two dot-separated labels of an address, e.g. "ac.uk" or "ibm.com".
_DOMAIN_RE = re.compile(r'.*[@.]([^.]+[.][^.]+)')


def emit(text=''):
    '''Write one line to stdout (works on both Python 2 and Python 3).'''
    sys.stdout.write(text + '\n')


def _diffstat_lines(cset, *options):
    '''
    Return the output lines of ``hg export <cset> | diffstat <options>``.

    The two programs are connected directly, without a shell, so `cset` is
    never interpreted as shell syntax, and both pipes are closed before
    returning.  Raises OSError if `hg` or `diffstat` cannot be started.
    '''
    export = subprocess.Popen(['hg', 'export', str(cset)],
                              stdout=subprocess.PIPE)
    try:
        stat = subprocess.Popen(['diffstat'] + list(options),
                                stdin=export.stdout,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        # Drop our copy of the read end so hg gets SIGPIPE if diffstat
        # exits early instead of blocking forever.
        export.stdout.close()
        output = stat.communicate()[0]
    finally:
        if not export.stdout.closed:
            export.stdout.close()
        export.wait()
    return output.splitlines()


def parse_diffstat_summary(lines):
    '''
    Extract (files, insertions, deletions) from plain `diffstat` output.

    Only the summary line ("N file(s) changed, ...") is used.  The
    insertion or deletion part may be absent, in which case it counts as 0;
    if there is no summary line at all the result is (0, 0, 0).
    '''
    files = ins = dels = 0
    for line in lines:
        summary = re.match(r'^\s*(\d+) files? changed', line)
        if not summary:
            continue
        files = int(summary.group(1))
        inserted = re.search(r'(\d+) insertion', line)
        deleted = re.search(r'(\d+) deletion', line)
        ins = int(inserted.group(1)) if inserted else 0
        dels = int(deleted.group(1)) if deleted else 0
    return (files, ins, dels)


def parse_diffstat_table(lines):
    '''
    Sum the per-file rows of `diffstat -f0` output.

    Every line containing '|' counts as one file.  The text after the last
    '|' is expected to look like "<total> <ins> + <dels> - <mod> !" (the
    layout the field indexes 1 and 3 imply); rows without those numeric
    fields (binary files, for instance) still count as a file but add no
    lines.

    Returns (files, insertions, deletions).
    '''
    files = ins = dels = 0
    for line in lines:
        _name, bar, counts = line.rpartition('|')
        if not bar:
            continue
        files += 1
        fields = counts.split()
        try:
            row_ins, row_dels = int(fields[1]), int(fields[3])
        except (IndexError, ValueError):
            continue
        ins += row_ins
        dels += row_dels
    return (files, ins, dels)


def diffstat(cset):
    '''Return (files, insertions, deletions) for changeset `cset`, taken
    from the summary line of `hg export <cset> | diffstat`.'''
    return parse_diffstat_summary(_diffstat_lines(cset))


def diffstat2(cset):
    '''
    Return (files, insertions, deletions) for changeset `cset`, summed over
    the per-file rows of `hg export <cset> | diffstat -p1 -f0`.

    The totals are also written to stdout as a trace line, as before.
    '''
    totals = parse_diffstat_table(_diffstat_lines(cset, '-p1', '-f0'))
    emit('%d %d %d' % totals)
    return totals


def week_of(date_text):
    '''
    Map a log date such as "Wed Aug 15 17:17:26 2012 +0100" to its bucket
    number (seconds since the epoch in UTC, divided by `date_step`).

    Returns None when the text cannot be parsed.
    '''
    parsed = email.utils.parsedate_tz(date_text)
    if parsed is None:
        return None
    # mktime_tz honours the UTC offset, so the bucket does not depend on the
    # timezone of the machine running the script and agrees with the
    # time.gmtime() call used when the table is printed.
    return int(email.utils.mktime_tz(parsed) // date_step)


def email_domain(address):
    '''
    Return the grouping key for `address`: its last two dot-separated
    labels ("keir@cl.cam.ac.uk" -> "ac.uk").  Addresses without a dotted
    domain fall back to whatever follows the '@'.
    '''
    found = _DOMAIN_RE.match(address)
    if found:
        return found.group(1)
    return address.rpartition('@')[2]


def merge_aliases(stats, groups=None):
    '''
    Fold the statistics of alias addresses into their primary address.

    `stats` maps address -> (aliases, checkins, inserted, net_inserted) and
    is modified in place: every alias present is removed and added to the
    first address of its group, whose alias count grows by one per alias
    folded in.  `groups` defaults to `alias_list`.
    '''
    if groups is None:
        groups = alias_list
    for group in groups:
        primary = group[0]
        aliases, cins, m1, m2 = stats.get(primary, (0, 0, 0, 0))
        for alias in group[1:]:
            if alias not in stats:
                continue
            _unused, xcins, xm1, xm2 = stats.pop(alias)
            aliases += 1
            cins += xcins
            m1 += xm1
            m2 += xm2
        if cins > 0:
            stats[primary] = (aliases, cins, m1, m2)
    return stats


def group_by_domain(stats):
    '''
    Aggregate per-address statistics per e-mail domain.

    Returns a dict mapping domain -> (addresses, checkins, inserted,
    net_inserted); the first field counts each address once plus whatever
    alias count the address still carries.
    '''
    by_domain = {}
    for address, (aliases, cins, m1, m2) in stats.items():
        domain = email_domain(address)
        d_aliases, d_cins, d_m1, d_m2 = by_domain.get(domain, (0, 0, 0, 0))
        by_domain[domain] = (d_aliases + aliases + 1, d_cins + cins,
                             d_m1 + m1, d_m2 + m2)
    return by_domain


def _new_changeset(number):
    '''Fresh per-changeset parse state for revision `number`.'''
    return {
        'cset': number,
        'parents': 0,
        'merge': False,
        'first_email': '',
        'first_signed_off': '',
    }


def _tally(changeset, week, stats, date_cset, stat_func):
    '''
    Account for one fully parsed changeset.

    Updates `date_cset[week]` and, for non-merge changesets, prints the
    per-changeset line and credits the author in `stats`.  Returns True for
    a real changeset and False for a merge.
    '''
    real, merges = date_cset.get(week, (0, 0))
    two_parents = changeset['parents'] > 1
    if changeset['merge'] or two_parents:
        date_cset[week] = (real, merges + 1)
        return False

    date_cset[week] = (real + 1, merges)
    files, ins, dels = stat_func(changeset['cset'])
    m1 = ins
    m2 = max(ins - dels, 0)  # net growth, never negative

    emit('cset:%s Merge:%s%s %s:%s:%s %d:%d Signed:%s Author:%s' % (
        changeset['cset'],
        'y' if changeset['merge'] else 'n',
        'y' if two_parents else 'n',
        files, ins, dels, m1, m2,
        changeset['first_signed_off'], changeset['first_email']))

    author = changeset['first_signed_off'] or UNKNOWN_AUTHOR
    aliases, cins, tm1, tm2 = stats.setdefault(author, (0, 0, 0, 0))
    if m1 > 0:
        # Changesets that insert nothing are not counted as checkins.
        stats[author] = (aliases, cins + 1, tm1 + m1, tm2 + m2)
    return True


def collect(stream, stat_func=None):
    '''
    Parse `hg history -v` output from `stream` (any iterable of lines).

    `stat_func(cset)` must return (files, insertions, deletions) for a
    revision number; it defaults to `diffstat2`.

    Returns (stats, date_cset, t_real, t_merges) where `stats` maps author
    address -> (aliases, checkins, inserted, net_inserted) and `date_cset`
    maps week number -> (real changesets, merges).
    '''
    if stat_func is None:
        stat_func = diffstat2

    stats = {}
    date_cset = {}
    t_real = t_merges = 0
    week = 0
    current = None  # no changeset seen yet, so nothing to tally

    for line in stream:
        header = _CHANGESET_RE.match(line)
        if header:
            if current is not None:
                if _tally(current, week, stats, date_cset, stat_func):
                    t_real += 1
                else:
                    t_merges += 1
            current = _new_changeset(header.group(1))
            continue

        if current is None:
            continue  # text ahead of the first changeset header

        if _USER_RE.match(line):
            continue  # the committer is not used; authors come from sign-offs

        if _PARENT_RE.match(line):
            current['parents'] += 1
            continue

        dated = _DATE_RE.match(line)
        if dated:
            parsed = week_of(dated.group(1))
            if parsed is not None:  # keep the previous week if unparseable
                week = parsed
            continue

        lowered = line.lower()

        # "Merge ..." / "Manual merge ..." descriptions mark a merge even
        # when the log shows fewer than two parent lines.
        if lowered.startswith(('merge', 'manual')):
            current['merge'] = True

        if not current['first_email']:
            found = _EMAIL_RE.search(lowered)
            if found:
                current['first_email'] = found.group(1)

        if not current['first_signed_off']:
            found = _SIGNED_OFF_RE.match(lowered)
            if found:
                current['first_signed_off'] = found.group(1)

    if current is not None:
        if _tally(current, week, stats, date_cset, stat_func):
            t_real += 1
        else:
            t_merges += 1

    return (stats, date_cset, t_real, t_merges)


def report(stats, date_cset, t_real, t_merges):
    '''
    Print the full report to stdout.

    `stats` is modified in place: aliases are folded into their primary
    address and the alias counters are reset before grouping by domain.
    '''
    emit(SEPARATOR)
    emit('t_real=%d t_merges=%d' % (t_real, t_merges))

    for address in sorted(stats):
        (aliases, cins, m1, m2) = stats[address]
        emit('%s cins=%d m1=%d m2=%d' % (address, cins, m1, m2))

    emit(SEPARATOR)

    merge_aliases(stats)

    emit(SEPARATOR)

    for address in sorted(stats):
        (aliases, cins, m1, m2) = stats[address]
        emit('% 16s aliases=% 4d cins=% 4d ins=% 6d ext=% 6d'
             % (address, aliases, cins, m1, m2))
        # From here on the first field counts addresses per domain.
        stats[address] = (0, cins, m1, m2)

    emit(SEPARATOR)

    stats2 = group_by_domain(stats)
    domains = sorted(stats2)
    addresses = sorted(stats)

    emit(SEPARATOR)

    emit('total checkins= %d' % (t_real))

    # Per-domain totals, each followed by its member addresses.
    for domain in domains:
        (aliases, cins, m1, m2) = stats2[domain]
        emit('\n%- 16s aliases=% 4d cins=% 4d ins=% 7d ext=% 7d'
             % (domain, aliases, cins, m1, m2))
        for address in addresses:
            if email_domain(address) != domain:
                continue
            (aliases, cins, m1, m2) = stats[address]
            emit('\t%- 22s cins=% 4d ins=% 7d ext=% 7d'
                 % (address.split('@')[0], cins, m1, m2))

    emit(SEPARATOR)

    # The same per-domain totals again, as a compact table.
    for domain in domains:
        (aliases, cins, m1, m2) = stats2[domain]
        emit('%- 16s aliases=% 4d cins=% 4d ins=% 7d ext=% 7d'
             % (domain, aliases, cins, m1, m2))

    emit(SEPARATOR)

    emit('checkins over time, real and merges')

    for week in sorted(date_cset):
        start = time.gmtime(week * date_step)
        (real, merges) = date_cset[week]
        emit('%d/%d/%d %d %d' % (start[0], start[1], start[2], real, merges))

    emit()


def main():
    '''Read `hg history -v` output from stdin and print the report.'''
    stats, date_cset, t_real, t_merges = collect(sys.stdin)
    report(stats, date_cset, t_real, t_merges)


if __name__ == '__main__':
    main()