--- /dev/null
+#!/usr/bin/python
+
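+'''
+hgstat.py - checkin statistics for a Mercurial repository.
+
+Usage (run from inside the repository, since "hg export" is invoked for
+every non-merge changeset):
+
+    hg history -v | hgstat.py
+
+Reads the verbose history on stdin and prints per-changeset diffstat
+figures, per-author and per-domain totals, and weekly checkin counts.
+'''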
+
+import sys, string, re, os, rfc822, time
+
+found_first_email = 0
+found_first_signed_off = 0
+first_email = ''
+first_signed_off = ''
+merge = ''
+parents = 0
+cset = 'n'
+
+stats = {}
+stats2 = {}
+t_real = t_merges = 0
+
+alias_list = [ [ 'steven@xensource.com', 'steve@xensource.com' ],
+ [ 'steven.smith@xensource.com','sos22@cam.ac.uk', 'sos22@cam.ac.uk.', 'sos22@cl.cam.ac.uk', 'sos22@cl.cam.ac.uk.', 'steven.smith@cl.cam.ac.uk','ssmith@xensource.com' ],
+ [ 'sean.dague@ibm.com', 'sean@dague.net' ],
+ [ 'rusty.russel@ibm.com','rusty@rustcorp.com.au' ],
+ [ 'jyoung5@us.ibm.com','jerone@gmail.com' ],
+ [ 'gerd.knorr@novell.com','kraxel@suse.de', 'kraxel@bytesex.org' ],
+ [ 'christian@xensource.com','christian.limpach@cl.cam.ac.uk','c@pin.lu','cl349@cl.cam.ac.uk','limpach@cl.cam.ac.uk','cl@netbsd.org', 'christian.limpach@xensource.com' ],
+ [ 'ian@xensource.com', 'ian@xensoure.com', 'ian.pratt@cl.cam.ac.uk', 'iab@xensource.com' ],
+ [ 'kmacy@fsmware.com', 'kmacy@netapp.com' ],
+ [ 'keir@xensource.com','keir.fraser@cl.cam.ac.uk'],
+ [ 'kurt.garloff@novell.com','garloff@suse.de'],
+ [ 'mark.williamson@xensource.com', 'mark.williamson@cl.cam.ac.uk' ],
+ [ 'andrew.warfield@xensource.com', 'andrew.warfield@cl.cam.ac.uk','akw27@cl.cam.ac.uk'],
+ [ 'xin.b.li@intel.com', 'xin.bi.li@intel.com', 'xin..b.li@intel.com' ],
+ [ 'vincent@xensource.com', 'vincent@snarc.org' ],
+ [ 'bthomas@virtualiron.com', 'bjthomas3@gmail.com' ],
+ [ 'herbert@redhat.com', 'herbert@gondor.apana.org.au' ],
+ [ 'gdunlap@xensource.com', 'dunlapg@umich.edu' ],
+ [ 'leendert@us.ibm.com', 'leendert@watson.ibm.com' ],
+ [ 'muli@il.ibm.com', 'mulix@mulix.org' ],
+ [ 'ian.campbell@xensource.com', 'ian.campbell@xesource.com' ],
+ [ 'hollisb@us.ibm.com' ,'hollis@us.ibm.com' ]
+ ]
+
+
+def diffstat ( cset ):
+ files = ins = dels = 0
+ fd = os.popen( '/bin/bash -c "hg export %s | diffstat"' % cset )
+ while 1:
+ line = fd.readline()
+ if len(line) == 0: break
+ #r = re.match('^ (\d+) files changed, (\d+) insertions?\(\+\), (\d+) deletions?\(-\)$', line)
+ r = re.match('^ (\d+) files changed,', line)
+ if r:
+ files = r.group(1)
+ ins = 0
+ dels = 0
+ r = re.match('.*?(\d+) insertion.*', line)
+ if r:
+ ins = r.group(1)
+ r = re.match('.*?(\d+) deletion.*', line)
+ if r:
+ dels = r.group(1)
+ return (int(files),int(ins),int(dels))
+
+def diffstat2 ( cset ):
+ files = ins = dels = 0
+ #fd = os.popen( '/bin/bash -c "hg export %s | diffstat -p1 -f0 | grep \| | grep -v ia64 | grep -v tpmfront | grep -v tpmback | grep -v vmx | grep -v acm | grep -v vtpm | grep -v security | grep -v firmware | grep -v ioemu"' % cset )
+ #fd = os.popen( '/bin/bash -c "hg export %s | diffstat -p1 -f0 | grep \| | grep tools/libxc"' % cset )
+ fd = os.popen( '/bin/bash -c "hg export %s | diffstat -p1 -f0 | grep \|"' % cset )
+
+ while 1:
+ line = fd.readline()
+ if len(line) == 0: break
+ parts1 = string.split(line,'|')
+ parts = string.split(parts1[1])
+ files = files + 1
+ ins = ins + int(parts[1])
+ dels = dels + int(parts[3])
+ print int(files),int(ins),int(dels)
+ return (int(files),int(ins),int(dels))
+
+#diffstat2(5371)
+#diffstat2(5370)
+
+eof=0
+date = 0
+
+date_cset = {}
+date_step = 24*3600*7
+
+while 1:
+ if eof == 1: break
+ line = sys.stdin.readline()
+ if len(line) == 0: eof=1
+
+ r = re.match('^changeset: (\d+):([0-9a-f]+)$', line)
+ if eof or r :
+
+ if cset and parents < 2 and merge == 'n':
+
+ t_real = t_real+1
+
+ if not date_cset.has_key( date ):
+ date_cset[date] = (1,0)
+ else:
+ (a,b) = date_cset[date]
+ date_cset[date] = (a+1,b)
+
+ (files,ins,dels) = diffstat2( cset )
+ m1 = ins
+ m2 = max(ins-dels,0)
+
+ # print stats from the last chageset
+ print 'cset:%s Merge:%s%s %s:%s:%s %d:%d Signed:%s Author:%s' % (cset, merge, parents>1 and 'y' or 'n', files, ins, dels, m1, m2, first_signed_off, first_email )
+
+ author = first_signed_off
+ if author == '':
+ author = 'unknown@unknown.com'
+ if not stats.has_key( author ):
+ stats[author] = (0,0,0,0)
+ (aliases,cins,tm1,tm2) = stats[author]
+ if m1 > 0:
+ stats[author] = (aliases, cins+1, tm1+m1, tm2+m2)
+
+ else:
+ t_merges = t_merges+1
+
+ if not date_cset.has_key( date ):
+ date_cset[date] = (0,1)
+ else:
+ (a,b) = date_cset[date]
+ date_cset[date] = (a,b+1)
+
+ if not eof:
+ cset = r.group(1)
+ cset_long = r.group(2)
+ #print 'XXX:', line, 'YYYY', r.group(1), '***', r.group(2)
+ found_first_email = 0
+ first_email = ''
+ found_first_signed_off = 0
+ first_signed_off = ''
+ merge = 'n'
+ parents = 0
+
+ #print '-----------------', cset
+ continue
+
+ r = re.match('^user: (.*)$', line)
+ if r:
+ user = r.group(1)
+ continue
+
+ r = re.match('^parent: (.*)$', line)
+ if r:
+ parents = parents + 1
+ continue
+
+ r = re.match('^date: (.*)$', line)
+ if r:
+ date = int(time.mktime(rfc822.parsedate(r.group(1))) / date_step)
+ #print 'XXXXX %s ::: %d/%d/%d' % (r.group(1), time.gmtime(date*date_step)[0], time.gmtime(date*date_step)[1], time.gmtime(date*date_step)[2])
+ continue
+
+ line = string.lower(line)
+
+ r = re.match('^merge', line)
+ if r:
+ merge = 'y'
+
+ r = re.match('^manual', line)
+ if r:
+ merge = 'y'
+
+ if not found_first_email:
+ r = re.match('.*?([a-z0-9._+]+@[a-z0-9._+]+).*', line)
+ if r:
+ first_email = string.strip(r.group(1))
+ found_first_email = 1
+ #print r.group(1)
+
+ if not found_first_signed_off:
+ r = re.match('^signed-off-by:.*?([a-z0-9._+]+@[a-z0-9._+]+).*', line)
+ if r:
+ first_signed_off = string.strip(r.group(1))
+ found_first_signed_off = 1
+ #print 'SSS', r.group(1)
+
+
+print '############################################################################'
+print 't_real=%d t_merges=%d' % (t_real, t_merges)
+
+keys = stats.keys()
+keys.sort()
+
+for k in keys:
+ (aliases, cins,m1,m2) = stats[k]
+ print '%s cins=%d m1=%d m2=%d' % (k,cins,m1,m2)
+
+print '############################################################################'
+
+
+for user in alias_list:
+ primary = user[0]
+ (aliases,cins,m1,m2) = (0,0,0,0)
+ if stats.has_key(primary):
+ (aliases,cins,m1,m2) = stats[primary]
+ for a in user[1:]:
+ if stats.has_key(a):
+ (aliases,xcins,xm1,xm2) = stats[a]
+ (aliases,cins,m1,m2) = (aliases+1,cins+xcins,m1+xm1,m2+xm2)
+ del stats[a]
+ if cins > 0:
+ stats[primary] = (aliases,cins,m1,m2)
+
+print '############################################################################'
+
+keys = stats.keys()
+keys.sort()
+
+for k in keys:
+ (aliases,cins,m1,m2) = stats[k]
+ print '% 16s aliases=% 4d cins=% 4d ins=% 6d ext=% 6d' % (k,aliases,cins,m1,m2)
+ stats[k] = (0,cins,m1,m2)
+
+print '############################################################################'
+
+keys = stats.keys()
+for key in keys:
+ r = re.match('.*[@.]([^.]+[.][^.]+)', key)
+ if r:
+ #print '==',r.group(1)
+ primary = r.group(1)
+
+ (aliases,cins,m1,m2) = (0,0,0,0)
+ if stats2.has_key(primary):
+ (aliases,cins,m1,m2) = stats2[primary]
+
+ (xaliases,xcins,xm1,xm2) = stats[key]
+ stats2[primary] = (aliases+xaliases+1,cins+xcins,m1+xm1,m2+xm2)
+ #del stats[key]
+ #print 'del:', key, primary, (xaliases,xcins,xm1,xm2)
+
+print '############################################################################'
+
+print 'total checkins= %d' % (t_real)
+
+keys2 = stats2.keys()
+
+for k in keys2:
+ (aliases,cins,m1,m2) = stats2[k]
+ print '\n%- 16s aliases=% 4d cins=% 4d ins=% 7d ext=% 7d' % (k,aliases,cins,m1,m2)
+ #print '\n%- 16s aliases=% 4d cins=% 4d ins=% 7d' % (k,aliases,cins,m1)
+ for j in keys:
+ r = re.match('.*[@.]([^.]+[.][^.]+)', j)
+ if r:
+ primary = r.group(1)
+ if primary == k:
+ (aliases,cins,m1,m2) = stats[j]
+ print '\t%- 22s cins=% 4d ins=% 7d ext=% 7d' % (string.split(j,'@')[0],cins,m1,m2)
+ #print '\t%- 22s cins=% 4d ins=% 7d' % (string.split(j,'@')[0],cins,m1)
+
+print '############################################################################'
+
+
+for k in keys2:
+ (aliases,cins,m1,m2) = stats2[k]
+ print '%- 16s aliases=% 4d cins=% 4d ins=% 7d ext=% 7d' % (k,aliases,cins,m1,m2)
+ #print '\n%- 16s aliases=% 4d cins=% 4d ins=% 7d' % (k,aliases,cins,m1)
+ for j in keys:
+ r = re.match('.*[@.]([^.]+[.][^.]+)', j)
+ if r:
+ primary = r.group(1)
+ if primary == k:
+ (aliases,cins,m1,m2) = stats[j]
+ pass
+ #print '\t%- 22s cins=% 4d ins=% 7d ext=% 7d' % (string.split(j,'@')[0],cins,m1,m2)
+ #print '\t%- 22s cins=% 4d ins=% 7d' % (string.split(j,'@')[0],cins,m1)
+
+print '############################################################################'
+
+
+print 'checkins over time, real and merges'
+
+keys = date_cset.keys()
+keys.sort()
+
+for k in keys:
+ f = time.gmtime(k*date_step)
+ print '%d/%d/%d %d %d' % (f[0],f[1],f[2], date_cset[k][0], date_cset[k][1])
+
+print
+
+
+
+
+
+
+
+
+
+
+
+