From: Germán Póo-Caamaño Date: Thu, 23 Jun 2011 00:25:17 +0000 (-0700) Subject: Added option to get the stats from numstat instead of diff X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=935be113b3ad3023b102ec00ba0272315a6d27e7;p=people%2Flarsk%2Fxenproject-org-gitdm.git Added option to get the stats from numstat instead of diff The option --numstat of git log gives the statistics of lines added and removed per file. Hence, it is not necessary to parse a raw diff. Another benefit is that the resulting log is less verbose, which helps when processing long logs. This also prepares the code for counting the changes per file type. Signed-off-by: Germán Póo-Caamaño --- diff --git a/README b/README index 7226541..bc2d6b3 100644 --- a/README +++ b/README @@ -20,6 +20,10 @@ Run it like this: git log -p -M [details] | gitdm [options] +Alternatively, you can run with: + + git log --numstat -M [details] | gitdm -n [options] + The [details] tell git which changesets are of interest; the [options] can be: @@ -44,6 +48,8 @@ be: -l num Only list the top entries in each report. + -n Use --numstat instead of generated patches to get the statistics. + -o file Write text output to the given file (default is stdout). 
-r pat Only generate statistics for changes to files whose @@ -68,6 +74,10 @@ looks like: git log -p -M v2.6.19..v2.6.20 | \ gitdm -u -s -a -o results -h results.html +or: + + git log --numstat -M v2.6.19..v2.6.20 | \ + gitdm -u -s -a -n -o results -h results.html CONFIGURATION FILE diff --git a/gitdm b/gitdm index 84b4f5b..67eb77b 100755 --- a/gitdm +++ b/gitdm @@ -37,6 +37,7 @@ DumpDB = 0 CFName = 'gitdm.config' DirName = '' Aggregate = 'month' +Numstat = 0 # # Options: @@ -48,6 +49,7 @@ Aggregate = 'month' # -D Output date statistics # -h hfile HTML output to hfile # -l count Maximum length for output lists +# -n Use numstats instead of generated patch from git log # -o file File for text output # -r pattern Restrict to files matching pattern # -s Ignore author SOB lines @@ -59,9 +61,9 @@ Aggregate = 'month' def ParseOpts (): global MapUnknown, DevReports global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB - global CFName, CSVFile, DirName, Aggregate + global CFName, CSVFile, DirName, Aggregate, Numstat - opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:o:r:suwx:z') + opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:r:suwx:z') for opt in opts: if opt[0] == '-a': AkpmOverLt = 1 @@ -77,6 +79,8 @@ def ParseOpts (): reports.SetHTMLOutput (open (opt[1], 'w')) elif opt[0] == '-l': reports.SetMaxList (int (opt[1])) + elif opt[0] == '-n': + Numstat = 1 elif opt[0] == '-o': reports.SetOutput (open (opt[1], 'w')) elif opt[0] == '-r': @@ -248,20 +252,36 @@ def grabpatch(): sys.stderr.write ('Funky date: %s\n' % p.date) p.date = Today continue - # - # If we have a file filter, check for file lines. - # - if FileFilter: - ignore = ApplyFileFilter (Line, ignore) - # - # OK, maybe it's part of the diff itself. - # - if not ignore: - if Padd.match (Line): - p.added += 1 - continue - if Prem.match (Line): - p.removed += 1 + if not Numstat: + # + # If we have a file filter, check for file lines. 
+ # + if FileFilter: + ignore = ApplyFileFilter (Line, ignore) + # + # OK, maybe it's part of the diff itself. + # + if not ignore: + if Padd.match (Line): + p.added += 1 + continue + if Prem.match (Line): + p.removed += 1 + else: + # Get the statistics (lines added/removes) using numstats + # and without requiring a diff (--numstat instead -p) + m = Pnumstat.match (Line) + if m: + # If we have a file filter, check for file lines. + if FileFilter and not FileFilter.search (m.group(3)): + continue + + try: + p.added += int(m.group(1)) + p.removed += int(m.group(2)) + except ValueError: + # A binary file (image, etc.) is marked with '-' + pass if '@' in p.author.name: GripeAboutAuthorName (p.author.name) diff --git a/patterns.py b/patterns.py index e63efb6..423c521 100644 --- a/patterns.py +++ b/patterns.py @@ -37,3 +37,7 @@ PExtMerge = re.compile(r'^ +Merge( branch .* of)? ([^ ]+:[^ ]+)\n$') PIntMerge = re.compile(r'^ +(Merge|Pull) .* into .*$') # PIntMerge2 = re.compile(r"^ +Merge branch(es)? '.*$") PIntMerge2 = re.compile(r"^ +Merge .*$") +# +# Another way to get the statistics (per file). It implies --numstat +Pnumstat = re.compile('^(\d+|-)\s+(\d+|-)\s+(.*)$') +Prename = re.compile('(.*)\{(.*) => (.*)\}(.*)')