7782db9260d4 "docs/gen-html-index: Extract titles from HTML documents"
requires HTML::TreeBuilder::XPath.
This is sadly not as widely available as I had hoped. Work around
this problem by making the use of this module optional: instead of
`use'ing it at the top level, we `require' it inside the eval that
extracts the title. If the module is not present, then the title is
simply not extracted and the filename is used as before, which is tolerable.
Also add a -D (debug) option which prints the error when title extraction fails.
Reported-by: Doug Goldstein <cardoe@cardoe.com>
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
Reviewed-by: Doug Goldstein <cardoe@cardoe.com>
Tested-by: Doug Goldstein <cardoe@cardoe.com>
use Getopt::Long;
use IO::File;
use File::Basename;
-use HTML::TreeBuilder::XPath;
Getopt::Long::Configure('bundling');
our %index;
our $outdir;
+our $debug;
-GetOptions("i=s" => sub { read_index(@_);} )
+GetOptions("i=s" => sub { read_index(@_);},
+ "D" => \$debug)
or die;
($outdir,@docs) = @ARGV;
my $from_html;
eval {
+ require HTML::TreeBuilder::XPath;
my $tree = new HTML::TreeBuilder::XPath;
my $f = "$outdir/$l.html";
open F, '<', $f or die "$l $f $!";
close F;
$from_html = $tree->findvalue("/html/head/title");
};
+ print "$l: get title: $@" if $@ && $debug;
return $from_html if $from_html;
return basename($l);