#!/usr/bin/env perl
# $Id: tl-check-files-by-format 52673 2019-11-07 18:38:33Z karl $
# Copyright Manuel P\'egouri\'e-Gonnard, 2010-2016. WTFPL v2.
# Check that files in various formats are somewhat valid.
#
# This used to be run from cron, but no longer is, because it takes many
# hours, and there are too many false positives. Inserting all the
# exclusions got too annoying. Give up. --karl, 2aug19.
#
# The following formats are checked currently: pfb, pdf, tfm, vf, otf.
#
# For each format, there is one routine &check_<format> and an exclusion
# 'list' (hash reference) $exclude_<format>: the keys are either a
# directory name if they have a trailing slash, or a file name. If a key
# named '*base*' is present, then the paths are relative to this base,
# otherwise to $tlroot. The values of the keys may be free or have a
# special meaning depending on the associated check_<format> routine.
#
# Formats that might be added:
# - afm with (afm2pl?)

BEGIN {
#  $^W = 1;
  ($mydir = $0) =~ s,/[^/]*$,,;
  $tlroot = "$mydir/../..";
  unshift (@INC, "$tlroot/tlpkg");
}

use File::Basename;
use File::Temp;
use Pod::Usage;
use Getopt::Long;

use TeXLive::TLPDB;


# Most of these breakages need to be reported upstream, or documented
# that they are unavoidable.  Add comments if you pursue any of them.

# value has no meaning here
my $exclude_otf = { '*base*' => 'texmf-dist/fonts/opentype',
  'texmf-dist/doc/fonts/cm-unicode/Fontmap.CMU.otf' => 1,
};

# 1 means pdfinfo issues errors but still exits successfully
# 2 means pdfinfo exits non-zero
# -1 means skip completely.
my $exclude_pdf = {
     # 14nov11 author cannot fix:
  'texmf-dist/doc/fonts/frcursive/frcursive.pdf' => 1,
     # 17apr14 unknown:
  'texmf-dist/doc/generic/pst-perspective/' => 1,
     # 2jul16+20dec15 "couldn't read xref table", but seems basically ok:
  'texmf-dist/doc/latex/bookcover/figures/ekflogo.pdf' => 1,
  'texmf-dist/doc/latex/dvdcoll/dcexample.pdf' => 1,   # acroform but ok
  'texmf-dist/doc/latex/visualfaq/visualFAQ.pdf' => 1, # acroform but ok
  'texmf-dist/doc/latex/ksp-thesis/ksp-thesis.pdf' => 1,
     # 4aug16 pdfinfo from xpdf 3.03 crashes (reported to derekn):
  'texmf-dist/doc/latex/ocgx/demo-ocgx.pdf' => -1,
     # not a PDF file:
  'texmf-dist/dvips/tetex/config.pdf' => -1,
};

# the value has no meaning here
my $exclude_pfb = { '*base*' => 'texmf-dist/fonts/type1',
  'adobe/courier/' => 1,
  'adobe/sourcecodepro/' => 1, # SourceCodePro-{Regular,BoldIt}.pfb, 10nov12 contacted
  'arabi/arabeyes/' => 1,
  'arabi/farsiweb/' => 1,
  'arkandis/adfsymbols/' => 1,
  'arkandis/libris/' => 1,
  'arkandis/venturis/' => 1,
  'arphic/bkaiu/' => 1,
  'arphic/bsmiu/' => 1,
  'arphic/gbsnu/' => 1,
  'arphic/gkaiu/' => 1,
  'gust/poltawski/' => 1,
  'nowacki/iwona/' => 1,
  'nowacki/kurier/' => 1,
  'public/allrunes/' => 1,
  'public/amsfonts/cm/' => 1,
  'public/amsfonts/cmextra/' => 1,
  'public/arev/' => 1,
  'public/ascii-font/' => 1,
  'public/aspectratio/' => 1, # 11feb12 reported to author
  'public/augie/' => 1,
  'public/aurical/' => 1,
  'public/bbold-type1/' => 1, # old y&y fonts, won't be changing
  'public/belleek/' => 1,
  'public/bera/' => 1,
  'public/brushscr/' => 1,
  'public/burmese/' => 1,
  'public/carolmin-ps/' => 1,
  'public/chemarrow/' => 1,
  'public/cjhebrew/' => 1,
  'public/cm-super/' => 1,
  'public/cm-unicode/cmunobi.pfb' => 1,
  'public/cmcyr/' => 1,
  'public/countriesofeurope/' => 1, # 22apr12 not reported
  'public/cs/' => 1,
  'public/doublestroke/' => 1,
  'public/ebgaramond/' => 1,	# 9jul14 bug in fontforge/t1lint, not reported
  'public/epiolmec/' => 1,
  'public/esstix/' => 1,	# 12may11 author fixed some, others will remain
  'public/ethiop-t1/' => 1,
  'public/eurosym/' => 1,
  'public/fbb/' => 1,		# 23aug13 bluevalues, not fixable
  'public/fira/' => 1,		# 2015 author unable/unwilling
  'public/foekfont/' => 1,
  'public/fonetika/' => 1,
  'public/fontawesome/' => 1,	# 2015 author unable/unwilling
  'public/fourier/' => 1,
  'public/gfsartemisia/' => 1,
  'public/gfsbaskerville/' => 1,
  'public/gfsbodoni/' => 1,
  'public/gfscomplutum/' => 1,
  'public/gfsdidot/' => 1,
  'public/gfsneohellenic/' => 1,
  'public/gfssolomos/' => 1,
  'public/hacm/' => 1,
  'public/initials/' => 1,
  'public/ipaex-type1/' => 1,	# 20may13 too many to bother reporting
  'public/itrans/' => 1,
  'public/kerkis/' => 1,
  'public/kpfonts/' => 1,
  'public/libertine/' => 1,	# 5jan13 maintainer declines to fix triviality
  'public/libertinust1math/'=>1,# 7apr16 hint triviality not fixed
  'public/linearA/' => 1,
  'public/lm/' => 1,
  'public/lxfonts/' => 1,
  'public/marvosym/' => 1,
  'public/mathabx-type1/' => 1, # 14jan11 wrote bnb/preining --karl
  'public/mathdesign/mdpus/' => 1,# 3jan16 author informed long ago, no release
  'public/mathpazo/' => 1,
  'public/newpx/' => 1,		# 21aug16 bluezones not worth fixing
  'public/newtx/' => 1,		# 21aug16 bluezones not worth fixing
  'public/ocherokee/' => 1,
  'public/oinuit/' => 1,
  'public/old-arrows/' => 1,	# 2015 author unable/unwilling
  'public/omega/' => 1,
  'public/phaistos/' => 1,
  'public/pl/' => 1,
  'public/playfair/' => 1,	# 15mar14 known hinting issues
  'public/pxfonts/' => 1,
  'public/rsfs/' => 1,
  'public/starfont/' => 1,	# 4oct10 reported to author 
  'public/staves/' => 1,
  'public/stmaryrd/' => 1,	# blue values not defined, not maintained
  'public/tabvar/' => 1,
  'public/tex-gyre/' => 1,
  'public/txfonts/' => 1,
  'public/txfontsb/' => 1,
  'public/wasy2-ps/' => 1,	# too old, will not be updated
  'public/xypic/' => 1,
  'public/yhmath/' => 1,
  'texmf-dist/doc/fonts/cm-unicode/Fontmap.CMU.pfb' => 1,
  'uhc/umj/' => 1,
  'urw/avantgar/' => 1,
  'urw/bookman/' => 1,
  'urw/courier/' => 1,
  'urw/helvetic/' => 1,
  'urw/ncntrsbk/' => 1,
  'urw/palatino/' => 1,
  'urw/symbol/' => 1,
  'urw/times/' => 1,
  'urw/zapfding/' => 1,
  'vntex/arevvn/' => 1,
  'vntex/comicsansvn/' => 1,
  'vntex/txttvn/' => 1,
  'vntex/urwvn/' => 1,
  'vntex/vnr/' => 1,
  'wadalab/dgj/' => 1,
  'wadalab/dmj/' => 1,
  'wadalab/mcj/' => 1,
  'wadalab/mrj/' => 1,
  'tlpkg/texworks/share/fonts/' => 1, # these come from gs
};


# the value has no meaning here
my $exclude_tfm = { '*base*' => 'texmf-dist/fonts/tfm',
  'jknappen/ec/' => 1,
  'jknappen/fc/' => 1,
  'public/arev/' => 1,
  'public/chess/' => 1,		# original has extra bytes, not worth fixing
  'public/cmcyr/' => 1,
  'public/dozenal/' => 1,	# wrote author 31oct12
  'public/gfsbodoni/' => 1,	# wrote author 12dec12
  'public/japanese-otf/' => 1,	# japanese tfm's
  'public/malayalam/' => 1,
  'public/mathdesign/mdici/' => 1,# 3jan16 author informed long ago, no release
  'public/mathdesign/mdpgd/' => 1,
  'public/mathdesign/mdpus/' => 1,
  'public/wnri/' => 1,
  'public/wsuipa/' => 1,
};


# the value has no meaning here
my $exclude_vf = { '*base*' => 'texmf-dist/fonts/vf',
  'public/ae/' => 1,
  'public/bgreek/' => 1,
  'public/eco/' => 1,
  'public/epigrafica/' => 1,
  'public/fonts-tlwg/' => 1,	# reported to author, early 2012
  'public/gfsartemisia/' => 1,
  'public/gfscomplutum/' => 1,
  'public/gfsdidot/' => 1,
  'public/gfsneohellenic/' => 1,
  'public/gfsporson/' => 1,
  'public/gfssolomos/' => 1,
  'public/hfoldsty/' => 1,
  'public/kerkis/' => 1,
  'public/mathdesign/mdbch/' => 1, #3jan16 author informed long ago, no release
  'public/mathdesign/mdbch/' => 1, #3jan16 author informed long ago, no release
  'public/mathdesign/mdici/' => 1,
  'public/mathdesign/mdpgd/' => 1,
  'public/mathdesign/mdpus/' => 1,
  'public/mathdesign/mdput/' => 1,
  'public/mathdesign/mdugm/' => 1,
  'public/sansmathfonts/' => 2,	# 20may13 ok with cmssbx12.tfm from sauter
  'public/txfontsb/' => 1,
  'public/uppunctlm/' => 1,
  'public/zefonts/' => 1,
  'vntex/comicsansvn/' => 1,
};


my $tmpdir = File::Temp::tempdir(CLEANUP => 1);

my $opt_all = 0;
my $opt_help = 0;

TeXLive::TLUtils::process_logging_options ();
GetOptions ("all" => \$opt_all,
            "help|?" => \$opt_help) or pod2usage (2);
pod2usage ("-exitstatus" => 0, "-verbose" => 2) if $opt_help;


exit(&main());


sub main {
  my @files = get_file_list();

  for my $file (grep { /\.otf$/ } @files) {
    check_otf($file);}

  for my $file (grep { /\.pdf$/ } @files) {
    check_pdf($file); }

  for my $file (grep { /\.pfb$/ } @files) {
    check_pfb($file); }

  for my $file (grep { /\.tfm$/ } @files) {
    check_tfm($file); }

  for my $file (grep { /\.vf$/ } @files) {
    check_vf($file); }

  return 0;
}

# get the list of files in the database of a TL installation
sub get_file_list {
  my $tlpdb = TeXLive::TLPDB->new(root => $tlroot);

  my @files = ();
  for my $tlpname ($tlpdb->list_packages) {
    my $tlp = $tlpdb->get_package($tlpname);
    push(@files, $tlp->docfiles, $tlp->runfiles, $tlp->srcfiles);
  }

  # [u]ptex font files use their own formats; don't bother checking,
  # or adding lots of individual entries to the exclude lists.
  @files = grep ($_ !~ m![-/]u?ptex(-fonts)?/!, @files);

  # It takes a long time (about an hour) to check every file.  And there
  # is no value in checking unchanged files every day.  So we filter the
  # file list (unless --all is given) by mtime ... unless this happens
  # to be the first of the month, when we'll check everything, just to
  # be reminded of anything that may have crept in.
  # 
  if (! ($opt_all || `date +%d` == 1)) {
    my @filtered = ();
    for my $f (@files) {
      push (@filtered, $f) if (-M "$tlroot/$f" < 4);
    }
    @files = @filtered;
  }
  return @files;
}


# return the value in exclude list associated to a file, or undef
sub exclude_value {
  my ($exclude, $file) = @_;
  my $base = $exclude->{'*base*'};
  $file =~ s!^$base/!! if $base;
  (my $filedir = $file) =~ s![^/]*$!!;
  return $exclude->{$file} || $exclude->{$filedir};
}


# the checking routines for each file type.

sub check_pdf {
  my ($file) = @_;
  my $excl_val = exclude_value($exclude_pdf, $file) || 0;
  return if $excl_val < 0;
  #
  my $errfile = "$tmpdir/pdferr";
  unlink($errfile);
  my $bad_exit = system("pdfinfo $tlroot/$file >/dev/null 2>$errfile");
  my $badness = $bad_exit ? 2 : -s $errfile ? 1 : 0;
  return if $badness <= $excl_val;
  print "Bad PDF: $file\n" if $badness == 2;
  print "Damaged PDF: $file\n" if $badness == 1;
}

sub check_pfb {
  my ($file) = @_;
  return if defined exclude_value($exclude_pfb, $file);
  my $errfile = "$tmpdir/pfberr";
  unlink($errfile);
  my $bad = system("t1lint -q $tlroot/$file");
  print "Bad pfb: $file\n" if $bad;
}

sub check_tfm {
  my ($file) = @_;
  return if defined exclude_value($exclude_tfm, $file);
  my $outfile = "$tmpdir/tfmout.pl";
  my $errfile = "$tmpdir/tfmerr";
  unlink($errfile);
  system("tftopl $tlroot/$file $outfile 2>$errfile");
  print "Bad tfm: $file\n" if -s $errfile;
}

sub check_vf {
  my ($file) = @_;
  return if defined exclude_value($exclude_vf, $file);
  (my $tfmfile = $file) =~ s!/vf/!/tfm/!;
  $tfmfile =~ s/\.vf$/.tfm/;
  return if defined exclude_value($exclude_tfm, $tfmfile);
  my $outfile = "$tmpdir/vfout.vp";
  my $errfile = "$tmpdir/vferr";
  unlink($errfile);
  system("vftovp $tlroot/$file $tlroot/$tfmfile $outfile 2>$errfile");
  print "Bad vf: $file\n" if -s $errfile;
}

sub check_otf {
  my ($file) = @_;
  return if defined exclude_value($exclude_otf, $file);
  my $bad = system("otfinfo --info $tlroot/$file >/dev/null 2>&1");
  print "Bad otf: $file\n" if $bad;
}

# vim: sw=2 expandtab
