#  expireIMGs.pl
#
#  Process a list of HTML files, removing IMG tags that have expired.
#  We know this is so if the tag has an EXPIRES attribute whose ISO
#  date value has passed.
#
#  We try to do it intelligently, and only rewrite the file if an
#  expired image tag is found.
#
#  Example:
#
#    <IMG SRC="foo.gif" EXPIRES="2000-07-18">
#
#  will be removed on or after 18 July 2000.
#
#  The EXPIRES attribute may appear anywhere in the IMG tag. The
#  quotes around the date are optional.
#
#  If an <IMG> tag is surrounded by <A></A> tags, that anchor tag
#  will be removed as well.
#
#  Input: a list of files to process.
#
#  Mark L. Irons
#  18 July 2000
#
#--------------------------------------------------------------------
#  Get current date and convert it to ISO format
#
# Build today's date, in the local time zone, as an ISO 8601 string
# (yyyy-mm-dd).  Zero-padded ISO dates order chronologically under plain
# string comparison, so $curdate can be compared against EXPIRES values
# with the string operators.
#
# localtime is called exactly once so that day, month and year all come
# from the same instant.  Only the three fields actually needed are taken.
($curday, $curmon, $curyear) = (localtime)[3, 4, 5];
$curyear = $curyear + 1900;                   # localtime counts years from 1900
$curmon  = sprintf("%02d", $curmon + 1);      # months are 0-based; pad to 2 digits
$curday  = sprintf("%02d", $curday);          # pad day to 2 digits
$curdate = join("-", $curyear, $curmon, $curday);
#
#--------------------------------------------------------------------
#  Set the pattern to search on.  $imgpattern is regex text (a plain
#  string) with two alternatives, tried in this order:
#
#    1. <A ...><IMG ... EXPIRES="yyyy-mm-dd" ... ></A>
#    2.        <IMG ... EXPIRES="yyyy-mm-dd" ... >
#
#  Capture groups are numbered left to right across the whole pattern,
#  so the ISO date is captured in a different variable per alternative:
#
#    alternative 1 (anchor-wrapped image):  date in $1, $2 is undefined
#    alternative 2 (bare image):            date in $2, $1 is undefined
#
#  Code that uses this pattern must therefore look at both $1 and $2.
#
#  Details of what the pattern text accepts:
#    - tag and attribute names are written in upper case (IMG, A, EXPIRES)
#    - EXPIRES must be preceded by a single space and may appear anywhere
#      in the IMG tag; the quotes around the date are optional
#    - in alternative 1 the opening tag must start with "<A " (so it has
#      at least one attribute), and <A ...>, <IMG ...> and </A> must be
#      directly adjacent, with nothing between them
#
$imgpattern = '<A [^>]*><IMG[^<]* EXPIRES="?(\d\d\d\d-\d\d-\d\d)"?[^>]*></A>|<IMG[^<]* EXPIRES="?(\d\d\d\d-\d\d-\d\d)"?[^>]*>';
#
#--------------------------------------------------------------------
#  Loop over files, processing each.
#
# Main loop.  Each input line (from STDIN, or from the files named on the
# command line) is the path of one HTML file to process.
#
# Uses two values set earlier in the file:
#   $imgpattern - regex text matching an expiring <IMG> tag, optionally
#                 wrapped in <A ...></A>; the date is captured in $1 for
#                 the wrapped form and in $2 for the bare form
#   $curdate    - today's date as yyyy-mm-dd
#
# A file is read completely into memory, and is rewritten in place only
# if at least one expired tag was removed from it.
while (my $filename = <>) {
  # Remove the line terminator and any surrounding whitespace.  The
  # two-argument open() used previously did this implicitly; the
  # three-argument form takes the name literally, so do it explicitly.
  chomp $filename;
  $filename =~ s/^\s+//;
  $filename =~ s/\s+$//;

  # Three-argument open: the name comes from external input and must never
  # be interpreted as an open mode or a pipe command.
  my $in;
  if (!open($in, '<', $filename)) {
    warn "Can't open $filename, skipping: $!\n";
    next;
  }

  my $modifiedFlag = 0;                 # set once any tag is removed
  my $modifiedFile = "";                # text of the (possibly) modified file
  while (my $originalLine = <$in>) {
    my $outputbuffer = "";              # rebuilt version of this line
    my $remainder    = $originalLine;   # part of the line not yet scanned

    while ($remainder =~ /$imgpattern/) {
      # Copy the match variables straight away; the later matches in
      # this loop body would overwrite them.
      my ($before, $tag, $after) = ($`, $&, $');

      # The date is in $1 for <A><IMG></A> and in $2 for a bare <IMG>.
      # Reading only $1 would treat every bare tag as already expired.
      my $expires = defined($1) ? $1 : $2;

      $outputbuffer .= $before;
      if ($expires gt $curdate) {       # expiry date still in the future
        $outputbuffer .= $tag;          # keep the tag untouched
      }
      else {                            # expires today or earlier: drop it
        $modifiedFlag = 1;
        # Removing a tag from "x <IMG> y" would leave two spaces in a row;
        # close the gap by dropping the one before the tag.
        if ($outputbuffer =~ / \z/ && $after =~ /\A /) {
          chop($outputbuffer);
        }
      }
      $remainder = $after;              # carry on after this tag
    }
    $outputbuffer .= $remainder;        # whatever follows the last tag

    # Keep lines that were blank to begin with, but do not keep lines
    # that became blank because their only content was an expired tag.
    if ((length($originalLine) == 1) || (length($outputbuffer) > 1)) {
      $modifiedFile .= $outputbuffer;
    }
  } # processing single file
  close($in);

  next unless $modifiedFlag;            # nothing expired: leave file alone

  my $out;
  if (!open($out, '>', $filename)) {
    warn "Can't rewrite $filename: $!\n";
    next;
  }
  unless (print {$out} $modifiedFile) {
    warn "Error writing $filename: $!\n";
    close($out);
    next;
  }
  # Buffered write errors only surface at close, so check it too.
  unless (close($out)) {
    warn "Error closing $filename: $!\n";
    next;
  }
  print "Expired tags from $filename\n";
} # all files
