utils/ldp_print/fix_print_html.lib

   1 #
   2 # fix_print_html.lib
   3 #
   4 #   Dan Scott  / <dan.scott (at) acm.org>
   5 #   Ferg       / <gferg (at) sgi.com>
   6 #
   7 #   Used to prepare single-file HTML variant for PDF/Postscript creation
   8 #   thru htmldoc.
   9 #
  10 # log:
  11 #     16Oct2000 - 0.1   - initial entry <gferg (at) sgi.com>
  12 #     03Apr2001 - 0.2   - fix for <preface>
  13 #     05Jul2001 - 0.3   - fix for <tt> and -f
  14 #     12Oct2001 - 0.4   - fix for sections; loop thru both files (body/title)
  15 #     27Nov2001 - 0.5   - fixed bug in determining where doc-index lies
  16 #     18Jan2002 - 0.5.1 - entity fix (822*)
  17 #     02Apr2002 - 0.6   - misc fixes (bibliography/appendix, etc).
  18 #     04Apr2002 - 0.7   - fix for newer DSSSL
  19 #
  20
  21 sub fix_print_html {
  22
  23    my($in,$out,$ttl) = @_;
  24
  25    open(IN_FILE, "< $in") || do {
  26         print "fix_print_html: cannot open $in: $!\n";
  27         return 0;
  28    };
  29
  30    my($buf, $ttl_buf) = '';
  31    my($indx) = -1;
  32    my($is_article) = 1;
  33    while(<IN_FILE>) {
  34
  35          if( $indx == 1 ) {
  36
  37              # ignore everything until we see the chapter or sect
  38              #
  39              if( $_ =~ /CLASS="CHAP/i || $_ =~ /CLASS="PREF/i
  40                  ||
  41                  $_ =~ /CLASS="SECT/i )  {
  42
  43                  $buf .= $_;
  44                  $indx++;
  45
  46              } else {
  47                  next;
  48              }
  49
  50          } elsif( $indx == 0 ) {
  51
  52              # write out the title page file
  53              #
  54              if( $_ =~ /CLASS="TOC"/ ) {
  55
  56                  $ttl_buf .= "></DIV>\n</BODY>\n</HTML>\n";
  57                  $ttl_buf =~ s/<\/H1\n/<\/H1\n><P><BR><BR\n/ms;
  58                  $ttl_buf =~ s/<HR><\/DIV\n><HR>/<HR><\/DIV\n>/ms;
  59                  &fix_html(\$ttl_buf, 1);
  60
  61                  open(TOC_FILE, "> $ttl") || do {
  62                       print "fix_print_html: cannot open $ttl: $!\n";
  63                       close(IN_FILE);
  64                       return 0;
  65                  };
  66                  print TOC_FILE $ttl_buf;
  67                  close(TOC_FILE);
  68                  $ttl_buf = '';
  69                  $indx++;
  70
  71              } else {
  72                 $ttl_buf .= $_;
  73              }
  74
  75          } elsif( $indx < 0 ) {
  76
  77              if( $_ =~ /CLASS="BOOK"/i ) {
  78                  $is_article = 0;
  79              }
  80
  81              # up to this point, both buffers get the line
  82              #
  83              if( $_ =~ /CLASS="TITLEPAGE"/ ) {
  84
  85                  $ttl_buf .= $_ . ">\n<P>\n<BR><BR><BR><BR>\n<\/P\n";
  86                  $indx++;
  87
  88              } else {
  89                  $buf .= $_;
  90                  $ttl_buf .= $_;
  91              }
  92
  93          } else {
  94
  95              $buf .= $_;
  96          }
  97    }
  98    close(IN_FILE);
  99
 100
 101    # fix body file
 102    #
 103    open(OUT_FILE, "> $out") || do {
 104         print "fix_print_html: cannot open $out: $!\n";
 105         return 0;
 106    };
 107
 108    &fix_html(\$buf, $is_article);
 109
 110    print OUT_FILE $buf;
 111    close(OUT_FILE);
 112
 113
 114    return 1;
 115 }
 116
 117
 118 sub fix_html {
 119
 120    my($buf, $is_article) = @_;
 121    my($indx) = -1;
 122
 123
 124    # make corrections and write out the file
 125    #
 126
 127    $$buf =~ s/(\n><LI\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
 128    $$buf =~ s/(\n><LI\n><DIV\nCLASS="FORMALPARA"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
 129    $$buf =~ s/(\n><LI\nSTYLE="[^\"]+"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
 130    if( $is_article == 0 ) {
 131        $$buf =~
 132          s/(\nCLASS="SECT[TION\d]+"\n>)<H1\n(.*?)<\/H1/$1<H0\n$2<\/H0/gims;
 133        $$buf =~
 134          s/(\nCLASS="SECT[TION\d]+"\n><HR>)<H1\n(.*?)<\/H1/$1<H0\n$2<\/H0/gims;
 135    }
 136    $$buf =~ s/<H1(\nCLASS="INDEXDIV"\n)(.*?)<\/H1/<H2$1$2<\/H2/gims;
 137    if( ($indx = rindex($$buf, "<H1\n><A\nNAME=\"DOC-INDEX\"")) > -1 ) {
 138        $$buf = substr($$buf, 0, $indx);
 139        $$buf .= "\n<\/BODY>\n<\/HTML>\n\n";
 140    } elsif( ($indx = rindex($$buf, "<H1\n><A\nNAME=\"doc-index\"")) > -1 ) {
 141        $$buf = substr($$buf, 0, $indx);
 142        $$buf .= "\n<\/BODY>\n<\/HTML>\n\n";
 143    }
 144
 145    $$buf =~ s/\&\#13;//g;
 146    $$buf =~ s/\&\#60;/\&lt;/g;
 147    $$buf =~ s/\&\#62;/\&gt;/g;
 148    $$buf =~ s/\&\#8211;/\-/g;
 149    $$buf =~ s/\&\#8220;/\"/g;
 150    $$buf =~ s/\&\#8221;/\"/g;
 151    $$buf =~ s/WIDTH=\"\d\"//g;
 152    $$buf =~ s/><[\/]*TBODY//g;
 153    $$buf =~ s/><[\/]*THEAD//g;
 154    $$buf =~ s/TYPE=\"1\"\n//gim;
 155
 156    $$buf =~ s/<P\nCLASS="LITERALLAYOUT"(.*?)<\/P/<P CLASS="LITERALLAYOUT"><FONT FACE=\"courier\"$1<\/FONT><\/P/gms;
 157
 158    my($cnt, $j) = 0;
 159
 160    if( $$buf !~ /<H1/ ) {
 161
 162        # for newer docbook styles, set h2 to h1, etc.
 163        #
 164        for($cnt=2; $cnt < 7; $cnt++ ) {
 165            $j = $cnt - 1;
 166            $$buf =~ s/<H${cnt}/<H${j}/g;
 167            $$buf =~ s/<\/H${cnt}/<\/H${j}/g;
 168        }
 169
 170    } elsif( $is_article == 0 ) {
 171
 172        # decrement the headers by 1 and then re-set the
 173        # chapter level only to H1...
 174        #
 175        for($cnt=5; $cnt >= 0; $cnt--) {
 176            $j = $cnt + 1;
 177            if( $cnt == 0 ) {
 178                $j = 2;
 179            }
 180            $$buf =~ s/<H${cnt}/<H${j}/g;
 181            $$buf =~ s/<\/H${cnt}/<\/H${j}/g;
 182        }
 183
 184        my(@l) = split(/\n/, $$buf);
 185        for( $cnt=0; $cnt < (@l + 0); $cnt++ ) {
 186
 187             if( $j == 1 ) {
 188                 if( $l[$cnt] =~ /<DIV/ ) {
 189                     $j = 0;
 190                     next;
 191                 }
 192                 $l[$cnt] =~ s/<H2/<H1/g;
 193                 $l[$cnt] =~ s/<\/H2/<\/H1/g;
 194             }
 195
 196             if( $l[$cnt] =~ /^CLASS=\"CHAP/i
 197                 ||
 198                 $l[$cnt] =~ /^NAME=\"BIBL/i
 199                 ||
 200                 $l[$cnt] =~ /^CLASS=\"APPENDIX/i
 201                 ||
 202                 $l[$cnt] =~ /^CLASS=\"GLOSSARY/i
 203                 ||
 204                 $l[$cnt] =~ /^CLASS=\"PREF/i ) {
 205                 $j = 1;
 206             }
 207        }
 208
 209        $$buf = join("\n", @l);
 210
 211    }
 212    $$buf =~ s/><DIV\nCLASS="\w+"\n//gms;
 213    $$buf =~ s/><\/DIV\n//gms;
 214
 215    $buf =~ s/<SPAN\n[^>]*?>//gms;
 216    $buf =~ s/<\/SPAN\n>//gms;
 217
 218    $$buf =~ s/(><LI\n)><P\n(.*?)<\/P\n>(<\/LI\n)/$1$2$3/gms;
 219
 220    return;
 221 }
 222
 223
 224 # A file loaded with require() must end by evaluating to a true value;
 225 # the statement below supplies it.
 226 1;
 227