--- /dev/null
+
+######################################################################
+ ldp_print - print tool/script for DocBook SGML/XML documents
+######################################################################
+
+ Copyright (C) 2002-2000 - Greg Ferguson (gferg@metalab.unc.edu)
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+######################################################################
+
+This process/script is used in the production environment for the
+LDP. It relies on the HTMLDOC software package (GPL'ed) which can be
+obtained from the Easy Software Products (c) web site:
+
+ http://www.easysw.com/htmldoc/
+
+This process creates a PDF variant from the single-file HTML
+representation of a DocBook SGML (or XML) instance. The simple
+wrapper script (ldp_print) assumes that the file was created using
+{open}jade in a manner similar to:
+
+ jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
+
+Give the script the filename as an argument. It will then parse the
+file into 'title.html' and 'body.html' and send each to htmldoc (as
+the corresponding title page and body of the document).
+
+
+CAVEATS
+=======
+
+o Assumes perl is in /usr/bin; adjust if necessary
+
+o You may need to specify where the htmldoc executable resides.
+ The script assumes it's within your $PATH.
+
+o If you want Postscript as an output variant, enable the commented-out
+ Postscript lines near the end of the ldp_print script.
+
+o Relies on output from a DocBook instance created via DSSSL/{open}jade!
+
+o Cleans up (removes) the intermediate files it creates (but not the
+ PDF or Postscript files, obviously!)
+
+o Works silently; PDF (PostScript) will be created in the same directory
+ as was specified for the input (single-file HTML) file.
+
+o Provided without warranty or support!
+
+o I ran into a problem with htmldoc v1.8.8 which required a source
+ code change (I was getting a core dump from the htmldoc process).
+ Here is the change required:
+
+ htmldoc/ps-pdf.cxx :
+ 3662,3665d3661
+ < /* gjf = 11Oct2000 */
+ < if( temprow == NULL )
+ < break;
+ <
+
+ UPDATE (2001-10-10): It appears that later versions of htmldoc
+ have this problem corrected. The patch is not required.
+
+====
+Greg Ferguson / gferg (at) metalab.unc.edu
+11 Jan 2000
+
--- /dev/null
+#
+# fix_print_html.lib
+#
+# Dan Scott / <dan.scott (at) acm.org>
+# Ferg / <gferg (at) sgi.com>
+#
+# Used to prepare single-file HTML variant for PDF/Postscript creation
+# thru htmldoc.
+#
+# log:
+# 16Oct2000 - 0.1 - initial entry <gferg (at) sgi.com>
+# 03Apr2001 - 0.2 - fix for <preface>
+# 05Jul2001 - 0.3 - fix for <tt> and -f
+# 12Oct2001 - 0.4 - fix for sections; loop thru both files (body/title)
+# 27Nov2001 - 0.5 - fixed bug in determining where doc-index lies
+# 18Jan2002 - 0.5.1 - entity fix (822*)
+# 02Apr2002 - 0.6 - misc fixes (bibliography/appendix, etc).
+# 04Apr2002 - 0.7 - fix for newer DSSSL
+#
+
+# fix_print_html($in, $out, $ttl)
+#
+# Splits the single-file HTML document $in into two files for htmldoc:
+# the title page (written to $ttl) and the document body (written to
+# $out).  Both parts are passed through fix_html() before being written.
+#
+#   $in  - path of the single-file HTML input
+#   $out - path of the body file to create
+#   $ttl - path of the title-page file to create
+#
+# Returns 1 on success.  On any open/close failure a message is printed
+# on standard output and 0 is returned.
+#
+# The input is scanned line by line with a small state machine ($indx):
+#   -1  preamble: lines go to both the body and the title buffer until
+#       CLASS="TITLEPAGE" is seen
+#    0  title page: lines go to the title buffer until CLASS="TOC" is
+#       seen, at which point the title file is written
+#    1  table of contents: lines are dropped until the first chapter,
+#       preface or section
+#   >1  body: every remaining line goes to the body buffer
+#
+sub fix_print_html {
+
+    my($in, $out, $ttl) = @_;
+
+    # Three-argument open with lexical handles: the file name is never
+    # interpreted as an open mode or a pipe.
+    open(my $in_fh, '<', $in) || do {
+        print "fix_print_html: cannot open $in: $!\n";
+        return 0;
+    };
+
+    my($buf, $ttl_buf) = ('', '');
+    my($indx) = -1;
+    my($is_article) = 1;
+    while( defined(my $line = <$in_fh>) ) {
+
+        if( $indx == 1 ) {
+
+            # ignore everything until we see the chapter or sect
+            #
+            if( $line =~ /CLASS="CHAP/i || $line =~ /CLASS="PREF/i
+                ||
+                $line =~ /CLASS="SECT/i ) {
+
+                $buf .= $line;
+                $indx++;
+            }
+
+        } elsif( $indx == 0 ) {
+
+            # write out the title page file
+            #
+            if( $line =~ /CLASS="TOC"/ ) {
+
+                $ttl_buf .= "></DIV>\n</BODY>\n</HTML>\n";
+                $ttl_buf =~ s/<\/H1\n/<\/H1\n><P><BR><BR\n/ms;
+                $ttl_buf =~ s/<HR><\/DIV\n><HR>/<HR><\/DIV\n>/ms;
+                fix_html(\$ttl_buf, 1);
+
+                open(my $ttl_fh, '>', $ttl) || do {
+                    print "fix_print_html: cannot open $ttl: $!\n";
+                    close($in_fh);
+                    return 0;
+                };
+                print $ttl_fh $ttl_buf;
+
+                # buffered write errors only show up at close time
+                close($ttl_fh) || do {
+                    print "fix_print_html: cannot write $ttl: $!\n";
+                    close($in_fh);
+                    return 0;
+                };
+                $ttl_buf = '';
+                $indx++;
+
+            } else {
+                $ttl_buf .= $line;
+            }
+
+        } elsif( $indx < 0 ) {
+
+            if( $line =~ /CLASS="BOOK"/i ) {
+                $is_article = 0;
+            }
+
+            # up to this point, both buffers get the line
+            #
+            if( $line =~ /CLASS="TITLEPAGE"/ ) {
+
+                $ttl_buf .= $line . ">\n<P>\n<BR><BR><BR><BR>\n<\/P\n";
+                $indx++;
+
+            } else {
+                $buf .= $line;
+                $ttl_buf .= $line;
+            }
+
+        } else {
+
+            $buf .= $line;
+        }
+    }
+    close($in_fh);
+
+
+    # fix body file
+    #
+    open(my $out_fh, '>', $out) || do {
+        print "fix_print_html: cannot open $out: $!\n";
+        return 0;
+    };
+
+    fix_html(\$buf, $is_article);
+
+    print $out_fh $buf;
+    close($out_fh) || do {
+        print "fix_print_html: cannot write $out: $!\n";
+        return 0;
+    };
+
+
+    return 1;
+}
+
+
+# fix_html(\$buf, $is_article)
+#
+# Rewrites, in place, the jade-generated HTML held in the scalar that
+# $buf refers to, as preparation for PDF/Postscript creation thru
+# htmldoc.
+#
+#   $buf        - reference to the HTML text; the referenced scalar is
+#                 modified in place
+#   $is_article - 0 selects the book-specific heading handling; any
+#                 other value skips it
+#
+# Returns nothing.
+#
+# The patterns rely on the jade output style in which the closing '>'
+# of a tag is written at the start of the following line.
+#
+sub fix_html {
+
+    my($buf, $is_article) = @_;
+    my($indx) = -1;
+
+
+    # make corrections and write out the file
+    #
+
+    # unwrap the <P> that directly follows a list item
+    $$buf =~ s/(\n><LI\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+    $$buf =~ s/(\n><LI\n><DIV\nCLASS="FORMALPARA"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+    $$buf =~ s/(\n><LI\nSTYLE="[^\"]+"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
+    if( $is_article == 0 ) {
+        # section titles marked H1 become the temporary level H0, which
+        # the heading shift further down maps to H2
+        $$buf =~
+            s/(\nCLASS="SECT[TION\d]+"\n>)<H1\n(.*?)<\/H1/$1<H0\n$2<\/H0/gims;
+        $$buf =~
+            s/(\nCLASS="SECT[TION\d]+"\n><HR>)<H1\n(.*?)<\/H1/$1<H0\n$2<\/H0/gims;
+    }
+    $$buf =~ s/<H1(\nCLASS="INDEXDIV"\n)(.*?)<\/H1/<H2$1$2<\/H2/gims;
+
+    # cut the document off at the last doc-index heading, if any; the
+    # upper-case anchor name is tried first
+    for my $anchor ("DOC-INDEX", "doc-index") {
+        $indx = rindex($$buf, "<H1\n><A\nNAME=\"$anchor\"");
+        if( $indx > -1 ) {
+            $$buf = substr($$buf, 0, $indx);
+            $$buf .= "\n<\/BODY>\n<\/HTML>\n\n";
+            last;
+        }
+    }
+
+    # numeric character references and markup to replace or drop
+    $$buf =~ s/\&\#13;//g;
+    $$buf =~ s/\&\#60;/\</g;
+    $$buf =~ s/\&\#62;/\>/g;
+    $$buf =~ s/\&\#8211;/\-/g;
+    $$buf =~ s/\&\#8220;/\"/g;
+    $$buf =~ s/\&\#8221;/\"/g;
+    $$buf =~ s/WIDTH=\"\d\"//g;
+    $$buf =~ s/><[\/]*TBODY//g;
+    $$buf =~ s/><[\/]*THEAD//g;
+    $$buf =~ s/TYPE=\"1\"\n//gim;
+
+    $$buf =~ s/<P\nCLASS="LITERALLAYOUT"(.*?)<\/P/<P CLASS="LITERALLAYOUT"><FONT FACE=\"courier\"$1<\/FONT><\/P/gms;
+
+    if( $$buf !~ /<H1/ ) {
+
+        # for newer docbook styles, set h2 to h1, etc.
+        #
+        for my $from (2 .. 6) {
+            my $to = $from - 1;
+            $$buf =~ s/<H${from}/<H${to}/g;
+            $$buf =~ s/<\/H${from}/<\/H${to}/g;
+        }
+
+    } elsif( $is_article == 0 ) {
+
+        # push every heading one level down (H5->H6 ... H1->H2, and the
+        # temporary H0 also to H2), then re-set the chapter level only
+        # to H1...
+        #
+        for my $from (reverse 0 .. 5) {
+            my $to = ($from == 0) ? 2 : $from + 1;
+            $$buf =~ s/<H${from}/<H${to}/g;
+            $$buf =~ s/<\/H${from}/<\/H${to}/g;
+        }
+
+        # Set once a chapter-like opener has been seen; while it is set,
+        # H2 is promoted back to H1, up to the next line holding a <DIV.
+        my $promote = 0;
+
+        my(@lines) = split(/\n/, $$buf);
+        for my $line (@lines) {
+
+            if( $promote ) {
+                if( $line =~ /<DIV/ ) {
+                    $promote = 0;
+                    next;
+                }
+                $line =~ s/<H2/<H1/g;
+                $line =~ s/<\/H2/<\/H1/g;
+            }
+
+            if( $line =~ /^CLASS=\"CHAP/i
+                ||
+                $line =~ /^NAME=\"BIBL/i
+                ||
+                $line =~ /^CLASS=\"APPENDIX/i
+                ||
+                $line =~ /^CLASS=\"GLOSSARY/i
+                ||
+                $line =~ /^CLASS=\"PREF/i ) {
+                $promote = 1;
+            }
+        }
+
+        $$buf = join("\n", @lines);
+
+    }
+    $$buf =~ s/><DIV\nCLASS="\w+"\n//gms;
+    $$buf =~ s/><\/DIV\n//gms;
+
+    # Strip <SPAN ...> wrappers.  These must operate on $$buf (the text);
+    # matching against $buf would only test the stringified reference
+    # and leave the document untouched.
+    $$buf =~ s/<SPAN\n[^>]*?>//gms;
+    $$buf =~ s/<\/SPAN\n>//gms;
+
+    $$buf =~ s/(><LI\n)><P\n(.*?)<\/P\n>(<\/LI\n)/$1$2$3/gms;
+
+    return;
+}
+
+
+# Return true from package include
+#
+1;
+
--- /dev/null
+#!/usr/bin/perl -w
+#
+# usage: ldp_print <single_file.html>
+#
+# Creates a PDF variant of a single-file HTML representation of a
+# DocBook SGML (or XML) instance. This simple wrapper assumes that
+# the file was created using {open}jade in a manner similar to:
+#
+# jade -t sgml -i html -V nochunks -d $style $fname > $fname.html
+#
+# Give this script the filename as an argument. It will then parse
+# the file into 'title.html' and 'body.html' and send each to
+# htmldoc (as the corresponding title page and body of the document).
+#
+#
+# CAVEATS:
+#
+# Assumes perl is in /usr/bin; adjust if necessary
+#
+# You may need to specify where the htmldoc executable resides.
+# The script assumes it's within your $PATH.
+#
+# If you want Postscript as an output variant, uncomment the
+# appropriate lines (see below).
+#
+# Relies on output from a DocBook instance created via DSSSL/{open}jade!
+#
+# Cleans up (removes) the intermediate files it creates (but not the
+# PDF or Postscript files, obviously!)
+#
+# Works silently; PDF (PostScript) will be created in the same directory
+# as was specified for the input (single-file HTML) file.
+#
+# Provided without warranty or support!
+#
+# gferg@sgi.com / Ferg (used as part of the LDP production env)
+#
+
+use strict;
+push(@INC, "./");
+require 'fix_print_html.lib';
+
+# The defined() test comes first so that running the script without an
+# argument prints the usage text instead of an "uninitialized value"
+# warning (perl runs with -w).
+if( !defined($ARGV[0]) || $ARGV[0] eq '' || !(-r $ARGV[0]) ) {
+    die "\nusage: ldp_print <single_file.html>\n\n";
+}
+
+# The output file is named after the input, minus its extension, so it
+# ends up in the same directory as the input file.
+my($fname_wo_ext) = $ARGV[0];
+$fname_wo_ext =~ s/\.[\w]+$//;
+
+
+# create new files from single HTML file to use for print; stop here
+# when that fails rather than feeding htmldoc missing or stale files
+#
+fix_print_html($ARGV[0], 'body.html', 'title.html') || do {
+    unlink('body.html', 'title.html');
+    die "\nldp_print: could not prepare $ARGV[0] for printing\n";
+};
+
+# The command is passed to system() as a list, so no shell is involved
+# and a file name with spaces or shell metacharacters is handed to
+# htmldoc unchanged.
+#
+my(@cmd) = ('htmldoc', '--size', 'universal', '-t', 'pdf',
+            '-f', "${fname_wo_ext}.pdf",
+            '--firstpage', 'p1', '--titlefile', 'title.html',
+            'body.html', '--footer', 'c.1');
+
+system(@cmd);
+
+# $! is only meaningful when the program could not be started at all
+# ($? == -1); otherwise report what htmldoc itself returned.
+if( $? == -1 ) {
+    die "\nldp_print: could not create ${fname_wo_ext}.pdf ($!)\n";
+} elsif( $? & 127 ) {
+    die "\nldp_print: could not create ${fname_wo_ext}.pdf " .
+        "(htmldoc died with signal " . ($? & 127) . ")\n";
+} elsif( $? ) {
+    die "\nldp_print: could not create ${fname_wo_ext}.pdf " .
+        "(htmldoc exit status " . ($? >> 8) . ")\n";
+}
+
+# For postscript output; uncomment the following lines:
+#
+# system('htmldoc', '--size', 'universal', '-t', 'ps',
+#        '-f', "${fname_wo_ext}.ps",
+#        '--firstpage', 'p1', '--titlefile', 'title.html', 'body.html');
+# die "\nldp_print: could not create ${fname_wo_ext}.ps\n" if ($?);
+#
+
+# cleanup: remove the intermediate files; like 'rm -f', a file that is
+# already missing is not an error
+#
+unlink('body.html', 'title.html');
+
+exit(0);
+