From: hal9 Date: Mon, 12 Aug 2002 05:04:23 +0000 (+0000) Subject: Adding ldp_print stuff for pdf generation of docs. X-Git-Tag: v_3_1_archive_branchpoint~212 X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=commitdiff_plain;h=520cd9953f7ad41fde7ad7e837276f547463e5d4 Adding ldp_print stuff for pdf generation of docs. --- diff --git a/utils/ldp_print/README b/utils/ldp_print/README new file mode 100644 index 00000000..833ae0bd --- /dev/null +++ b/utils/ldp_print/README @@ -0,0 +1,80 @@ + +###################################################################### + ldp_print - print tool/script for DocBook SGML/XML documents +###################################################################### + + Copyright (C) 2000-2002 - Greg Ferguson (gferg@metalab.unc.edu) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +###################################################################### + +This process/script is used in the production environment for the +LDP. It relies on the HTMLDOC software package (GPL'ed) which can be +obtained from the Easy Software Products (c) web site: + + http://www.easysw.com/htmldoc/ + +This process creates a PDF variant from the single-file HTML +representation of a DocBook SGML (or XML) instance. 
The simple +wrapper script (ldp_print) assumes that the file was created using +{open}jade in a manner similar to: + + jade -t sgml -i html -V nochunks -d $style $fname > $fname.html + +Give the script the filename as an argument. It will then split the +file into 'title.html' and 'body.html' and send each to htmldoc (as +the corresponding title page and body of the document). + + +CAVEATS +======= + +o Assumes perl is in /usr/bin; adjust if necessary. + +o You may need to specify where the htmldoc executable resides. + The script assumes it's within your $PATH. + +o If you want PostScript as an output variant, enable the + commented-out htmldoc command near the end of the ldp_print script. + +o Relies on output from a DocBook instance created via DSSSL/{open}jade! + +o Cleans up (removes) the intermediate files it creates (but not the + PDF or PostScript files, obviously!) + +o Works silently; PDF (PostScript) will be created in the same directory + as was specified for the input (single-file HTML) file. + +o Provided without warranty or support! + +o I ran into a problem with htmldoc v1.8.8 which required a source + code change (I was getting a core dump from the htmldoc process). + Here is the change required: + + htmldoc/ps-pdf.cxx : + 3662,3665d3661 + < /* gjf = 11Oct2000 */ + < if( temprow == NULL ) + < break; + < + + UPDATE (2001-10-10): It appears that later versions of htmldoc + have this problem corrected. The patch is no longer required. 
+ +==== +Greg Ferguson / gferg (at) metalab.unc.edu +11 Jan 2000 + diff --git a/utils/ldp_print/VERSION b/utils/ldp_print/VERSION new file mode 100644 index 00000000..8f00eb7d --- /dev/null +++ b/utils/ldp_print/VERSION @@ -0,0 +1 @@ +0.7.0, 2002-04-04 diff --git a/utils/ldp_print/fix_print_html.lib b/utils/ldp_print/fix_print_html.lib new file mode 100644 index 00000000..fdb9ab44 --- /dev/null +++ b/utils/ldp_print/fix_print_html.lib @@ -0,0 +1,227 @@ +# +# fix_print_html.lib +# +# Dan Scott / +# Ferg / +# +# Used to prepare single-file HTML variant for PDF/Postscript creation +# thru htmldoc. +# +# log: +# 16Oct2000 - 0.1 - initial entry +# 03Apr2001 - 0.2 - fix for +# 05Jul2001 - 0.3 - fix for and -f +# 12Oct2001 - 0.4 - fix for sections; loop thru both files (body/title) +# 27Nov2001 - 0.5 - fixed bug in determining where doc-index lies +# 18Jan2002 - 0.5.1 - entity fix (822*) +# 02Apr2002 - 0.6 - misc fixes (bibliography/appendix, etc). +# 04Apr2002 - 0.7 - fix for newer DSSSL +# + +sub fix_print_html { + + my($in,$out,$ttl) = @_; + + open(IN_FILE, "< $in") || do { + print "fix_print_html: cannot open $in: $!\n"; + return 0; + }; + + my($buf, $ttl_buf) = ''; + my($indx) = -1; + my($is_article) = 1; + while() { + + if( $indx == 1 ) { + + # ignore everything until we see the chapter or sect + # + if( $_ =~ /CLASS="CHAP/i || $_ =~ /CLASS="PREF/i + || + $_ =~ /CLASS="SECT/i ) { + + $buf .= $_; + $indx++; + + } else { + next; + } + + } elsif( $indx == 0 ) { + + # write out the title page file + # + if( $_ =~ /CLASS="TOC"/ ) { + + $ttl_buf .= ">\n\n\n"; + $ttl_buf =~ s/<\/H1\n/<\/H1\n>


<\/DIV\n>


/
<\/DIV\n>/ms; + &fix_html(\$ttl_buf, 1); + + open(TOC_FILE, "> $ttl") || do { + print "fix_print_html: cannot open $ttl: $!\n"; + close(IN_FILE); + return 0; + }; + print TOC_FILE $ttl_buf; + close(TOC_FILE); + $ttl_buf = ''; + $indx++; + + } else { + $ttl_buf .= $_; + } + + } elsif( $indx < 0 ) { + + if( $_ =~ /CLASS="BOOK"/i ) { + $is_article = 0; + } + + # up to this point, both buffers get the line + # + if( $_ =~ /CLASS="TITLEPAGE"/ ) { + + $ttl_buf .= $_ . ">\n

\n



\n<\/P\n"; + $indx++; + + } else { + $buf .= $_; + $ttl_buf .= $_; + } + + } else { + + $buf .= $_; + } + } + close(IN_FILE); + + + # fix body file + # + open(OUT_FILE, "> $out") || do { + print "fix_print_html: cannot open $out: $!\n"; + return 0; + }; + + &fix_html(\$buf, $is_article); + + print OUT_FILE $buf; + close(OUT_FILE); + + + return 1; +} + + +sub fix_html { + + my($buf, $is_article) = @_; + my($indx) = -1; + + + # make corrections and write out the file + # + + $$buf =~ s/(\n>/$1$2\n/gms; + $$buf =~ s/(\n>/$1$2\n/gms; + $$buf =~ s/(\n>/$1$2\n/gms; + if( $is_article == 0 ) { + $$buf =~ + s/(\nCLASS="SECT[TION\d]+"\n>)


) -1 ) { + $$buf = substr($$buf, 0, $indx); + $$buf .= "\n<\/BODY>\n<\/HTML>\n\n"; + } elsif( ($indx = rindex($$buf, " -1 ) { + $$buf = substr($$buf, 0, $indx); + $$buf .= "\n<\/BODY>\n<\/HTML>\n\n"; + } + + $$buf =~ s/\&\#13;//g; + $$buf =~ s/\&\#60;/\</g; + $$buf =~ s/\&\#62;/\>/g; + $$buf =~ s/\&\#8211;/\-/g; + $$buf =~ s/\&\#8220;/\"/g; + $$buf =~ s/\&\#8221;/\"/g; + $$buf =~ s/WIDTH=\"\d\"//g; + $$buf =~ s/><[\/]*TBODY//g; + $$buf =~ s/><[\/]*THEAD//g; + $$buf =~ s/TYPE=\"1\"\n//gim; + + $$buf =~ s/<\/P/gms; + + my($cnt, $j) = 0; + + if( $$buf !~ /

= 0; $cnt--) { + $j = $cnt + 1; + if( $cnt == 0 ) { + $j = 2; + } + $$buf =~ s/<\/DIV\n//gms; + + $buf =~ s/]*?>//gms; + $buf =~ s/<\/SPAN\n>//gms; + + $$buf =~ s/(>(<\/LI\n)/$1$2$3/gms; + + return; +} + + +# Return true from package include +# +1; + diff --git a/utils/ldp_print/ldp_print b/utils/ldp_print/ldp_print new file mode 100755 index 00000000..67129b96 --- /dev/null +++ b/utils/ldp_print/ldp_print @@ -0,0 +1,71 @@ +#!/usr/bin/perl -w +# +# usage: ldp_print +# +# Creates a PDF variant of a single-file HTML representation of a +# DocBook SGML (or XML) instance. This simple wrapper assumes that +# the file was created using {open}jade in a manner similar to: +# +# jade -t sgml -i html -V nochunks -d $style $fname > $fname.html +# +# Give this script the filename as an argument. It will then parse +# the file into 'title.html' and 'body.html' and send each to +# htmldoc (as the corresponding title page and body of the document). +# +# +# CAVEATS: +# +# Assumes perl is in /usr/bin; adjust if necessary +# +# You may need to specify where the htmldoc executable resides. +# The script assumes it's within your $PATH. +# +# If you want Postscript as an output variant, uncomment the +# appropriate lines (see below). +# +# Relies on output from a DocBook instance created via DSSSL/{open}jade! +# +# Cleans up (removes) the intermediate files it creates (but not the +# PDF or Postscript files, obviously!) +# +# Works silently; PDF (PostScript) will be created in the same directory +# as was specified for the input (single-file HTML) file. +# +# Provided without warranty or support! 
+# +# gferg@sgi.com / Ferg (used as part of the LDP production env) +# + +use strict; +push(@INC, "./"); +require 'fix_print_html.lib'; + +if( $ARGV[0] eq '' || !(-r $ARGV[0]) ) { + die "\nusage: ldp_print \n\n"; +} + +my($fname_wo_ext) = $ARGV[0]; +$fname_wo_ext =~ s/\.[\w]+$//; + + +# create new files from single HTML file to use for print +# +&fix_print_html($ARGV[0], 'body.html', 'title.html'); + +my($cmd) = "htmldoc --size universal -t pdf -f ${fname_wo_ext}.pdf " . + "--firstpage p1 --titlefile title.html body.html --footer c.1"; + +# For postscript output; append onto the above cmd string: +# +# "; htmldoc --size universal -t ps -f ${fname_wo_ext}.ps " . +# "--firstpage p1 --titlefile title.html body.html"; +# +system($cmd); +die "\nldp_print: could not create ${fname_wo_ext}.pdf ($!)\n" if ($?); + +# cleanup +# +system("rm -f body.html title.html"); + +exit(0); +