From: Fabian Keil Date: Fri, 26 Feb 2021 09:35:36 +0000 (+0100) Subject: Remove obsolete pcre code X-Git-Tag: v_3_0_33~99 X-Git-Url: http://www.privoxy.org/gitweb/%22https:/developer-manual/man-page/static/@user-manual@@actions-help-prefix@ACTIONS-FILE?a=commitdiff_plain;h=878f6fc9589394ee23f5e0e9bd549f88532a3978;p=privoxy.git Remove obsolete pcre code It was already detached from the build since d7c2657e0b. --- diff --git a/pcre/.gitignore b/pcre/.gitignore deleted file mode 100644 index a504754c..00000000 --- a/pcre/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -chartables.c -dftables -vc_dftables.plg -vc_dftables -vc_dftables_dbg -dftables.exe diff --git a/pcre/Makefile.in b/pcre/Makefile.in deleted file mode 100644 index 94edf499..00000000 --- a/pcre/Makefile.in +++ /dev/null @@ -1,219 +0,0 @@ - -# Makefile.in for PCRE (Perl-Compatible Regular Expression) library. - -#---------------------------------------------------------------------------# -# To build mingw32 DLL uncomment the next two lines. This addition for # -# mingw32 was contributed by . I (Philip # -# Hazel) don't know anything about it! There are some additional targets at # -# the bottom of this Makefile. # -#---------------------------------------------------------------------------# -# -# include dll.mk -# DLL_LDFLAGS=-s - - -#---------------------------------------------------------------------------# -# The next few lines are modified by "configure" to insert data that it is # -# given in its arguments, or which it finds out for itself. # -#---------------------------------------------------------------------------# - -# BINDIR is the directory in which the pcregrep command is installed. -# INCDIR is the directory in which the public header file pcre.h is installed. -# LIBDIR is the directory in which the libraries are installed. -# MANDIR is the directory in which the man pages are installed. -# The pcretest program, as it is a test program, does not get installed -# anywhere. 
- -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -BINDIR = @bindir@ -LIBDIR = @libdir@ -INCDIR = @includedir@ -MANDIR = @mandir@ - -CC = @CC@ -CFLAGS = @CFLAGS@ -RANLIB = @RANLIB@ -UTF8 = @UTF8@ - -# LIBTOOL defaults to "./libtool", which enables the building of shared -# libraries. If "configure" is called with --disable-shared-libraries, LIBTOOL -# is set to "", which stops shared libraries from being built, and LIBSUFFIX -# is set to "a" instead of "la", which causes the shared libraries not to be -# installed. - -LIBTOOL = @LIBTOOL@ -LIBSUFFIX = @LIBSUFFIX@ - -# These are the version numbers for the shared libraries - -PCRELIBVERSION = @PCRE_LIB_VERSION@ -PCREPOSIXLIBVERSION = @PCRE_POSIXLIB_VERSION@ - - -#---------------------------------------------------------------------------# -# A copy of install-sh is in this distribution and is used by default. # -#---------------------------------------------------------------------------# - -INSTALL = ./install-sh -c -INSTALL_DATA = ${INSTALL} -m 644 - - -#---------------------------------------------------------------------------# -# For almost all systems, the command to create a library is "ar cq", but # -# there is at least one where it is different, so this command must be # -# configurable. However, I haven't got round to learning how to make # -# "configure" find this out for itself. It is necessary to use a command # -# such as "make AR='ar -rc'" if you need to vary this. The setting of AR is # -# *not* passed over to ./ltconfig, because it does its own setting up. 
# -#---------------------------------------------------------------------------# - -AR = ar cq - - -############################################################################## - - -OBJ = maketables.o get.o study.o pcre.o -LOBJ = maketables.lo get.lo study.lo pcre.lo - -all: libtool libpcre.$(LIBSUFFIX) libpcreposix.$(LIBSUFFIX) pcretest pcregrep - -libtool: config.guess config.sub ltconfig ltmain.sh - @if test "$(LIBTOOL)" = "./libtool"; then \ - echo '--- Building libtool ---'; \ - CC=$(CC) CFLAGS='$(CFLAGS)' RANLIB='$(RANLIB)' ./ltconfig ./ltmain.sh; \ - echo '--- Built libtool ---'; fi - -pcregrep: libpcre.$(LIBSUFFIX) pcregrep.o - @echo ' ' - @echo '--- Building pcregrep utility' - @echo ' ' - $(LIBTOOL) $(CC) $(CFLAGS) -o pcregrep pcregrep.o libpcre.$(LIBSUFFIX) - -pcretest: libpcre.$(LIBSUFFIX) libpcreposix.$(LIBSUFFIX) pcretest.o - @echo ' ' - @echo '--- Building pcretest testing program' - @echo ' ' - $(LIBTOOL) $(PURIFY) $(CC) $(CFLAGS) -o pcretest pcretest.o \ - libpcre.$(LIBSUFFIX) libpcreposix.$(LIBSUFFIX) - -libpcre.a: $(OBJ) - @echo ' ' - @echo '--- Building static library: libpcre' - @echo ' ' - -rm -f libpcre.a - $(AR) libpcre.a $(OBJ) - $(RANLIB) libpcre.a - -libpcre.la: $(OBJ) - @echo ' ' - @echo '--- Building shared library: libpcre' - @echo ' ' - -rm -f libpcre.la - ./libtool $(CC) -version-info '$(PCRELIBVERSION)' -o libpcre.la -rpath $(LIBDIR) $(LOBJ) - -libpcreposix.a: pcreposix.o - @echo ' ' - @echo '--- Building static library: libpcreposix' - @echo ' ' - -rm -f libpcreposix.a - $(AR) libpcreposix.a pcreposix.o - $(RANLIB) libpcreposix.a - -libpcreposix.la: pcreposix.o - @echo ' ' - @echo '--- Building shared library: libpcreposix' - @echo ' ' - -rm -f libpcreposix.la - ./libtool $(CC) -version-info '$(PCREPOSIXLIBVERSION)' -o libpcreposix.la -rpath $(LIBDIR) pcreposix.lo - -pcre.o: chartables.c pcre.c pcre.h internal.h config.h Makefile - $(LIBTOOL) $(CC) -c $(CFLAGS) $(UTF8) pcre.c - -pcreposix.o: pcreposix.c pcreposix.h internal.h 
pcre.h config.h Makefile - $(LIBTOOL) $(CC) -c $(CFLAGS) pcreposix.c - -maketables.o: maketables.c pcre.h internal.h config.h Makefile - $(LIBTOOL) $(CC) -c $(CFLAGS) maketables.c - -get.o: get.c pcre.h internal.h config.h Makefile - $(LIBTOOL) $(CC) -c $(CFLAGS) get.c - -study.o: study.c pcre.h internal.h config.h Makefile - $(LIBTOOL) $(CC) -c $(CFLAGS) $(UTF8) study.c - -pcretest.o: pcretest.c pcre.h config.h Makefile - $(CC) -c $(CFLAGS) $(UTF8) pcretest.c - -pcregrep.o: pcregrep.c pcre.h Makefile config.h - $(CC) -c $(CFLAGS) $(UTF8) pcregrep.c - -# An auxiliary program makes the default character table source - -chartables.c: dftables - ./dftables >chartables.c - -dftables: dftables.c maketables.c pcre.h internal.h config.h Makefile - $(CC) -o dftables $(CFLAGS) dftables.c - -install: all - $(LIBTOOL) $(INSTALL_DATA) libpcre.$(LIBSUFFIX) $(DESTDIR)/$(LIBDIR)/libpcre.$(LIBSUFFIX) - $(LIBTOOL) $(INSTALL_DATA) libpcreposix.$(LIBSUFFIX) $(DESTDIR)/$(LIBDIR)/libpcreposix.$(LIBSUFFIX) - $(INSTALL_DATA) pcre.h $(DESTDIR)/$(INCDIR)/pcre.h - $(INSTALL_DATA) pcreposix.h $(DESTDIR)/$(INCDIR)/pcreposix.h - $(INSTALL_DATA) doc/pcre.3 $(DESTDIR)/$(MANDIR)/man3/pcre.3 - $(INSTALL_DATA) doc/pcreposix.3 $(DESTDIR)/$(MANDIR)/man3/pcreposix.3 - $(INSTALL_DATA) doc/pcregrep.1 $(DESTDIR)/$(MANDIR)/man1/pcregrep.1 - @if test "$(LIBTOOL)" = "./libtool"; then \ - echo ' '; \ - echo '--- Rebuilding pcregrep to use installed shared library ---'; \ - echo $(CC) $(CFLAGS) -o pcregrep pcregrep.o -L$(DESTDIR)/$(LIBDIR) -lpcre; \ - $(CC) $(CFLAGS) -o pcregrep pcregrep.o -L$(DESTDIR)/$(LIBDIR) -lpcre; \ - echo '--- Rebuilding pcretest to use installed shared library ---'; \ - echo $(CC) $(CFLAGS) -o pcretest pcretest.o -L$(DESTDIR)/$(LIBDIR) -lpcre -lpcreposix; \ - $(CC) $(CFLAGS) -o pcretest pcretest.o -L$(DESTDIR)/$(LIBDIR) -lpcre -lpcreposix; \ - fi - $(INSTALL) pcregrep $(DESTDIR)/$(BINDIR)/pcregrep - $(INSTALL) pcre-config $(DESTDIR)/$(BINDIR)/pcre-config - -# We deliberately omit 
dftables and chartables.c from 'make clean'; once made -# chartables.c shouldn't change, and if people have edited the tables by hand, -# you don't want to throw them away. - -clean:; -rm -rf *.o *.lo *.a *.la .libs pcretest pcregrep testtry - -# But "make distclean" should get back to a virgin distribution - -distclean: clean - -rm -f chartables.c libtool pcre-config pcre.h \ - Makefile config.h config.status config.log config.cache - -check: runtest - -test: runtest - -runtest: all - ./RunTest - -######## MINGW32 ############### MINGW32 ############### MINGW32 ############# - -# This addition for mingw32 was contributed by Paul Sokolovsky -# . I (PH) don't know anything about it! - -dll: _dll libpcre.dll.a pcregrep_d pcretest_d - -_dll: - $(MAKE) CFLAGS=-DSTATIC pcre.dll - -pcre.dll: $(OBJ) pcreposix.o pcre.def -libpcre.dll.a: pcre.def - -pcregrep_d: libpcre.dll.a pcregrep.o - $(CC) $(CFLAGS) -L. -o pcregrep pcregrep.o -lpcre.dll - -pcretest_d: libpcre.dll.a pcretest.o - $(PURIFY) $(CC) $(CFLAGS) -L. -o pcretest pcretest.o -lpcre.dll - -# End diff --git a/pcre/RunTest.in b/pcre/RunTest.in deleted file mode 100644 index 6e4eb085..00000000 --- a/pcre/RunTest.in +++ /dev/null @@ -1,148 +0,0 @@ -#! /bin/sh - -# This file is generated by configure from RunTest.in. Make any changes -# to that file. 
- -# Run PCRE tests - -cf=diff - -# Select which tests to run; if no selection, run all - -do1=no -do2=no -do3=no -do4=no -do5=no -do6=no - -while [ $# -gt 0 ] ; do - case $1 in - 1) do1=yes;; - 2) do2=yes;; - 3) do3=yes;; - 4) do4=yes;; - 5) do5=yes;; - 6) do6=yes;; - *) echo "Unknown test number $1"; exit 1;; - esac - shift -done - -if [ "@UTF8@" = "" ] ; then - if [ $do5 = yes ] ; then - echo "Can't run test 5 because UFT8 support is not configured" - exit 1 - fi - if [ $do6 = yes ] ; then - echo "Can't run test 6 because UFT8 support is not configured" - exit 1 - fi -fi - -if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a\ - $do5 = no -a $do6 = no ] ; then - do1=yes - do2=yes - do3=yes - do4=yes - if [ "@UTF8@" != "" ] ; then do5=yes; fi - if [ "@UTF8@" != "" ] ; then do6=yes; fi -fi - -# Primary test, Perl-compatible - -if [ $do1 = yes ] ; then - echo "Testing main functionality (Perl compatible)" - ./pcretest testdata/testinput1 testtry - if [ $? = 0 ] ; then - $cf testtry testdata/testoutput1 - if [ $? != 0 ] ; then exit 1; fi - else exit 1 - fi -fi - -# PCRE tests that are not Perl-compatible - API & error tests, mostly - -if [ $do2 = yes ] ; then - echo "Testing API and error handling (not Perl compatible)" - ./pcretest -i testdata/testinput2 testtry - if [ $? = 0 ] ; then - $cf testtry testdata/testoutput2 - if [ $? != 0 ] ; then exit 1; fi - else exit 1 - fi -fi - -# Additional Perl-compatible tests for Perl 5.005's new features - -if [ $do3 = yes ] ; then - echo "Testing Perl 5.005 features (Perl 5.005 compatible)" - ./pcretest testdata/testinput3 testtry - if [ $? = 0 ] ; then - $cf testtry testdata/testoutput3 - if [ $? != 0 ] ; then exit 1; fi - else exit 1 - fi -fi - -if [ $do1 = yes -a $do2 = yes -a $do3 = yes ] ; then - echo " " - echo "The three main tests all ran OK" - echo " " -fi - -# Locale-specific tests, provided the "fr" locale is available - -if [ $do4 = yes ] ; then - locale -a | grep '^fr$' >/dev/null - if [ $? 
-eq 0 ] ; then - echo "Testing locale-specific features (using 'fr' locale)" - ./pcretest testdata/testinput4 testtry - if [ $? = 0 ] ; then - $cf testtry testdata/testoutput4 - if [ $? != 0 ] ; then - echo " " - echo "Locale test did not run entirely successfully." - echo "This usually means that there is a problem with the locale" - echo "settings rather than a bug in PCRE." - else - echo "Locale test ran OK" - fi - echo " " - else exit 1 - fi - else - echo "Cannot test locale-specific features - 'fr' locale not found," - echo "or the \"locale\" command is not available to check for it." - echo " " - fi -fi - -# Additional tests for UTF8 support - -if [ $do5 = yes ] ; then - echo "Testing experimental, incomplete UTF8 support (Perl compatible)" - ./pcretest testdata/testinput5 testtry - if [ $? = 0 ] ; then - $cf testtry testdata/testoutput5 - if [ $? != 0 ] ; then exit 1; fi - else exit 1 - fi - echo "UTF8 test ran OK" - echo " " -fi - -if [ $do6 = yes ] ; then - echo "Testing API and internals for UTF8 support (not Perl compatible)" - ./pcretest testdata/testinput6 testtry - if [ $? = 0 ] ; then - $cf testtry testdata/testoutput6 - if [ $? != 0 ] ; then exit 1; fi - else exit 1 - fi - echo "UTF8 internals test ran OK" - echo " " -fi - -# End diff --git a/pcre/config.guess b/pcre/config.guess deleted file mode 100644 index e1b58717..00000000 --- a/pcre/config.guess +++ /dev/null @@ -1,1121 +0,0 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999 -# Free Software Foundation, Inc. -# -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Written by Per Bothner . -# The master version of this file is at the FSF in /home/gd/gnu/lib. -# Please send patches to . -# -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. -# -# The plan is that this can be called by configure scripts if you -# don't specify an explicit system type (host/target name). -# -# Only a few systems have been added to this list; please add others -# (but try to keep the structure clean). -# - -# Use $HOST_CC if defined. $CC may point to a cross-compiler -if test x"$CC_FOR_BUILD" = x; then - if test x"$HOST_CC" != x; then - CC_FOR_BUILD="$HOST_CC" - else - if test x"$CC" != x; then - CC_FOR_BUILD="$CC" - else - CC_FOR_BUILD=cc - fi - fi -fi - - -# This is needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 8/24/94.) 
-if (test -f /.attbin/uname) >/dev/null 2>&1 ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown - -dummy=dummy-$$ -trap 'rm -f $dummy.c $dummy.o $dummy; exit 1' 1 2 15 - -# Note: order is significant - the case branches are not exclusive. - -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in - alpha:OSF1:*:*) - if test $UNAME_RELEASE = "V4.0"; then - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - fi - # A Vn.n version is a released version. - # A Tn.n version is a released field test version. - # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. - cat <$dummy.s - .globl main - .ent main -main: - .frame \$30,0,\$26,0 - .prologue 0 - .long 0x47e03d80 # implver $0 - lda \$2,259 - .long 0x47e20c21 # amask $2,$1 - srl \$1,8,\$2 - sll \$2,2,\$2 - sll \$0,3,\$0 - addl \$1,\$0,\$0 - addl \$2,\$0,\$0 - ret \$31,(\$26),1 - .end main -EOF - $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null - if test "$?" = 0 ; then - ./$dummy - case "$?" in - 7) - UNAME_MACHINE="alpha" - ;; - 15) - UNAME_MACHINE="alphaev5" - ;; - 14) - UNAME_MACHINE="alphaev56" - ;; - 10) - UNAME_MACHINE="alphapca56" - ;; - 16) - UNAME_MACHINE="alphaev6" - ;; - esac - fi - rm -f $dummy.s $dummy - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit 0 ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? 
- echo alpha-pc-interix - exit 0 ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit 0 ;; - Amiga*:UNIX_System_V:4.0:*) - echo m68k-cbm-sysv4 - exit 0;; - amiga:NetBSD:*:*) - echo m68k-cbm-netbsd${UNAME_RELEASE} - exit 0 ;; - amiga:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos - exit 0 ;; - arc64:OpenBSD:*:*) - echo mips64el-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - hkmips:OpenBSD:*:*) - echo mips-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pmax:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sgi:OpenBSD:*:*) - echo mips-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - wgrisc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - *:OS/390:*:*) - echo i370-ibm-openedition - exit 0 ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} - exit 0;; - arm32:NetBSD:*:*) - echo arm-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - exit 0 ;; - SR2?01:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit 0;; - Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "`(/bin/universe) 2>/dev/null`" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit 0 ;; - NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit 0 ;; - sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - i86pc:SunOS:5.*:*) - echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. 
Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in - Series*|S4*) - UNAME_RELEASE=`uname -v` - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit 0 ;; - sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} - exit 0 ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=`(head -1 /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 - case "`/bin/arch`" in - sun3) - echo m68k-sun-sunos${UNAME_RELEASE} - ;; - sun4) - echo sparc-sun-sunos${UNAME_RELEASE} - ;; - esac - exit 0 ;; - aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} - exit 0 ;; - atari*:NetBSD:*:*) - echo m68k-atari-netbsd${UNAME_RELEASE} - exit 0 ;; - atari*:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - # The situation for MiNT is a little confusing. The machine name - # can be virtually everything (everything which is not - # "atarist" or "atariste" at least should have a processor - # > m68000). The system name ranges from "MiNT" over "FreeMiNT" - # to the lowercase version "mint" (or "freemint"). Finally - # the system name "TOS" denotes a system which is actually not - # MiNT. But MiNT is downward compatible to TOS, so this should - # be no problem. 
- atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit 0 ;; - hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit 0 ;; - *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit 0 ;; - sun3*:NetBSD:*:*) - echo m68k-sun-netbsd${UNAME_RELEASE} - exit 0 ;; - sun3*:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:NetBSD:*:*) - echo m68k-apple-netbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} - exit 0 ;; - macppc:NetBSD:*:*) - echo powerpc-apple-netbsd${UNAME_RELEASE} - exit 0 ;; - RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit 0 ;; - RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} - exit 0 ;; - mips:*:*:UMIPS | mips:*:*:RISCos) - sed 's/^ //' << EOF >$dummy.c -#ifdef __cplusplus - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); - #endif - #if 
defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - $CC_FOR_BUILD $dummy.c -o $dummy \ - && ./$dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ - && rm $dummy.c $dummy && exit 0 - rm -f $dummy.c $dummy - echo mips-mips-riscos${UNAME_RELEASE} - exit 0 ;; - Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit 0 ;; - m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit 0 ;; - m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit 0 ;; - m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit 0 ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110] - then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] - then - echo m88k-dg-dgux${UNAME_RELEASE} - else - echo m88k-dg-dguxbcs${UNAME_RELEASE} - fi - else - echo i586-dg-dgux${UNAME_RELEASE} - fi - exit 0 ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit 0 ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit 0 ;; - XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit 0 ;; - Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit 0 ;; - *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit 0 ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i?86:AIX:*:*) - echo i386-ibm-aix - exit 0 ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - sed 's/^ //' << EOF >$dummy.c - #include - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm $dummy.c $dummy && exit 0 - rm -f $dummy.c $dummy - echo rs6000-ibm-aix3.2.5 - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 - else - echo rs6000-ibm-aix3.2 - fi - exit 0 ;; - *:AIX:*:4) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | head -1 | awk '{ print $1 }'` - if /usr/sbin/lsattr -EHl ${IBM_CPU_ID} | grep POWER >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=4.${UNAME_RELEASE} - fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit 0 ;; - *:AIX:*:*) - echo rs6000-ibm-aix - exit 0 ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) - echo romp-ibm-bsd4.4 - exit 0 ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC NetBSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit 0 ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - echo rs6000-bull-bosx - exit 0 ;; - DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit 0 ;; - 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit 0 ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo m68k-hp-bsd4.4 - exit 0 ;; - 9000/[34678]??:HP-UX:*:*) - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? 
) HP_ARCH=m68k ;; - 9000/[678][0-9][0-9]) - sed 's/^ //' << EOF >$dummy.c - #include - #include - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (CCOPTS= $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null ) && HP_ARCH=`./$dummy` - rm -f $dummy.c $dummy - esac - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit 0 ;; - 3050*:HI-UX:*:*) - sed 's/^ //' << EOF >$dummy.c - #include - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. 
*/ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm $dummy.c $dummy && exit 0 - rm -f $dummy.c $dummy - echo unknown-hitachi-hiuxwe2 - exit 0 ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) - echo hppa1.1-hp-bsd - exit 0 ;; - 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit 0 ;; - *9??*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit 0 ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) - echo hppa1.1-hp-osf - exit 0 ;; - hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit 0 ;; - i?86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk - else - echo ${UNAME_MACHINE}-unknown-osf1 - fi - exit 0 ;; - parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit 0 ;; - hppa*:OpenBSD:*:*) - echo hppa-unknown-openbsd - exit 0 ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit 0 ;; - C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit 0 ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit 0 ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit 0 ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit 0 ;; - CRAY*X-MP:*:*:*) - echo xmp-cray-unicos - exit 0 ;; - CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} - exit 0 ;; - CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ - exit 0 ;; - CRAY*TS:*:*:*) - echo 
t90-cray-unicos${UNAME_RELEASE} - exit 0 ;; - CRAY*T3E:*:*:*) - echo alpha-cray-unicosmk${UNAME_RELEASE} - exit 0 ;; - CRAY-2:*:*:*) - echo cray2-cray-unicos - exit 0 ;; - F300:UNIX_System_V:*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "f300-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; - F301:UNIX_System_V:*:*) - echo f301-fujitsu-uxpv`echo $UNAME_RELEASE | sed 's/ .*//'` - exit 0 ;; - hp3[0-9][05]:NetBSD:*:*) - echo m68k-hp-netbsd${UNAME_RELEASE} - exit 0 ;; - hp300:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - i?86:BSD/386:*:* | i?86:BSD/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit 0 ;; - sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; - *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; - *:FreeBSD:*:*) - if test -x /usr/bin/objformat; then - if test "elf" = "`/usr/bin/objformat`"; then - echo ${UNAME_MACHINE}-unknown-freebsdelf`echo ${UNAME_RELEASE}|sed -e 's/[-_].*//'` - exit 0 - fi - fi - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit 0 ;; - *:NetBSD:*:*) - echo ${UNAME_MACHINE}-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*//'` - exit 0 ;; - *:OpenBSD:*:*) - echo ${UNAME_MACHINE}-unknown-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - exit 0 ;; - i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin - exit 0 ;; - i*:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 - exit 0 ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? 
- echo i386-pc-interix - exit 0 ;; - i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin - exit 0 ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin - exit 0 ;; - prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - *:GNU:*:*) - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit 0 ;; - *:Linux:*:*) - - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - ld_help_string=`cd /; ld --help 2>&1` - ld_supported_emulations=`echo $ld_help_string \ - | sed -ne '/supported emulations:/!d - s/[ ][ ]*/ /g - s/.*supported emulations: *// - s/ .*// - p'` - case "$ld_supported_emulations" in - *ia64) - echo "${UNAME_MACHINE}-unknown-linux" - exit 0 - ;; - i?86linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit 0 - ;; - i?86coff) - echo "${UNAME_MACHINE}-pc-linux-gnucoff" - exit 0 - ;; - sparclinux) - echo "${UNAME_MACHINE}-unknown-linux-gnuaout" - exit 0 - ;; - armlinux) - echo "${UNAME_MACHINE}-unknown-linux-gnuaout" - exit 0 - ;; - elf32arm*) - echo "${UNAME_MACHINE}-unknown-linux-gnu" - exit 0 - ;; - armelf_linux*) - echo "${UNAME_MACHINE}-unknown-linux-gnu" - exit 0 - ;; - m68klinux) - echo "${UNAME_MACHINE}-unknown-linux-gnuaout" - exit 0 - ;; - elf32ppc) - # Determine Lib Version - cat >$dummy.c < -#if defined(__GLIBC__) -extern char __libc_version[]; -extern char __libc_release[]; -#endif -main(argc, argv) - int argc; - char *argv[]; -{ -#if defined(__GLIBC__) - printf("%s %s\n", __libc_version, __libc_release); -#else - printf("unkown\n"); -#endif - return 0; -} -EOF - LIBC="" - $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null - if test "$?" = 0 ; then - ./$dummy | grep 1\.99 > /dev/null - if test "$?" 
= 0 ; then - LIBC="libc1" - fi - fi - rm -f $dummy.c $dummy - echo powerpc-unknown-linux-gnu${LIBC} - exit 0 - ;; - esac - - if test "${UNAME_MACHINE}" = "alpha" ; then - sed 's/^ //' <$dummy.s - .globl main - .ent main - main: - .frame \$30,0,\$26,0 - .prologue 0 - .long 0x47e03d80 # implver $0 - lda \$2,259 - .long 0x47e20c21 # amask $2,$1 - srl \$1,8,\$2 - sll \$2,2,\$2 - sll \$0,3,\$0 - addl \$1,\$0,\$0 - addl \$2,\$0,\$0 - ret \$31,(\$26),1 - .end main -EOF - LIBC="" - $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null - if test "$?" = 0 ; then - ./$dummy - case "$?" in - 7) - UNAME_MACHINE="alpha" - ;; - 15) - UNAME_MACHINE="alphaev5" - ;; - 14) - UNAME_MACHINE="alphaev56" - ;; - 10) - UNAME_MACHINE="alphapca56" - ;; - 16) - UNAME_MACHINE="alphaev6" - ;; - esac - - objdump --private-headers $dummy | \ - grep ld.so.1 > /dev/null - if test "$?" = 0 ; then - LIBC="libc1" - fi - fi - rm -f $dummy.s $dummy - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0 - elif test "${UNAME_MACHINE}" = "mips" ; then - cat >$dummy.c </dev/null && ./$dummy "${UNAME_MACHINE}" && rm $dummy.c $dummy && exit 0 - rm -f $dummy.c $dummy - else - # Either a pre-BFD a.out linker (linux-gnuoldld) - # or one that does not give us useful --help. - # GCC wants to distinguish between linux-gnuoldld and linux-gnuaout. - # If ld does not provide *any* "supported emulations:" - # that means it is gnuoldld. - echo "$ld_help_string" | grep >/dev/null 2>&1 "supported emulations:" - test $? 
!= 0 && echo "${UNAME_MACHINE}-pc-linux-gnuoldld" && exit 0 - - case "${UNAME_MACHINE}" in - i?86) - VENDOR=pc; - ;; - *) - VENDOR=unknown; - ;; - esac - # Determine whether the default compiler is a.out or elf - cat >$dummy.c < -#ifdef __cplusplus - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif -#ifdef __ELF__ -# ifdef __GLIBC__ -# if __GLIBC__ >= 2 - printf ("%s-${VENDOR}-linux-gnu\n", argv[1]); -# else - printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]); -# endif -# else - printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]); -# endif -#else - printf ("%s-${VENDOR}-linux-gnuaout\n", argv[1]); -#endif - return 0; -} -EOF - $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy "${UNAME_MACHINE}" && rm $dummy.c $dummy && exit 0 - rm -f $dummy.c $dummy - fi ;; -# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. earlier versions -# are messed up and put the nodename in both sysname and nodename. - i?86:DYNIX/ptx:4*:*) - echo i386-sequent-sysv4 - exit 0 ;; - i?86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. 
- echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit 0 ;; - i?86:*:4.*:* | i?86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} - else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} - fi - exit 0 ;; - i?86:*:5:7*) - # Fixed at (any) Pentium or better - UNAME_MACHINE=i586 - if [ ${UNAME_SYSTEM} = "UnixWare" ] ; then - echo ${UNAME_MACHINE}-sco-sysv${UNAME_RELEASE}uw${UNAME_VERSION} - else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_RELEASE} - fi - exit 0 ;; - i?86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')` - (/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - (/bin/uname -X|egrep '^Machine.*Pent ?II' >/dev/null) \ - && UNAME_MACHINE=i686 - (/bin/uname -X|egrep '^Machine.*Pentium Pro' >/dev/null) \ - && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL - else - echo ${UNAME_MACHINE}-pc-sysv32 - fi - exit 0 ;; - pc:*:*:*) - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i386. - echo i386-pc-msdosdjgpp - exit 0 ;; - Intel:Mach:3*:*) - echo i386-pc-mach3 - exit 0 ;; - paragon:*:*:*) - echo i860-intel-osf1 - exit 0 ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. 
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 - fi - exit 0 ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - echo m68010-convergent-sysv - exit 0 ;; - M68*:*:R3V[567]*:*) - test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; - 3[34]??:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 4850:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4.3${OS_REL} && exit 0 - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4 && exit 0 ;; - m68*:LynxOS:2.*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit 0 ;; - i?86:LynxOS:2.*:* | i?86:LynxOS:3.[01]*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - rs6000:LynxOS:2.*:* | PowerPC:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} - exit 0 ;; - RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit 0 ;; - RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit 0 ;; - *:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 - else - echo ns32k-sni-sysv - fi - exit 0 ;; - PENTIUM:CPunix:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit 0 ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes . - # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit 0 ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. 
- echo i860-stratus-sysv4 - exit 0 ;; - mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} - exit 0 ;; - news*:NEWS-OS:*:6*) - echo mips-sony-newsos6 - exit 0 ;; - R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} - else - echo mips-unknown-sysv${UNAME_RELEASE} - fi - exit 0 ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit 0 ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit 0 ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. - echo i586-pc-beos - exit 0 ;; - SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} - exit 0 ;; - SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} - exit 0 ;; - Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; - *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; - *:QNX:*:4*) - echo i386-qnx-qnx${UNAME_VERSION} - exit 0 ;; -esac - -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - -cat >$dummy.c < -# include -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... 
*/ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -#if !defined (ultrix) - printf ("vax-dec-bsd\n"); exit (0); -#else - printf ("vax-dec-ultrix\n"); exit (0); -#endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy && rm $dummy.c $dummy && exit 0 -rm -f $dummy.c $dummy - -# Apollos put the system type in the environment. 
- -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit 0 ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit 0 ;; - c34*) - echo c34-convex-bsd - exit 0 ;; - c38*) - echo c38-convex-bsd - exit 0 ;; - c4*) - echo c4-convex-bsd - exit 0 ;; - esac -fi - -#echo '(Unable to guess system type)' 1>&2 - -exit 1 diff --git a/pcre/config.h b/pcre/config.h deleted file mode 100644 index c767cbb4..00000000 --- a/pcre/config.h +++ /dev/null @@ -1,5 +0,0 @@ - -/* For Privoxy, we just use Privoxy's config.h */ - -#include "../config.h" - diff --git a/pcre/config.in b/pcre/config.in deleted file mode 100644 index 02f42593..00000000 --- a/pcre/config.in +++ /dev/null @@ -1,33 +0,0 @@ - -/* On Unix systems config.in is converted by configure into config.h. PCRE is -written in Standard C, but there are a few non-standard things it can cope -with, allowing it to run on SunOS4 and other "close to standard" systems. - -On a non-Unix system you should just copy this file into config.h and change -the definitions of HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because -of the way autoconf works, these cannot be made the defaults. If your system -has bcopy() and not memmove(), change the definition of HAVE_BCOPY instead of -HAVE_MEMMOVE. If your system has neither bcopy() nor memmove(), leave them both -as 0; an emulation function will be used. */ - -/* Define to empty if the keyword does not work. */ - -#undef const - -/* Define to `unsigned' if doesn't define size_t. */ - -#undef size_t - -/* The following two definitions are mainly for the benefit of SunOS4, which -doesn't have the strerror() or memmove() functions that should be present in -all Standard C libraries. 
The macros HAVE_STRERROR and HAVE_MEMMOVE should -normally be defined with the value 1 for other systems, but unfortunately we -can't make this the default because "configure" files generated by autoconf -will only change 0 to 1; they won't change 1 to 0 if the functions are not -found. If HAVE_MEMMOVE is set to 1, the value of HAVE_BCOPY is not relevant. */ - -#define HAVE_STRERROR 0 -#define HAVE_MEMMOVE 0 -#define HAVE_BCOPY 0 - -/* End */ diff --git a/pcre/config.sub b/pcre/config.sub deleted file mode 100644 index 28426bb8..00000000 --- a/pcre/config.sub +++ /dev/null @@ -1,1232 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script, version 1.1. -# Copyright (C) 1991, 92-97, 1998, 1999 Free Software Foundation, Inc. -# This file is (in principle) common to ALL GNU software. -# The presence of a machine in this file suggests that SOME GNU software -# can handle that machine. It does not imply ALL GNU software can. -# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. 
- -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. - -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. - -if [ x$1 = x ] -then - echo Configuration name missing. 1>&2 - echo "Usage: $0 CPU-MFR-OPSYS" 1>&2 - echo "or $0 ALIAS" 1>&2 - echo where ALIAS is a recognized configuration type. 1>&2 - exit 1 -fi - -# First pass through any local machine types. -case $1 in - *local*) - echo $1 - exit 0 - ;; - *) - ;; -esac - -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. -maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - linux-gnu*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac - -### Let's recognize common machines as not being operating systems so -### that things like config.sub decstation-3100 work. 
We also -### recognize some manufacturers as not being operating systems, so we -### can provide default operating systems below. -case $os in - -sun*os*) - # Prevent following clause from handling this invalid input. - ;; - -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ - -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ - -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ - -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ - -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ - -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple) - os= - basic_machine=$1 - ;; - -sim | -cisco | -oki | -wec | -winbond) - os= - basic_machine=$1 - ;; - -scout) - ;; - -wrs) - os=-vxworks - basic_machine=$1 - ;; - -hiux*) - os=-hiuxwe2 - ;; - -sco5) - os=-sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco4) - os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2.[4-9]*) - os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2v[4-9]*) - # Don't forget version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco*) - os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -udk*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -isc) - os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -clix*) - basic_machine=clipper-intergraph - ;; - -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -lynx*) - os=-lynxos - ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` - ;; - -psos*) - os=-psos - ;; - -mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; -esac - -# Decode aliases for certain CPU-COMPANY combinations. -case $basic_machine in - # Recognize the basic CPU types without company name. 
- # Some are omitted here because they have special meanings below. - tahoe | i860 | ia64 | m32r | m68k | m68000 | m88k | ns32k | arc | arm \ - | arme[lb] | pyramid | mn10200 | mn10300 | tron | a29k \ - | 580 | i960 | h8300 \ - | hppa | hppa1.0 | hppa1.1 | hppa2.0 | hppa2.0w | hppa2.0n \ - | alpha | alphaev[4-7] | alphaev56 | alphapca5[67] \ - | we32k | ns16k | clipper | i370 | sh | powerpc | powerpcle \ - | 1750a | dsp16xx | pdp11 | mips16 | mips64 | mipsel | mips64el \ - | mips64orion | mips64orionel | mipstx39 | mipstx39el \ - | mips64vr4300 | mips64vr4300el | mips64vr4100 | mips64vr4100el \ - | mips64vr5000 | miprs64vr5000el | mcore \ - | sparc | sparclet | sparclite | sparc64 | sparcv9 | v850 | c4x \ - | thumb | d10v | fr30) - basic_machine=$basic_machine-unknown - ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | z8k | v70 | h8500 | w65 | pj | pjl) - ;; - - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i[34567]86) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. - # FIXME: clean up the formatting here. 
- vax-* | tahoe-* | i[34567]86-* | i860-* | ia64-* | m32r-* | m68k-* | m68000-* \ - | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | arm-* | c[123]* \ - | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \ - | power-* | none-* | 580-* | cray2-* | h8300-* | h8500-* | i960-* \ - | xmp-* | ymp-* \ - | hppa-* | hppa1.0-* | hppa1.1-* | hppa2.0-* | hppa2.0w-* | hppa2.0n-* \ - | alpha-* | alphaev[4-7]-* | alphaev56-* | alphapca5[67]-* \ - | we32k-* | cydra-* | ns16k-* | pn-* | np1-* | xps100-* \ - | clipper-* | orion-* \ - | sparclite-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \ - | sparc64-* | sparcv9-* | sparc86x-* | mips16-* | mips64-* | mipsel-* \ - | mips64el-* | mips64orion-* | mips64orionel-* \ - | mips64vr4100-* | mips64vr4100el-* | mips64vr4300-* | mips64vr4300el-* \ - | mipstx39-* | mipstx39el-* | mcore-* \ - | f301-* | armv*-* | t3e-* \ - | m88110-* | m680[01234]0-* | m683?2-* | m68360-* | z8k-* | d10v-* \ - | thumb-* | v850-* | d30v-* | tic30-* | c30-* | fr30-* ) - ;; - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. 
- 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att - ;; - 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-cbm - ;; - amigaos | amigados) - basic_machine=m68k-cbm - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-cbm - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - cray | ymp) - basic_machine=ymp-cray - os=-unicos - ;; - cray2) - basic_machine=cray2-cray - os=-unicos - ;; - [ctj]90-cray) - basic_machine=c90-cray - os=-unicos - ;; - crds | unos) - basic_machine=m68k-crds - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd - ;; - encore | umax | mmax) - 
basic_machine=ns32k-encore - ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - os=-ose - ;; - fx2800) - basic_machine=i860-alliant - ;; - genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp - ;; - hp9k3[2-9][0-9]) - basic_machine=m68k-hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm - ;; -# I'm not sure what "Sysv32" means. Should this be sysv3.2? 
- i[34567]86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 - ;; - i[34567]86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 - ;; - i[34567]86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv - ;; - i[34567]86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - basic_machine=i386-mach - os=-mach - ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta - ;; - i386-go32 | go32) - basic_machine=i386-unknown - os=-go32 - ;; - i386-mingw32 | mingw32) - basic_machine=i386-unknown - os=-mingw32 - ;; - i386-qnx | qnx) - basic_machine=i386-qnx - ;; - iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) - ;; - *) - os=-irix4 - ;; - esac - ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - miniframe) - basic_machine=m68000-convergent - ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; - mipsel*-linux*) - basic_machine=mipsel-unknown - os=-linux-gnu - ;; - mips*-linux*) - basic_machine=mips-unknown - os=-linux-gnu - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - msdos) - basic_machine=i386-unknown - os=-msdos - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos - ;; - news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - 
;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) - ;; - -ns2*) - os=-nextstep2 - ;; - *) - os=-nextstep3 - ;; - esac - ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - np1) - basic_machine=np1-gould - ;; - op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k - ;; - pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - pbd) - basic_machine=sparc-tti - ;; - pbb) - basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pentium | p5 | k5 | k6 | nexen) - basic_machine=i586-pc - ;; - pentiumpro | p6 | 6x86) - basic_machine=i686-pc - ;; - pentiumii | pentium2) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | k6-* | nexen-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumpro-* | p6-* | 6x86-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pn) - basic_machine=pn-gould - ;; - power) basic_machine=rs6000-ibm - ;; - ppc) basic_machine=powerpc-unknown - ;; - ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppcle | powerpclittle | ppc-le | powerpc-little) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ps2) - basic_machine=i386-ibm - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff - ;; - rm[46]00) - basic_machine=mips-siemens - ;; - rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - sa29200) - basic_machine=a29k-amd - os=-udi - ;; - sequent) - basic_machine=i386-sequent - ;; - sh) - 
basic_machine=sh-hitachi - os=-hms - ;; - sparclite-wrs) - basic_machine=sparclite-wrs - os=-vxworks - ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 - ;; - spur) - basic_machine=spur-unknown - ;; - st2000) - basic_machine=m68k-tandem - ;; - stratus) - basic_machine=i860-stratus - os=-sysv4 - ;; - sun2) - basic_machine=m68000-sun - ;; - sun2os3) - basic_machine=m68000-sun - os=-sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - os=-sunos4 - ;; - sun3os3) - basic_machine=m68k-sun - os=-sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - os=-sunos4 - ;; - sun4os3) - basic_machine=sparc-sun - os=-sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - os=-sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - os=-solaris2 - ;; - sun3 | sun3-*) - basic_machine=m68k-sun - ;; - sun4) - basic_machine=sparc-sun - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - ;; - symmetry) - basic_machine=i386-sequent - os=-dynix - ;; - t3e) - basic_machine=t3e-cray - os=-unicos - ;; - tx39) - basic_machine=mipstx39-unknown - ;; - tx39el) - basic_machine=mipstx39el-unknown - ;; - tower | tower-32) - basic_machine=m68k-ncr - ;; - udi29k) - basic_machine=a29k-amd - os=-udi - ;; - ultra3) - basic_machine=a29k-nyu - os=-sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - os=-none - ;; - vaxv) - basic_machine=vax-dec - os=-sysv - ;; - vms) - basic_machine=vax-dec - os=-vms - ;; - vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; - vxworks960) - basic_machine=i960-wrs - os=-vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - os=-vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - os=-vxworks - ;; - w65*) - basic_machine=w65-wdc - os=-none - ;; - w89k-*) - basic_machine=hppa1.1-winbond - os=-proelf - ;; - xmp) - basic_machine=xmp-cray - os=-unicos - ;; - xps | xps100) - basic_machine=xps100-honeywell - ;; - z8k-*-coff) - basic_machine=z8k-unknown - os=-sim - ;; - none) - basic_machine=none-none - os=-none - ;; - -# Here we handle the default manufacturer of certain CPU types. 
It is in -# some cases the only manufacturer, in others, it is the most popular. - w89k) - basic_machine=hppa1.1-winbond - ;; - op50n) - basic_machine=hppa1.1-oki - ;; - op60c) - basic_machine=hppa1.1-oki - ;; - mips) - if [ x$os = x-linux-gnu ]; then - basic_machine=mips-unknown - else - basic_machine=mips-mips - fi - ;; - romp) - basic_machine=romp-ibm - ;; - rs6000) - basic_machine=rs6000-ibm - ;; - vax) - basic_machine=vax-dec - ;; - pdp11) - basic_machine=pdp11-dec - ;; - we32k) - basic_machine=we32k-att - ;; - sparc | sparcv9) - basic_machine=sparc-sun - ;; - cydra) - basic_machine=cydra-cydrome - ;; - orion) - basic_machine=orion-highlevel - ;; - orion105) - basic_machine=clipper-highlevel - ;; - mac | mpw | mac-mpw) - basic_machine=m68k-apple - ;; - pmac | pmac-mpw) - basic_machine=powerpc-apple - ;; - c4x*) - basic_machine=c4x-none - os=-coff - ;; - *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` - ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if [ x"$os" != x"" ] -then -case $os in - # First match some system type aliases - # that might get confused with valid system types. - # -solaris* is a basic system type, with this one exception. - -solaris1 | -solaris1.*) - os=`echo $os | sed -e 's|solaris1|sunos4|'` - ;; - -solaris) - os=-solaris2 - ;; - -svr4*) - os=-sysv4 - ;; - -unixware*) - os=-sysv4.2uw - ;; - -gnu/linux*) - os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` - ;; - # First accept the basic system types. - # The portable systems comes first. - # Each alternative MUST END IN A *, to match a version number. - # -sysv* is not here because it comes later, after sysvr4. 
- -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ - | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* \ - | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ - | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \ - | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ - | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ - | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \ - | -interix* | -uwin* | -rhapsody* | -opened* | -openstep* | -oskit*) - # Remember, each alternative MUST END IN *, to match a version number. - ;; - -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ - | -macos* | -mpw* | -magic* | -mon960* | -lnews*) - ;; - -mac*) - os=`echo $os | sed -e 's|mac|macos|'` - ;; - -linux*) - os=`echo $os | sed -e 's|linux|linux-gnu|'` - ;; - -sunos5*) - os=`echo $os | sed -e 's|sunos5|solaris2|'` - ;; - -sunos6*) - os=`echo $os | sed -e 's|sunos6|solaris3|'` - ;; - -opened*) - os=-openedition - ;; - -osfrose*) - os=-osfrose - ;; - -osf*) - os=-osf - ;; - -utek*) - os=-bsd - ;; - -dynix*) - os=-bsd - ;; - -acis*) - os=-aos - ;; - -386bsd) - os=-bsd - ;; - -ctix* | -uts*) - os=-sysv - ;; - -ns2 ) - os=-nextstep2 - ;; - # Preserve the version number of sinix5. - -sinix5.*) - os=`echo $os | sed -e 's|sinix|sysv|'` - ;; - -sinix*) - os=-sysv4 - ;; - -triton*) - os=-sysv3 - ;; - -oss*) - os=-sysv3 - ;; - -qnx) - os=-qnx4 - ;; - -svr4) - os=-sysv4 - ;; - -svr3) - os=-sysv3 - ;; - -sysvr4) - os=-sysv4 - ;; - # This must come after -sysvr4. 
- -sysv*) - ;; - -ose*) - os=-ose - ;; - -es1800*) - os=-ose - ;; - -xenix) - os=-xenix - ;; - -*mint | -*MiNT) - os=-mint - ;; - -none) - ;; - *) - # Get rid of the `-' at the beginning of $os. - os=`echo $os | sed 's/[^-]*-//'` - echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 - exit 1 - ;; -esac -else - -# Here we handle the default operating systems that come with various machines. -# The value should be what the vendor currently ships out the door with their -# machine or put another way, the most popular os provided with the machine. - -# Note that if you're going to try to match "-MANUFACTURER" here (say, -# "-sun"), then you have to tell the case statement up towards the top -# that MANUFACTURER isn't an operating system. Otherwise, code above -# will signal an error saying that MANUFACTURER isn't an operating -# system, and we'll never get to this point. - -case $basic_machine in - *-acorn) - os=-riscix1.2 - ;; - arm*-rebel) - os=-linux - ;; - arm*-semi) - os=-aout - ;; - pdp11-*) - os=-none - ;; - *-dec | vax-*) - os=-ultrix4.2 - ;; - m68*-apollo) - os=-domain - ;; - i386-sun) - os=-sunos4.0.2 - ;; - m68000-sun) - os=-sunos3 - # This also exists in the configure program, but was not the - # default. - # os=-sunos4 - ;; - m68*-cisco) - os=-aout - ;; - mips*-cisco) - os=-elf - ;; - mips*-*) - os=-elf - ;; - *-tti) # must be before sparc entry or we get the wrong os. 
- os=-sysv3 - ;; - sparc-* | *-sun) - os=-sunos4.1.1 - ;; - *-be) - os=-beos - ;; - *-ibm) - os=-aix - ;; - *-wec) - os=-proelf - ;; - *-winbond) - os=-proelf - ;; - *-oki) - os=-proelf - ;; - *-hp) - os=-hpux - ;; - *-hitachi) - os=-hiux - ;; - i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv - ;; - *-cbm) - os=-amigaos - ;; - *-dg) - os=-dgux - ;; - *-dolphin) - os=-sysv3 - ;; - m68k-ccur) - os=-rtu - ;; - m88k-omron*) - os=-luna - ;; - *-next ) - os=-nextstep - ;; - *-sequent) - os=-ptx - ;; - *-crds) - os=-unos - ;; - *-ns) - os=-genix - ;; - i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 - ;; - *-gould) - os=-sysv - ;; - *-highlevel) - os=-bsd - ;; - *-encore) - os=-bsd - ;; - *-sgi) - os=-irix - ;; - *-siemens) - os=-sysv4 - ;; - *-masscomp) - os=-rtu - ;; - f301-fujitsu) - os=-uxpv - ;; - *-rom68k) - os=-coff - ;; - *-*bug) - os=-coff - ;; - *-apple) - os=-macos - ;; - *-atari*) - os=-mint - ;; - *) - os=-none - ;; -esac -fi - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. -vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) - vendor=acorn - ;; - -sunos*) - vendor=sun - ;; - -aix*) - vendor=ibm - ;; - -beos*) - vendor=be - ;; - -hpux*) - vendor=hp - ;; - -mpeix*) - vendor=hp - ;; - -hiux*) - vendor=hitachi - ;; - -unos*) - vendor=crds - ;; - -dgux*) - vendor=dg - ;; - -luna*) - vendor=omron - ;; - -genix*) - vendor=ns - ;; - -mvs* | -opened*) - vendor=ibm - ;; - -ptx*) - vendor=sequent - ;; - -vxsim* | -vxworks*) - vendor=wrs - ;; - -aux*) - vendor=apple - ;; - -hms*) - vendor=hitachi - ;; - -mpw* | -macos*) - vendor=apple - ;; - -*mint | -*MiNT) - vendor=atari - ;; - esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` - ;; -esac - -echo $basic_machine$os diff --git a/pcre/configure b/pcre/configure deleted file mode 100644 index fbd3831e..00000000 --- a/pcre/configure +++ /dev/null @@ -1,1568 +0,0 @@ -#! 
/bin/sh - -# Guess values for system-dependent variables and create Makefiles. -# Generated automatically using autoconf version 2.13 -# Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. -# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. - -# Defaults: -ac_help= -ac_default_prefix=/usr/local -# Any additions from configure.in: -ac_help="$ac_help - --disable-shared build PCRE as a static library" -ac_help="$ac_help - --enable-utf8 enable UTF8 support (incomplete)" - -# Initialize some variables set by options. -# The variables have the same names as the options, with -# dashes changed to underlines. -build=NONE -cache_file=./config.cache -exec_prefix=NONE -host=NONE -no_create= -nonopt=NONE -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -target=NONE -verbose= -x_includes=NONE -x_libraries=NONE -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datadir='${prefix}/share' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -libdir='${exec_prefix}/lib' -includedir='${prefix}/include' -oldincludedir='/usr/include' -infodir='${prefix}/info' -mandir='${prefix}/man' - -# Initialize some other variables. -subdirs= -MFLAGS= MAKEFLAGS= -SHELL=${CONFIG_SHELL-/bin/sh} -# Maximum number of lines to put in a shell here document. -ac_max_here_lines=12 - -ac_prev= -for ac_option -do - - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval "$ac_prev=\$ac_option" - ac_prev= - continue - fi - - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. 
- - case "$ac_option" in - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir="$ac_optarg" ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build="$ac_optarg" ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file="$ac_optarg" ;; - - -datadir | --datadir | --datadi | --datad | --data | --dat | --da) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ - | --da=*) - datadir="$ac_optarg" ;; - - -disable-* | --disable-*) - ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - eval "enable_${ac_feature}=no" ;; - - -enable-* | --enable-*) - ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. 
- if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "enable_${ac_feature}='$ac_optarg'" ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix="$ac_optarg" ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. - with_gas=yes ;; - - -help | --help | --hel | --he) - # Omit some internal or obsolete options to make the list less imposing. - # The list generated by autoconf has been trimmed to remove many - # options that are totally irrelevant to PCRE (e.g. relating to X), - # or are not supported by its Makefile. - # The list generated by autoconf has been trimmed to remove many - # options that are totally irrelevant to PCRE (e.g. relating to X), - # or are not supported by its Makefile. - # The list generated by autoconf has been trimmed to remove many - # options that are totally irrelevant to PCRE (e.g. relating to X), - # or are not supported by its Makefile. - # This message is too long to be a string in the A/UX 3.1 sh. - cat << EOF -Usage: ./configure [options] -Options: [defaults in brackets after descriptions] -Configuration: - --cache-file=FILE cache test results in FILE - --help print this message - --no-create do not create output files - --quiet, --silent do not print \`checking...' 
messages - --version print the version of autoconf that created configure -Directory and file names: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [same as prefix] - --bindir=DIR user executables in DIR [EPREFIX/bin] - --libdir=DIR object code libraries in DIR [EPREFIX/lib] - --includedir=DIR C header files in DIR [PREFIX/include] - --mandir=DIR man documentation in DIR [PREFIX/man] -EOF - cat << EOF -EOF - if test -n "$ac_help"; then - echo "--enable and --with options recognized:$ac_help" - fi - exit 0 ;; - - -host | --host | --hos | --ho) - ac_prev=host ;; - -host=* | --host=* | --hos=* | --ho=*) - host="$ac_optarg" ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir="$ac_optarg" ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir="$ac_optarg" ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir="$ac_optarg" ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir="$ac_optarg" ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | --localst \ - | --locals | --local | --loca | --loc | --lo) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* \ - | 
--locals=* | --local=* | --loca=* | --loc=* | --lo=*) - localstatedir="$ac_optarg" ;; - - -mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir="$ac_optarg" ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. - with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir="$ac_optarg" ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix="$ac_optarg" ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix="$ac_optarg" ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | --program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix="$ac_optarg" ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | 
--program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | --program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name="$ac_optarg" ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir="$ac_optarg" ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir="$ac_optarg" ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site="$ac_optarg" ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - srcdir="$ac_optarg" ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | --sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | 
--sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir="$ac_optarg" ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target="$ac_optarg" ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers) - echo "configure generated by autoconf version 2.13" - exit 0 ;; - - -with-* | --with-*) - ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "with_${ac_package}='$ac_optarg'" ;; - - -without-* | --without-*) - ac_package=`echo $ac_option|sed -e 's/-*without-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - eval "with_${ac_package}=no" ;; - - --x) - # Obsolete; use --with-x. 
- with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes="$ac_optarg" ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries="$ac_optarg" ;; - - -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } - ;; - - *) - if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then - echo "configure: warning: $ac_option: invalid host type" 1>&2 - fi - if test "x$nonopt" != xNONE; then - { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } - fi - nonopt="$ac_option" - ;; - - esac -done - -if test -n "$ac_prev"; then - { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } -fi - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -# File descriptor usage: -# 0 standard input -# 1 file creation -# 2 errors and warnings -# 3 some systems may open it to /dev/tty -# 4 used on the Kubota Titan -# 6 checking for... messages and results -# 5 compiler messages saved in config.log -if test "$silent" = yes; then - exec 6>/dev/null -else - exec 6>&1 -fi -exec 5>./config.log - -echo "\ -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. -" 1>&5 - -# Strip out --no-create and --no-recursion so they do not pile up. -# Also quote any args containing shell metacharacters. 
-ac_configure_args= -for ac_arg -do - case "$ac_arg" in - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) ;; - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; - *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) - ac_configure_args="$ac_configure_args '$ac_arg'" ;; - *) ac_configure_args="$ac_configure_args $ac_arg" ;; - esac -done - -# NLS nuisances. -# Only set these to C if already set. These must not be set unconditionally -# because not all systems understand e.g. LANG=C (notably SCO). -# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! -# Non-C LC_CTYPE values break the ctype check. -if test "${LANG+set}" = set; then LANG=C; export LANG; fi -if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi -if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi -if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -rf conftest* confdefs.h -# AIX cpp loses on an empty file, so make sure it contains at least a newline. -echo > confdefs.h - -# A filename unique to this package, relative to the directory that -# configure is in, which we can look for to find out if srcdir is correct. -ac_unique_file=dftables.c - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then its parent. - ac_prog=$0 - ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` - test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. - srcdir=$ac_confdir - if test ! -r $srcdir/$ac_unique_file; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r $srcdir/$ac_unique_file; then - if test "$ac_srcdir_defaulted" = yes; then - { echo "configure: error: can not find sources in $ac_confdir or .." 
1>&2; exit 1; } - else - { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } - fi -fi -srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` - -# Prefer explicitly selected file to automatically selected ones. -if test -z "$CONFIG_SITE"; then - if test "x$prefix" != xNONE; then - CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" - else - CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" - fi -fi -for ac_site_file in $CONFIG_SITE; do - if test -r "$ac_site_file"; then - echo "loading site script $ac_site_file" - . "$ac_site_file" - fi -done - -if test -r "$cache_file"; then - echo "loading cache $cache_file" - . $cache_file -else - echo "creating cache $cache_file" - > $cache_file -fi - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -ac_exeext= -ac_objext=o -if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then - # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. - if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then - ac_n= ac_c=' -' ac_t=' ' - else - ac_n=-n ac_c= ac_t= - fi -else - ac_n= ac_c='\c' ac_t= -fi - - - - - - -PCRE_MAJOR=3 -PCRE_MINOR=4 -PCRE_DATE=22-Aug-2000 -PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR} - - -PCRE_LIB_VERSION=0:1:0 -PCRE_POSIXLIB_VERSION=0:0:0 - - -# Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:546: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. 
-else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="gcc" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:576: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_prog_rejected=no - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - break - fi - done - IFS="$ac_save_ifs" -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# -gt 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - set dummy "$ac_dir/$ac_word" "$@" - shift - ac_cv_prog_CC="$@" - fi -fi -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - if test -z "$CC"; then - case "`uname -s`" in - *win32* | *WIN32*) - # Extract the first word of "cl", so it can be a program name with args. -set dummy cl; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:627: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="cl" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - ;; - esac - fi - test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } -fi - -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:659: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -cat > conftest.$ac_ext << EOF - -#line 670 "configure" -#include "confdefs.h" - -main(){return(0);} -EOF -if { (eval echo configure:675: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - ac_cv_prog_cc_works=yes - # If we can't run a trivial program, we are probably using a cross compiler. - if (./conftest; exit) 2>/dev/null; then - ac_cv_prog_cc_cross=no - else - ac_cv_prog_cc_cross=yes - fi -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - ac_cv_prog_cc_works=no -fi -rm -fr conftest* -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. 
-ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 -if test $ac_cv_prog_cc_works = no; then - { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } -fi -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 -echo "configure:701: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 -echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 -cross_compiling=$ac_cv_prog_cc_cross - -echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:706: checking whether we are using GNU C" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then - ac_cv_prog_gcc=yes -else - ac_cv_prog_gcc=no -fi -fi - -echo "$ac_t""$ac_cv_prog_gcc" 1>&6 - -if test $ac_cv_prog_gcc = yes; then - GCC=yes -else - GCC= -fi - -ac_test_CFLAGS="${CFLAGS+set}" -ac_save_CFLAGS="$CFLAGS" -CFLAGS= -echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 -echo "configure:734: checking whether ${CC-cc} accepts -g" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - echo 'void f(){}' > conftest.c -if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then - ac_cv_prog_cc_g=yes -else - ac_cv_prog_cc_g=no -fi -rm -f conftest* - -fi - -echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 -if test "$ac_test_CFLAGS" = set; then - CFLAGS="$ac_save_CFLAGS" -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi - -# Extract the first word of "ranlib", so it can be a program name with args. -set dummy ranlib; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:768: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$RANLIB"; then - ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_RANLIB="ranlib" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" -fi -fi -RANLIB="$ac_cv_prog_RANLIB" -if test -n "$RANLIB"; then - echo "$ac_t""$RANLIB" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - - -echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:798: checking how to run the C preprocessor" >&5 -# On Suns, sometimes $CPP names a directory. -if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then -if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - # This must be in double quotes, not single quotes, because CPP may get - # substituted into the Makefile and "${CC-cc}" will confuse make. 
- CPP="${CC-cc} -E" - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:819: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -E -traditional-cpp" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:836: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -nologo -E" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:853: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP=/lib/cpp -fi -rm -f conftest* -fi -rm -f conftest* -fi -rm -f conftest* - ac_cv_prog_CPP="$CPP" -fi - CPP="$ac_cv_prog_CPP" -else - ac_cv_prog_CPP="$CPP" -fi -echo "$ac_t""$CPP" 1>&6 - -echo $ac_n "checking for ANSI C header files""... 
$ac_c" 1>&6 -echo "configure:878: checking for ANSI C header files" >&5 -if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#include -#include -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:891: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - ac_cv_header_stdc=yes -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "memchr" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "free" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. -if test "$cross_compiling" = yes; then - : -else - cat > conftest.$ac_ext < -#define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -#define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int main () { int i; for (i = 0; i < 256; i++) -if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); -exit (0); } - -EOF -if { (eval echo configure:958: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - : -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_header_stdc=no -fi -rm -fr conftest* -fi - -fi -fi - -echo "$ac_t""$ac_cv_header_stdc" 1>&6 -if test $ac_cv_header_stdc = yes; then - cat >> confdefs.h <<\EOF -#define STDC_HEADERS 1 -EOF - -fi - -for ac_hdr in limits.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:985: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:995: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - - - -echo $ac_n "checking for working const""... 
$ac_c" 1>&6 -echo "configure:1024: checking for working const" >&5 -if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext <j = 5; -} -{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ - const int foo = 10; -} - -; return 0; } -EOF -if { (eval echo configure:1078: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_c_const=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_c_const=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_c_const" 1>&6 -if test $ac_cv_c_const = no; then - cat >> confdefs.h <<\EOF -#define const -EOF - -fi - -echo $ac_n "checking for size_t""... $ac_c" 1>&6 -echo "configure:1099: checking for size_t" >&5 -if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#if STDC_HEADERS -#include -#include -#endif -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "(^|[^a-zA-Z_0-9])size_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_size_t=yes -else - rm -rf conftest* - ac_cv_type_size_t=no -fi -rm -f conftest* - -fi -echo "$ac_t""$ac_cv_type_size_t" 1>&6 -if test $ac_cv_type_size_t = no; then - cat >> confdefs.h <<\EOF -#define size_t unsigned -EOF - -fi - - - -for ac_func in bcopy memmove strerror -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1136: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. 
Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:1164: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - - - -LIBTOOL=./libtool -LIBSUFFIX=la -# Check whether --enable-shared or --disable-shared was given. -if test "${enable_shared+set}" = set; then - enableval="$enable_shared" - if test "$enableval" = "no"; then - LIBTOOL= - LIBSUFFIX=a -fi - -fi - - - -# Check whether --enable-utf8 or --disable-utf8 was given. -if test "${enable_utf8+set}" = set; then - enableval="$enable_utf8" - if test "$enableval" = "yes"; then - UTF8=-DSUPPORT_UTF8 -fi - -fi - - - - - - - - - - - - - - - -trap '' 1 2 15 -cat > confcache <<\EOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs. It is not useful on other systems. -# If it contains results you don't want to keep, you may remove or edit it. -# -# By default, configure uses ./config.cache as the cache file, -# creating it if it does not exist already. You can give configure -# the --cache-file=FILE option to use a different cache file; that is -# what configure does when it calls configure scripts in -# subdirectories, so they share the cache. -# Giving --cache-file=/dev/null disables caching, for debugging configure. 
-# config.status only pays attention to the cache file if you give it the -# --recheck option to rerun configure. -# -EOF -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, don't put newlines in cache variables' values. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. -(set) 2>&1 | - case `(ac_space=' '; set | grep ac_space) 2>&1` in - *ac_space=\ *) - # `set' does not quote correctly, so add quotes (double-quote substitution - # turns \\\\ into \\, and sed turns \\ into \). - sed -n \ - -e "s/'/'\\\\''/g" \ - -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" - ;; - *) - # `set' quotes correctly as required by POSIX, so do not add quotes. - sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' - ;; - esac >> confcache -if cmp -s $cache_file confcache; then - : -else - if test -w $cache_file; then - echo "updating cache $cache_file" - cat confcache > $cache_file - else - echo "not updating unwritable cache $cache_file" - fi -fi -rm -f confcache - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. -test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -# Any assignment to VPATH causes Sun make to only execute -# the first set of double-colon rules, so remove it if not needed. -# If there is a colon in the path, we need to keep it. -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' -fi - -trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 - -DEFS=-DHAVE_CONFIG_H - -# Without the "./", some shells look in PATH for config.status. 
-: ${CONFIG_STATUS=./config.status} - -echo creating $CONFIG_STATUS -rm -f $CONFIG_STATUS -cat > $CONFIG_STATUS </dev/null | sed 1q`: -# -# $0 $ac_configure_args -# -# Compiler output produced by configure, useful for debugging -# configure, is in ./config.log if it exists. - -ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" -for ac_option -do - case "\$ac_option" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" - exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; - -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "$CONFIG_STATUS generated by autoconf version 2.13" - exit 0 ;; - -help | --help | --hel | --he | --h) - echo "\$ac_cs_usage"; exit 0 ;; - *) echo "\$ac_cs_usage"; exit 1 ;; - esac -done - -ac_given_srcdir=$srcdir - -trap 'rm -fr `echo "Makefile pcre.h:pcre.in pcre-config:pcre-config.in RunTest:RunTest.in config.h:config.in" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 -EOF -cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF -$ac_vpsub -$extrasub -s%@SHELL@%$SHELL%g -s%@CFLAGS@%$CFLAGS%g -s%@CPPFLAGS@%$CPPFLAGS%g -s%@CXXFLAGS@%$CXXFLAGS%g -s%@FFLAGS@%$FFLAGS%g -s%@DEFS@%$DEFS%g -s%@LDFLAGS@%$LDFLAGS%g -s%@LIBS@%$LIBS%g -s%@exec_prefix@%$exec_prefix%g -s%@prefix@%$prefix%g -s%@program_transform_name@%$program_transform_name%g -s%@bindir@%$bindir%g -s%@sbindir@%$sbindir%g -s%@libexecdir@%$libexecdir%g -s%@datadir@%$datadir%g -s%@sysconfdir@%$sysconfdir%g -s%@sharedstatedir@%$sharedstatedir%g -s%@localstatedir@%$localstatedir%g -s%@libdir@%$libdir%g -s%@includedir@%$includedir%g -s%@oldincludedir@%$oldincludedir%g -s%@infodir@%$infodir%g -s%@mandir@%$mandir%g -s%@CC@%$CC%g -s%@RANLIB@%$RANLIB%g -s%@CPP@%$CPP%g -s%@HAVE_MEMMOVE@%$HAVE_MEMMOVE%g -s%@HAVE_STRERROR@%$HAVE_STRERROR%g -s%@LIBTOOL@%$LIBTOOL%g -s%@LIBSUFFIX@%$LIBSUFFIX%g -s%@UTF8@%$UTF8%g 
-s%@PCRE_MAJOR@%$PCRE_MAJOR%g -s%@PCRE_MINOR@%$PCRE_MINOR%g -s%@PCRE_DATE@%$PCRE_DATE%g -s%@PCRE_VERSION@%$PCRE_VERSION%g -s%@PCRE_LIB_VERSION@%$PCRE_LIB_VERSION%g -s%@PCRE_POSIXLIB_VERSION@%$PCRE_POSIXLIB_VERSION%g - -CEOF -EOF - -cat >> $CONFIG_STATUS <<\EOF - -# Split the substitutions into bite-sized pieces for seds with -# small command number limits, like on Digital OSF/1 and HP-UX. -ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. -ac_file=1 # Number of current file. -ac_beg=1 # First line for current file. -ac_end=$ac_max_sed_cmds # Line after last line for current file. -ac_more_lines=: -ac_sed_cmds="" -while $ac_more_lines; do - if test $ac_beg -gt 1; then - sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file - else - sed "${ac_end}q" conftest.subs > conftest.s$ac_file - fi - if test ! -s conftest.s$ac_file; then - ac_more_lines=false - rm -f conftest.s$ac_file - else - if test -z "$ac_sed_cmds"; then - ac_sed_cmds="sed -f conftest.s$ac_file" - else - ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" - fi - ac_file=`expr $ac_file + 1` - ac_beg=$ac_end - ac_end=`expr $ac_end + $ac_max_sed_cmds` - fi -done -if test -z "$ac_sed_cmds"; then - ac_sed_cmds=cat -fi -EOF - -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. - - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! 
-d "$ac_dir" && mkdir "$ac_dir" - ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dir_suffix. - ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dir_suffix= ac_dots= - fi - - case "$ac_given_srcdir" in - .) srcdir=. - if test -z "$ac_dots"; then top_srcdir=. - else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; - /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; - *) # Relative path. - srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" - top_srcdir="$ac_dots$ac_given_srcdir" ;; - esac - - - echo creating "$ac_file" - rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." - case "$ac_file" in - *Makefile*) ac_comsub="1i\\ -# $configure_input" ;; - *) ac_comsub= ;; - esac - - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - sed -e "$ac_comsub -s%@configure_input@%$configure_input%g -s%@srcdir@%$srcdir%g -s%@top_srcdir@%$top_srcdir%g -" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file -fi; done -rm -f conftest.s* - -# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where -# NAME is the cpp macro being defined and VALUE is the value it is being given. -# -# ac_d sets the value in "#define NAME VALUE" lines. -ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)' -ac_dB='\([ ][ ]*\)[^ ]*%\1#\2' -ac_dC='\3' -ac_dD='%g' -# ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE". -ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' -ac_uB='\([ ]\)%\1#\2define\3' -ac_uC=' ' -ac_uD='\4%g' -# ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE". -ac_eA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' -ac_eB='$%\1#\2define\3' -ac_eC=' ' -ac_eD='%g' - -if test "${CONFIG_HEADERS+set}" != set; then -EOF -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -fi -for ac_file in .. 
$CONFIG_HEADERS; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - echo creating $ac_file - - rm -f conftest.frag conftest.in conftest.out - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - cat $ac_file_inputs > conftest.in - -EOF - -# Transform confdefs.h into a sed script conftest.vals that substitutes -# the proper values into config.h.in to produce config.h. And first: -# Protect against being on the right side of a sed subst in config.status. -# Protect against being in an unquoted here document in config.status. -rm -f conftest.vals -cat > conftest.hdr <<\EOF -s/[\\&%]/\\&/g -s%[\\$`]%\\&%g -s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp -s%ac_d%ac_u%gp -s%ac_u%ac_e%gp -EOF -sed -n -f conftest.hdr confdefs.h > conftest.vals -rm -f conftest.hdr - -# This sed command replaces #undef with comments. This is necessary, for -# example, in the case of _POSIX_SOURCE, which is predefined and required -# on some systems where configure will not decide to define it. -cat >> conftest.vals <<\EOF -s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */% -EOF - -# Break up conftest.vals because some shells have a limit on -# the size of here documents, and old seds have small limits too. - -rm -f conftest.tail -while : -do - ac_lines=`grep -c . conftest.vals` - # grep -c gives empty output for an empty file on some AIX systems. - if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi - # Write a limited-size here document to conftest.frag. 
- echo ' cat > conftest.frag <> $CONFIG_STATUS - sed ${ac_max_here_lines}q conftest.vals >> $CONFIG_STATUS - echo 'CEOF - sed -f conftest.frag conftest.in > conftest.out - rm -f conftest.in - mv conftest.out conftest.in -' >> $CONFIG_STATUS - sed 1,${ac_max_here_lines}d conftest.vals > conftest.tail - rm -f conftest.vals - mv conftest.tail conftest.vals -done -rm -f conftest.vals - -cat >> $CONFIG_STATUS <<\EOF - rm -f conftest.frag conftest.h - echo "/* $ac_file. Generated automatically by configure. */" > conftest.h - cat conftest.in >> conftest.h - rm -f conftest.in - if cmp -s $ac_file conftest.h 2>/dev/null; then - echo "$ac_file is unchanged" - rm -f conftest.h - else - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - fi - rm -f $ac_file - mv conftest.h $ac_file - fi -fi; done - -EOF -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -chmod a+x RunTest pcre-config -exit 0 -EOF -chmod +x $CONFIG_STATUS -rm -fr confdefs* $ac_clean_files -test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 - diff --git a/pcre/configure.in b/pcre/configure.in deleted file mode 100644 index c98387d2..00000000 --- a/pcre/configure.in +++ /dev/null @@ -1,85 +0,0 @@ -dnl Process this file with autoconf to produce a configure script. - -dnl This is required at the start; the name is the name of a file -dnl it should be seeing, to verify it is in the same directory. - -AC_INIT(dftables.c) - -dnl Arrange to build config.h from config.in. Note that pcre.h is -dnl built differently, as it is just a "substitution" file. -dnl Manual says this macro should come right after AC_INIT. -AC_CONFIG_HEADER(config.h:config.in) - -dnl Provide the current PCRE version information. 
Do not use numbers -dnl with leading zeros for the minor version, as they end up in a C -dnl macro, and may be treated as octal constants. Stick to single -dnl digits for minor numbers less than 10. There are unlikely to be -dnl that many releases anyway. - -PCRE_MAJOR=3 -PCRE_MINOR=4 -PCRE_DATE=22-Aug-2000 -PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR} - -dnl Provide versioning information for libtool shared libraries that -dnl are built by default on Unix systems. - -PCRE_LIB_VERSION=0:1:0 -PCRE_POSIXLIB_VERSION=0:0:0 - -dnl Checks for programs. - -AC_PROG_CC -AC_PROG_RANLIB - -dnl Checks for header files. - -AC_HEADER_STDC -AC_CHECK_HEADERS(limits.h) - -dnl Checks for typedefs, structures, and compiler characteristics. - -AC_C_CONST -AC_TYPE_SIZE_T - -dnl Checks for library functions. - -AC_CHECK_FUNCS(bcopy memmove strerror) - -dnl Handle --enable-shared-libraries - -LIBTOOL=./libtool -LIBSUFFIX=la -AC_ARG_ENABLE(shared, -[ --disable-shared build PCRE as a static library], -if test "$enableval" = "no"; then - LIBTOOL= - LIBSUFFIX=a -fi -) - -dnl Handle --enable-utf8 - -AC_ARG_ENABLE(utf8, -[ --enable-utf8 enable UTF8 support (incomplete)], -if test "$enableval" = "yes"; then - UTF8=-DSUPPORT_UTF8 -fi -) - -dnl "Export" these variables - -AC_SUBST(HAVE_MEMMOVE) -AC_SUBST(HAVE_STRERROR) -AC_SUBST(LIBTOOL) -AC_SUBST(LIBSUFFIX) -AC_SUBST(UTF8) -AC_SUBST(PCRE_MAJOR) -AC_SUBST(PCRE_MINOR) -AC_SUBST(PCRE_DATE) -AC_SUBST(PCRE_VERSION) -AC_SUBST(PCRE_LIB_VERSION) -AC_SUBST(PCRE_POSIXLIB_VERSION) - -dnl This must be last; it determines what files are written -AC_OUTPUT(Makefile pcre.h:pcre.in pcre-config:pcre-config.in RunTest:RunTest.in,[chmod a+x RunTest pcre-config]) diff --git a/pcre/dftables.c b/pcre/dftables.c deleted file mode 100644 index d572dfd3..00000000 --- a/pcre/dftables.c +++ /dev/null @@ -1,148 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -PCRE is 
a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - -Written by: Philip Hazel - - Copyright (c) 1997-2000 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ - -See the file Tech.Notes for some information on the internals. -*/ - - -/* This is a support program to generate the file chartables.c, containing -character tables of various kinds. They are built according to the default C -locale and used as the default tables by PCRE. Now that pcre_maketables is -a function visible to the outside world, we make use of its code from here in -order to be consistent. 
*/ - -#include -#include -#include - -#include "internal.h" - -#define DFTABLES /* maketables.c notices this */ -#include "maketables.c" - - -int main(void) -{ -int i; -unsigned const char *tables = pcre_maketables(); - -printf( - "/*************************************************\n" - "* Perl-Compatible Regular Expressions *\n" - "*************************************************/\n\n" - "/* This file is automatically written by the dftables auxiliary \n" - "program. If you edit it by hand, you might like to edit the Makefile to \n" - "prevent its ever being regenerated.\n\n" - "This file is #included in the compilation of pcre.c to build the default\n" - "character tables which are used when no tables are passed to the compile\n" - "function. */\n\n" - "static unsigned char pcre_default_tables[] = {\n\n" - "/* This table is a lower casing table. */\n\n"); - -printf(" "); -for (i = 0; i < 256; i++) - { - if ((i & 7) == 0 && i != 0) printf("\n "); - printf("%3d", *tables++); - if (i != 255) printf(","); - } -printf(",\n\n"); - -printf("/* This table is a case flipping table. */\n\n"); - -printf(" "); -for (i = 0; i < 256; i++) - { - if ((i & 7) == 0 && i != 0) printf("\n "); - printf("%3d", *tables++); - if (i != 255) printf(","); - } -printf(",\n\n"); - -printf( - "/* This table contains bit maps for various character classes.\n" - "Each map is 32 bytes long and the bits run from the least\n" - "significant end of each byte. The classes that have their own\n" - "maps are: space, xdigit, digit, upper, lower, word, graph\n" - "print, punct, and cntrl. Other classes are built from combinations. 
*/\n\n"); - -printf(" "); -for (i = 0; i < cbit_length; i++) - { - if ((i & 7) == 0 && i != 0) - { - if ((i & 31) == 0) printf("\n"); - printf("\n "); - } - printf("0x%02x", *tables++); - if (i != cbit_length - 1) printf(","); - } -printf(",\n\n"); - -printf( - "/* This table identifies various classes of character by individual bits:\n" - " 0x%02x white space character\n" - " 0x%02x letter\n" - " 0x%02x decimal digit\n" - " 0x%02x hexadecimal digit\n" - " 0x%02x alphanumeric or '_'\n" - " 0x%02x regular expression metacharacter or binary zero\n*/\n\n", - ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word, - ctype_meta); - -printf(" "); -for (i = 0; i < 256; i++) - { - if ((i & 7) == 0 && i != 0) - { - printf(" /* "); - if (isprint(i-8)) printf(" %c -", i-8); - else printf("%3d-", i-8); - if (isprint(i-1)) printf(" %c ", i-1); - else printf("%3d", i-1); - printf(" */\n "); - } - printf("0x%02x", *tables++); - if (i != 255) printf(","); - } - -printf("};/* "); -if (isprint(i-8)) printf(" %c -", i-8); - else printf("%3d-", i-8); -if (isprint(i-1)) printf(" %c ", i-1); - else printf("%3d", i-1); -printf(" */\n\n/* End of chartables.c */\n"); - -return 0; -} - -/* End of dftables.c */ diff --git a/pcre/dll.mk b/pcre/dll.mk deleted file mode 100644 index d8b728e5..00000000 --- a/pcre/dll.mk +++ /dev/null @@ -1,60 +0,0 @@ -# dll.mk - auxilary Makefile to easy build dll's for mingw32 target -# ver. 0.6 of 1999-03-25 -# -# Homepage of this makefile - http://www.is.lg.ua/~paul/devel/ -# Homepage of original mingw32 project - -# http://www.fu.is.saga-u.ac.jp/~colin/gcc.html -# -# How to use: -# This makefile can: -# 1. Create automatical .def file from list of objects -# 2. Create .dll from objects and .def file, either automatical, or your -# hand-written (maybe) file, which must have same basename as dll -# WARNING! There MUST be object, which name match dll's name. Make sux. -# 3. 
Create import library from .def (as for .dll, only its name required, -# not dll itself) -# By convention implibs for dll have .dll.a suffix, e.g. libstuff.dll.a -# Why not just libstuff.a? 'Cos that's name for static lib, ok? -# Process divided into 3 phases because: -# 1. Pre-existent .def possible -# 2. Generating implib is enough time-consuming -# -# Variables: -# DLL_LDLIBS - libs for linking dll -# DLL_LDFLAGS - flags for linking dll -# -# By using $(DLL_SUFFIX) instead of 'dll', e.g. stuff.$(DLL_SUFFIX) -# you may help porting makefiles to other platforms -# -# Put this file in your make's include path (e.g. main include dir, for -# more information see include section in make doc). Put in the beginning -# of your own Makefile line "include dll.mk". Specify dependences, e.g.: -# -# Do all stuff in one step -# libstuff.dll.a: $(OBJECTS) stuff.def -# stuff.def: $(OBJECTS) -# -# Steps separated, pre-provided .def, link with user32 -# -# DLL_LDLIBS=-luser32 -# stuff.dll: $(OBJECTS) -# libstuff.dll.a: $(OBJECTS) - - -DLLWRAP=dllwrap -DLLTOOL=dlltool - -DLL_SUFFIX=dll - -.SUFFIXES: .o .$(DLL_SUFFIX) - -_%.def: %.o - $(DLLTOOL) --export-all --output-def $@ $^ - -%.$(DLL_SUFFIX): %.o - $(DLLWRAP) --dllname $(notdir $@) --driver-name $(CC) --def $*.def -o $@ $(filter %.o,$^) $(DLL_LDFLAGS) $(DLL_LDLIBS) - -lib%.$(DLL_SUFFIX).a:%.def - $(DLLTOOL) --dllname $(notdir $*.dll) --def $< --output-lib $@ - -# End diff --git a/pcre/doc/ChangeLog b/pcre/doc/ChangeLog deleted file mode 100644 index 2133dd76..00000000 --- a/pcre/doc/ChangeLog +++ /dev/null @@ -1,655 +0,0 @@ -ChangeLog for PCRE ------------------- - - -Version 3.4 22-Aug-00 ---------------------- - -1. Fixed typo in pcre.h: unsigned const char * changed to const unsigned char *. - -2. Diagnose condition (?(0) as an error instead of crashing on matching. - - -Version 3.3 01-Aug-00 ---------------------- - -1. 
If an octal character was given, but the value was greater than \377, it -was not getting masked to the least significant bits, as documented. This could -lead to crashes in some systems. - -2. Perl 5.6 (if not earlier versions) accepts classes like [a-\d] and treats -the hyphen as a literal. PCRE used to give an error; it now behaves like Perl. - -3. Added the functions pcre_free_substring() and pcre_free_substring_list(). -These just pass their arguments on to (pcre_free)(), but they are provided -because some uses of PCRE bind it to non-C systems that can call its functions, -but cannot call free() or pcre_free() directly. - -4. Add "make test" as a synonym for "make check". Corrected some comments in -the Makefile. - -5. Add $(DESTDIR)/ in front of all the paths in the "install" target in the -Makefile. - -6. Changed the name of pgrep to pcregrep, because Solaris has introduced a -command called pgrep for grepping around the active processes. - -7. Added the beginnings of support for UTF-8 character strings. - -8. Arranged for the Makefile to pass over the settings of CC, CFLAGS, and -RANLIB to ./ltconfig so that they are used by libtool. I think these are all -the relevant ones. (AR is not passed because ./ltconfig does its own figuring -out for the ar command.) - - -Version 3.2 12-May-00 ---------------------- - -This is purely a bug fixing release. - -1. If the pattern /((Z)+|A)*/ was matched agained ZABCDEFG it matched Z instead -of ZA. This was just one example of several cases that could provoke this bug, -which was introduced by change 9 of version 2.00. The code for breaking -infinite loops after an iteration that matches an empty string was't working -correctly. - -2. The pcretest program was not imitating Perl correctly for the pattern /a*/g -when matched against abbab (for example). 
After matching an empty string, it -wasn't forcing anchoring when setting PCRE_NOTEMPTY for the next attempt; this -caused it to match further down the string than it should. - -3. The code contained an inclusion of sys/types.h. It isn't clear why this -was there because it doesn't seem to be needed, and it causes trouble on some -systems, as it is not a Standard C header. It has been removed. - -4. Made 4 silly changes to the source to avoid stupid compiler warnings that -were reported on the Macintosh. The changes were from - - while ((c = *(++ptr)) != 0 && c != '\n'); -to - while ((c = *(++ptr)) != 0 && c != '\n') ; - -Totally extraordinary, but if that's what it takes... - -5. PCRE is being used in one environment where neither memmove() nor bcopy() is -available. Added HAVE_BCOPY and an autoconf test for it; if neither -HAVE_MEMMOVE nor HAVE_BCOPY is set, use a built-in emulation function which -assumes the way PCRE uses memmove() (always moving upwards). - -6. PCRE is being used in one environment where strchr() is not available. There -was only one use in pcre.c, and writing it out to avoid strchr() probably gives -faster code anyway. - - -Version 3.1 09-Feb-00 ---------------------- - -The only change in this release is the fixing of some bugs in Makefile.in for -the "install" target: - -(1) It was failing to install pcreposix.h. - -(2) It was overwriting the pcre.3 man page with the pcreposix.3 man page. - - -Version 3.0 01-Feb-00 ---------------------- - -1. Add support for the /+ modifier to perltest (to output $` like it does in -pcretest). - -2. Add support for the /g modifier to perltest. - -3. Fix pcretest so that it behaves even more like Perl for /g when the pattern -matches null strings. - -4. Fix perltest so that it doesn't do unwanted things when fed an empty -pattern. Perl treats empty patterns specially - it reuses the most recent -pattern, which is not what we want. Replace // by /(?#)/ in order to avoid this -effect. - -5. 
The POSIX interface was broken in that it was just handing over the POSIX -captured string vector to pcre_exec(), but (since release 2.00) PCRE has -required a bigger vector, with some working space on the end. This means that -the POSIX wrapper now has to get and free some memory, and copy the results. - -6. Added some simple autoconf support, placing the test data and the -documentation in separate directories, re-organizing some of the -information files, and making it build pcre-config (a GNU standard). Also added -libtool support for building PCRE as a shared library, which is now the -default. - -7. Got rid of the leading zero in the definition of PCRE_MINOR because 08 and -09 are not valid octal constants. Single digits will be used for minor values -less than 10. - -8. Defined REG_EXTENDED and REG_NOSUB as zero in the POSIX header, so that -existing programs that set these in the POSIX interface can use PCRE without -modification. - -9. Added a new function, pcre_fullinfo() with an extensible interface. It can -return all that pcre_info() returns, plus additional data. The pcre_info() -function is retained for compatibility, but is considered to be obsolete. - -10. Added experimental recursion feature (?R) to handle one common case that -Perl 5.6 will be able to do with (?p{...}). - -11. Added support for POSIX character classes like [:alpha:], which Perl is -adopting. - - -Version 2.08 31-Aug-99 ----------------------- - -1. When startoffset was not zero and the pattern began with ".*", PCRE was not -trying to match at the startoffset position, but instead was moving forward to -the next newline as if a previous match had failed. - -2. pcretest was not making use of PCRE_NOTEMPTY when repeating for /g and /G, -and could get into a loop if a null string was matched other than at the start -of the subject. - -3. 
Added definitions of PCRE_MAJOR and PCRE_MINOR to pcre.h so the version can -be distinguished at compile time, and for completeness also added PCRE_DATE. - -5. Added Paul Sokolovsky's minor changes to make it easy to compile a Win32 DLL -in GnuWin32 environments. - - -Version 2.07 29-Jul-99 ----------------------- - -1. The documentation is now supplied in plain text form and HTML as well as in -the form of man page sources. - -2. C++ compilers don't like assigning (void *) values to other pointer types. -In particular this affects malloc(). Although there is no problem in Standard -C, I've put in casts to keep C++ compilers happy. - -3. Typo on pcretest.c; a cast of (unsigned char *) in the POSIX regexec() call -should be (const char *). - -4. If NOPOSIX is defined, pcretest.c compiles without POSIX support. This may -be useful for non-Unix systems who don't want to bother with the POSIX stuff. -However, I haven't made this a standard facility. The documentation doesn't -mention it, and the Makefile doesn't support it. - -5. The Makefile now contains an "install" target, with editable destinations at -the top of the file. The pcretest program is not installed. - -6. pgrep -V now gives the PCRE version number and date. - -7. Fixed bug: a zero repetition after a literal string (e.g. /abcde{0}/) was -causing the entire string to be ignored, instead of just the last character. - -8. If a pattern like /"([^\\"]+|\\.)*"/ is applied in the normal way to a -non-matching string, it can take a very, very long time, even for strings of -quite modest length, because of the nested recursion. PCRE now does better in -some of these cases. It does this by remembering the last required literal -character in the pattern, and pre-searching the subject to ensure it is present -before running the real match. 
In other words, it applies a heuristic to detect -some types of certain failure quickly, and in the above example, if presented -with a string that has no trailing " it gives "no match" very quickly. - -9. A new runtime option PCRE_NOTEMPTY causes null string matches to be ignored; -other alternatives are tried instead. - - -Version 2.06 09-Jun-99 ----------------------- - -1. Change pcretest's output for amount of store used to show just the code -space, because the remainder (the data block) varies in size between 32-bit and -64-bit systems. - -2. Added an extra argument to pcre_exec() to supply an offset in the subject to -start matching at. This allows lookbehinds to work when searching for multiple -occurrences in a string. - -3. Added additional options to pcretest for testing multiple occurrences: - - /+ outputs the rest of the string that follows a match - /g loops for multiple occurrences, using the new startoffset argument - /G loops for multiple occurrences by passing an incremented pointer - -4. PCRE wasn't doing the "first character" optimization for patterns starting -with \b or \B, though it was doing it for other lookbehind assertions. That is, -it wasn't noticing that a match for a pattern such as /\bxyz/ has to start with -the letter 'x'. On long subject strings, this gives a significant speed-up. - - -Version 2.05 21-Apr-99 ----------------------- - -1. Changed the type of magic_number from int to long int so that it works -properly on 16-bit systems. - -2. Fixed a bug which caused patterns starting with .* not to work correctly -when the subject string contained newline characters. PCRE was assuming -anchoring for such patterns in all cases, which is not correct because .* will -not pass a newline unless PCRE_DOTALL is set. It now assumes anchoring only if -DOTALL is set at top level; otherwise it knows that patterns starting with .* -must be retried after every newline in the subject. - - -Version 2.04 18-Feb-99 ----------------------- - -1. 
For parenthesized subpatterns with repeats whose minimum was zero, the -computation of the store needed to hold the pattern was incorrect (too large). -If such patterns were nested a few deep, this could multiply and become a real -problem. - -2. Added /M option to pcretest to show the memory requirement of a specific -pattern. Made -m a synonym of -s (which does this globally) for compatibility. - -3. Subpatterns of the form (regex){n,m} (i.e. limited maximum) were being -compiled in such a way that the backtracking after subsequent failure was -pessimal. Something like (a){0,3} was compiled as (a)?(a)?(a)? instead of -((a)((a)(a)?)?)? with disastrous performance if the maximum was of any size. - - -Version 2.03 02-Feb-99 ----------------------- - -1. Fixed typo and small mistake in man page. - -2. Added 4th condition (GPL supersedes if conflict) and created separate -LICENCE file containing the conditions. - -3. Updated pcretest so that patterns such as /abc\/def/ work like they do in -Perl, that is the internal \ allows the delimiter to be included in the -pattern. Locked out the use of \ as a delimiter. If \ immediately follows -the final delimiter, add \ to the end of the pattern (to test the error). - -4. Added the convenience functions for extracting substrings after a successful -match. Updated pcretest to make it able to test these functions. - - -Version 2.02 14-Jan-99 ----------------------- - -1. Initialized the working variables associated with each extraction so that -their saving and restoring doesn't refer to uninitialized store. - -2. Put dummy code into study.c in order to trick the optimizer of the IBM C -compiler for OS/2 into generating correct code. Apparently IBM isn't going to -fix the problem. - -3. Pcretest: the timing code wasn't using LOOPREPEAT for timing execution -calls, and wasn't printing the correct value for compiling calls. Increased the -default value of LOOPREPEAT, and the number of significant figures in the -times. - -4. 
Changed "/bin/rm" in the Makefile to "-rm" so it works on Windows NT. - -5. Renamed "deftables" as "dftables" to get it down to 8 characters, to avoid -a building problem on Windows NT with a FAT file system. - - -Version 2.01 21-Oct-98 ----------------------- - -1. Changed the API for pcre_compile() to allow for the provision of a pointer -to character tables built by pcre_maketables() in the current locale. If NULL -is passed, the default tables are used. - - -Version 2.00 24-Sep-98 ----------------------- - -1. Since the (?>) facility is in Perl 5.005, don't require PCRE_EXTRA to enable -it any more. - -2. Allow quantification of (?>) groups, and make it work correctly. - -3. The first character computation wasn't working for (?>) groups. - -4. Correct the implementation of \Z (it is permitted to match on the \n at the -end of the subject) and add 5.005's \z, which really does match only at the -very end of the subject. - -5. Remove the \X "cut" facility; Perl doesn't have it, and (?> is neater. - -6. Remove the ability to specify CASELESS, MULTILINE, DOTALL, and -DOLLAR_END_ONLY at runtime, to make it possible to implement the Perl 5.005 -localized options. All options to pcre_study() were also removed. - -7. Add other new features from 5.005: - - $(?<= positive lookbehind - $(?a*))*/ (a PCRE_EXTRA facility). - - -Version 1.00 18-Nov-97 ----------------------- - -1. Added compile-time macros to support systems such as SunOS4 which don't have -memmove() or strerror() but have other things that can be used instead. - -2. Arranged that "make clean" removes the executables. - - -Version 0.99 27-Oct-97 ----------------------- - -1. Fixed bug in code for optimizing classes with only one character. It was -initializing a 32-byte map regardless, which could cause it to run off the end -of the memory it had got. - -2. Added, conditional on PCRE_EXTRA, the proposed (?>REGEX) construction. - - -Version 0.98 22-Oct-97 ----------------------- - -1.
Fixed bug in code for handling temporary memory usage when there are more -back references than supplied space in the ovector. This could cause segfaults. - - -Version 0.97 21-Oct-97 ----------------------- - -1. Added the \X "cut" facility, conditional on PCRE_EXTRA. - -2. Optimized negated single characters not to use a bit map. - -3. Brought error texts together as macro definitions; clarified some of them; -fixed one that was wrong - it said "range out of order" when it meant "invalid -escape sequence". - -4. Changed some char * arguments to const char *. - -5. Added PCRE_NOTBOL and PCRE_NOTEOL (from POSIX). - -6. Added the POSIX-style API wrapper in pcreposix.a and testing facilities in -pcretest. - - -Version 0.96 16-Oct-97 ----------------------- - -1. Added a simple "pgrep" utility to the distribution. - -2. Fixed an incompatibility with Perl: "{" is now treated as a normal character -unless it appears in one of the precise forms "{ddd}", "{ddd,}", or "{ddd,ddd}" -where "ddd" means "one or more decimal digits". - -3. Fixed serious bug. If a pattern had a back reference, but the call to -pcre_exec() didn't supply a large enough ovector to record the related -identifying subpattern, the match always failed. PCRE now remembers the number -of the largest back reference, and gets some temporary memory in which to save -the offsets during matching if necessary, in order to ensure that -backreferences always work. - -4. Increased the compatibility with Perl in a number of ways: - - (a) . no longer matches \n by default; an option PCRE_DOTALL is provided - to request this handling. The option can be set at compile or exec time. - - (b) $ matches before a terminating newline by default; an option - PCRE_DOLLAR_ENDONLY is provided to override this (but not in multiline - mode). The option can be set at compile or exec time. - - (c) The handling of \ followed by a digit other than 0 is now supposed to be - the same as Perl's. 
If the decimal number it represents is less than 10 - or there aren't that many previous left capturing parentheses, an octal - escape is read. Inside a character class, it's always an octal escape, - even if it is a single digit. - - (d) An escaped but undefined alphabetic character is taken as a literal, - unless PCRE_EXTRA is set. Currently this just reserves the remaining - escapes. - - (e) {0} is now permitted. (The previous item is removed from the compiled - pattern). - -5. Changed all the names of code files so that the basic parts are no longer -than 10 characters, and abolished the teeny "globals.c" file. - -6. Changed the handling of character classes; they are now done with a 32-byte -bit map always. - -7. Added the -d and /D options to pcretest to make it possible to look at the -internals of compilation without having to recompile pcre. - - -Version 0.95 23-Sep-97 ----------------------- - -1. Fixed bug in pre-pass concerning escaped "normal" characters such as \x5c or -\x20 at the start of a run of normal characters. These were being treated as -real characters, instead of the source characters being re-checked. - - -Version 0.94 18-Sep-97 ----------------------- - -1. The functions are now thread-safe, with the caveat that the global variables -containing pointers to malloc() and free() or alternative functions are the -same for all threads. - -2. Get pcre_study() to generate a bitmap of initial characters for non- -anchored patterns when this is possible, and use it if passed to pcre_exec(). - - -Version 0.93 15-Sep-97 ----------------------- - -1. /(b)|(:+)/ was computing an incorrect first character. - -2. Add pcre_study() to the API and the passing of pcre_extra to pcre_exec(), -but not actually doing anything yet. - -3. Treat "-" characters in classes that cannot be part of ranges as literals, -as Perl does (e.g. [-az] or [az-]). - -4. Set the anchored flag if a branch starts with .* or .*? because that tests -all possible positions. - -5. 
Split up into different modules to avoid including unneeded functions in a -compiled binary. However, compile and exec are still in one module. The "study" -function is split off. - -6. The character tables are now in a separate module whose source is generated -by an auxiliary program - but can then be edited by hand if required. There are -now no calls to isalnum(), isspace(), isdigit(), isxdigit(), tolower() or -toupper() in the code. - -7. Turn the malloc/free function variables into pcre_malloc and pcre_free and -make them global. Abolish the function for setting them, as the caller can now -set them directly. - - -Version 0.92 11-Sep-97 ----------------------- - -1. A repeat with a fixed maximum and a minimum of 1 for an ordinary character -(e.g. /a{1,3}/) was broken (I mis-optimized it). - -2. Caseless matching was not working in character classes if the characters in -the pattern were in upper case. - -3. Make ranges like [W-c] work in the same way as Perl for caseless matching. - -4. Make PCRE_ANCHORED public and accept as a compile option. - -5. Add an options word to pcre_exec() and accept PCRE_ANCHORED and -PCRE_CASELESS at run time. Add escapes \A and \I to pcretest to cause it to -pass them. - -6. Give an error if bad option bits passed at compile or run time. - -7. Add PCRE_MULTILINE at compile and exec time, and (?m) as well. Add \M to -pcretest to cause it to pass that flag. - -8. Add pcre_info(), to get the number of identifying subpatterns, the stored -options, and the first character, if set. - -9. Recognize C+ or C{n,m} where n >= 1 as providing a fixed starting character. - - -Version 0.91 10-Sep-97 ----------------------- - -1. PCRE was failing to diagnose unlimited repeats of subpatterns that could -match the empty string as in /(a*)*/. It was looping and ultimately crashing. - -2. PCRE was looping on encountering an indefinitely repeated back reference to -a subpattern that had matched an empty string, e.g. /(a|)\1*/.
It now does what -Perl does - treats the match as successful. - -**** diff --git a/pcre/doc/NON-UNIX-USE b/pcre/doc/NON-UNIX-USE deleted file mode 100644 index 09a74324..00000000 --- a/pcre/doc/NON-UNIX-USE +++ /dev/null @@ -1,50 +0,0 @@ -Compiling PCRE on non-Unix systems ----------------------------------- - -If you want to compile PCRE for a non-Unix system, note that it consists -entirely of code written in Standard C, and so should compile successfully -on any machine with a Standard C compiler and library, using normal compiling -commands to do the following: - -(1) Copy or rename the file config.in as config.h, and change the macros that -define HAVE_STRERROR and HAVE_MEMMOVE to define them as 1 rather than 0. -Unfortunately, because of the way Unix autoconf works, the default setting has -to be 0. - -(2) Copy or rename the file pcre.in as pcre.h, and change the macro definitions -for PCRE_MAJOR, PCRE_MINOR, and PCRE_DATE near its start to the values set in -configure.in. - -(3) Compile dftables.c as a stand-alone program, and then run it with -the standard output sent to chartables.c. This generates a set of standard -character tables. - -(4) Compile maketables.c, get.c, study.c and pcre.c and link them all -together into an object library in whichever form your system keeps such -libraries. This is the pcre library (chartables.c gets included by means of an -#include directive). - -(5) Similarly, compile pcreposix.c and link it as the pcreposix library. - -(6) Compile the test program pcretest.c. This needs the functions in the -pcre and pcreposix libraries when linking. - -(7) Run pcretest on the testinput files in the testdata directory, and check -that the output matches the corresponding testoutput files. You must use the --i option when checking testinput2. - -If you have a system without "configure" but where you can use a Makefile, edit -Makefile.in to create Makefile, substituting suitable values for the variables -at the head of the file. 
- -Some help in building a Win32 DLL of PCRE in GnuWin32 environments was -contributed by Paul.Sokolovsky@technologist.com. These environments are -Mingw32 (http://www.xraylith.wisc.edu/~khan/software/gnu-win32/) and -CygWin (http://sourceware.cygnus.com/cygwin/). Paul comments: - - For CygWin, set CFLAGS=-mno-cygwin, and do 'make dll'. You'll get - pcre.dll (containing pcreposix also), libpcre.dll.a, and dynamically - linked pgrep and pcretest. If you have /bin/sh, run RunTest (three - main test go ok, locale not supported). - -**** diff --git a/pcre/doc/Tech.Notes b/pcre/doc/Tech.Notes deleted file mode 100644 index 7b96e5b6..00000000 --- a/pcre/doc/Tech.Notes +++ /dev/null @@ -1,243 +0,0 @@ -Technical Notes about PCRE --------------------------- - -Many years ago I implemented some regular expression functions to an algorithm -suggested by Martin Richards. These were not Unix-like in form, and were quite -restricted in what they could do by comparison with Perl. The interesting part -about the algorithm was that the amount of space required to hold the compiled -form of an expression was known in advance. The code to apply an expression did -not operate by backtracking, as the Henry Spencer and Perl code does, but -instead checked all possibilities simultaneously by keeping a list of current -states and checking all of them as it advanced through the subject string. (In -the terminology of Jeffrey Friedl's book, it was a "DFA algorithm".) When the -pattern was all used up, all remaining states were possible matches, and the -one matching the longest subset of the subject string was chosen. This did not -necessarily maximize the individual wild portions of the pattern, as is -expected in Unix and Perl-style regular expressions. - -By contrast, the code originally written by Henry Spencer and subsequently -heavily modified for Perl actually compiles the expression twice: once in a -dummy mode in order to find out how much store will be needed, and then for -real. 
The execution function operates by backtracking and maximizing (or, -optionally, minimizing in Perl) the amount of the subject that matches -individual wild portions of the pattern. This is an "NFA algorithm" in Friedl's -terminology. - -For the set of functions that forms PCRE (which are unrelated to those -mentioned above), I tried at first to invent an algorithm that used an amount -of store bounded by a multiple of the number of characters in the pattern, to -save on compiling time. However, because of the greater complexity in Perl -regular expressions, I couldn't do this. In any case, a first pass through the -pattern is needed, in order to find internal flag settings like (?i) at top -level. So PCRE works by running a very degenerate first pass to calculate a -maximum store size, and then a second pass to do the real compile - which may -use a bit less than the predicted amount of store. The idea is that this is -going to turn out faster because the first pass is degenerate and the second -pass can just store stuff straight into the vector. It does make the compiling -functions bigger, of course, but they have got quite big anyway to handle all -the Perl stuff. - -The compiled form of a pattern is a vector of bytes, containing items of -variable length. The first byte in an item is an opcode, and the length of the -item is either implicit in the opcode or contained in the data bytes which -follow it. 
A list of all the opcodes follows: - -Opcodes with no following data ------------------------------- - -These items are all just one byte long - - OP_END end of pattern - OP_ANY match any character - OP_SOD match start of data: \A - OP_CIRC ^ (start of data, or after \n in multiline) - OP_NOT_WORD_BOUNDARY \B - OP_WORD_BOUNDARY \b - OP_NOT_DIGIT \D - OP_DIGIT \d - OP_NOT_WHITESPACE \S - OP_WHITESPACE \s - OP_NOT_WORDCHAR \W - OP_WORDCHAR \w - OP_EODN match end of data or \n at end: \Z - OP_EOD match end of data: \z - OP_DOLL $ (end of data, or before \n in multiline) - OP_RECURSE match the pattern recursively - - -Repeating single characters ---------------------------- - -The common repeats (*, +, ?) when applied to a single character appear as -two-byte items using the following opcodes: - - OP_STAR - OP_MINSTAR - OP_PLUS - OP_MINPLUS - OP_QUERY - OP_MINQUERY - -Those with "MIN" in their name are the minimizing versions. Each is followed by -the character that is to be repeated. Other repeats make use of - - OP_UPTO - OP_MINUPTO - OP_EXACT - -which are followed by a two-byte count (most significant first) and the -repeated character. OP_UPTO matches from 0 to the given number. A repeat with a -non-zero minimum and a fixed maximum is coded as an OP_EXACT followed by an -OP_UPTO (or OP_MINUPTO). - - -Repeating character types -------------------------- - -Repeats of things like \d are done exactly as for single characters, except -that instead of a character, the opcode for the type is stored in the data -byte. The opcodes are: - - OP_TYPESTAR - OP_TYPEMINSTAR - OP_TYPEPLUS - OP_TYPEMINPLUS - OP_TYPEQUERY - OP_TYPEMINQUERY - OP_TYPEUPTO - OP_TYPEMINUPTO - OP_TYPEEXACT - - -Matching a character string ---------------------------- - -The OP_CHARS opcode is followed by a one-byte count and then that number of -characters. If there are more than 255 characters in sequence, successive -instances of OP_CHARS are used.
- - -Character classes ------------------ - -OP_CLASS is used for a character class, provided there are at least two -characters in the class. If there is only one character, OP_CHARS is used for a -positive class, and OP_NOT for a negative one (that is, for something like -[^a]). Another set of repeating opcodes (OP_NOTSTAR etc.) are used for a -repeated, negated, single-character class. The normal ones (OP_STAR etc.) are -used for a repeated positive single-character class. - -OP_CLASS is followed by a 32-byte bit map containing a 1 bit for every -character that is acceptable. The bits are counted from the least significant -end of each byte. - - -Back references ---------------- - -OP_REF is followed by a single byte containing the reference number. - - -Repeating character classes and back references ------------------------------------------------ - -Single-character classes are handled specially (see above). This applies to -OP_CLASS and OP_REF. In both cases, the repeat information follows the base -item. The matching code looks at the following opcode to see if it is one of - - OP_CRSTAR - OP_CRMINSTAR - OP_CRPLUS - OP_CRMINPLUS - OP_CRQUERY - OP_CRMINQUERY - OP_CRRANGE - OP_CRMINRANGE - -All but the last two are just single-byte items. The others are followed by -four bytes of data, comprising the minimum and maximum repeat counts. - - -Brackets and alternation ------------------------- - -A pair of non-capturing (round) brackets is wrapped round each expression at -compile time, so alternation always happens in the context of brackets. -Non-capturing brackets use the opcode OP_BRA, while capturing brackets use -OP_BRA+1, OP_BRA+2, etc. [Note for North Americans: "bracket" to some English -speakers, including myself, can be round, square, curly, or pointy. Hence this -usage.] - -A bracket opcode is followed by two bytes which give the offset to the next -alternative OP_ALT or, if there aren't any branches, to the matching KET -opcode. 
Each OP_ALT is followed by two bytes giving the offset to the next one, -or to the KET opcode. - -OP_KET is used for subpatterns that do not repeat indefinitely, while -OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or -maximally respectively. All three are followed by two bytes giving (as a -positive number) the offset back to the matching BRA opcode. - -If a subpattern is quantified such that it is permitted to match zero times, it -is preceded by one of OP_BRAZERO or OP_BRAMINZERO. These are single-byte -opcodes which tell the matcher that skipping this subpattern entirely is a -valid branch. - -A subpattern with an indefinite maximum repetition is replicated in the -compiled data its minimum number of times (or once with a BRAZERO if the -minimum is zero), with the final copy terminating with a KETRMIN or KETRMAX as -appropriate. - -A subpattern with a bounded maximum repetition is replicated in a nested -fashion up to the maximum number of times, with BRAZERO or BRAMINZERO before -each replication after the minimum, so that, for example, (abc){2,5} is -compiled as (abc)(abc)((abc)((abc)(abc)?)?)?. The 200-bracket limit does not -apply to these internally generated brackets. - - -Assertions ----------- - -Forward assertions are just like other subpatterns, but starting with one of -the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes -OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion -is OP_REVERSE, followed by a two byte count of the number of characters to move -back the pointer in the subject string. When operating in UTF-8 mode, the count -is a character count rather than a byte count. A separate count is present in -each alternative of a lookbehind assertion, allowing them to have different -fixed lengths. - - -Once-only subpatterns ---------------------- - -These are also just like other subpatterns, but they start with the opcode -OP_ONCE. 
- - -Conditional subpatterns ------------------------ - -These are like other subpatterns, but they start with the opcode OP_COND. If -the condition is a back reference, this is stored at the start of the -subpattern using the opcode OP_CREF followed by one byte containing the -reference number. Otherwise, a conditional subpattern will always start with -one of the assertions. - - -Changing options ----------------- - -If any of the /i, /m, or /s options are changed within a parenthesized group, -an OP_OPT opcode is compiled, followed by one byte containing the new settings -of these flags. If there are several alternatives in a group, there is an -occurrence of OP_OPT at the start of all those following the first options -change, to set appropriate options for the start of the alternative. -Immediately after the end of the group there is another such item to reset the -flags to their previous values. Other changes of flag within the pattern can be -handled entirely at compile time, and so do not cause anything to be put into -the compiled data. - - -Philip Hazel -August 2000 diff --git a/pcre/doc/authors b/pcre/doc/authors deleted file mode 100644 index bfe1b5d8..00000000 --- a/pcre/doc/authors +++ /dev/null @@ -1,6 +0,0 @@ -Written by: Philip Hazel - -University of Cambridge Computing Service, -Cambridge, England. Phone: +44 1223 334714. - -Copyright (c) 1997-2000 University of Cambridge diff --git a/pcre/doc/copying b/pcre/doc/copying deleted file mode 100644 index 34d20db9..00000000 --- a/pcre/doc/copying +++ /dev/null @@ -1,46 +0,0 @@ -PCRE LICENCE ------------- - -PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - -Written by: Philip Hazel - -University of Cambridge Computing Service, -Cambridge, England. Phone: +44 1223 334714. 
- -Copyright (c) 1997-2000 University of Cambridge - -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. In practice, this means that if you use - PCRE in software which you distribute to others, commercially or - otherwise, you must put a sentence like this - - Regular expression support is provided by the PCRE library package, - which is open source software, written by Philip Hazel, and copyright - by the University of Cambridge, England. - - somewhere reasonably visible in your documentation and in any relevant - files or online help data or similar. A reference to the ftp site for - the source, that is, to - - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/ - - should also be given in the documentation. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. - -End diff --git a/pcre/doc/news b/pcre/doc/news deleted file mode 100644 index 56fccdfa..00000000 --- a/pcre/doc/news +++ /dev/null @@ -1,54 +0,0 @@ -News about PCRE releases ------------------------- - -Release 3.3 01-Aug-00 ---------------------- - -There is some support for UTF-8 character strings. This is incomplete and -experimental. The documentation describes what is and what is not implemented. -Otherwise, this is just a bug-fixing release. - - -Release 3.0 01-Feb-00 ---------------------- - -1. 
A "configure" script is now used to configure PCRE for Unix systems. It -builds a Makefile, a config.h file, and the pcre-config script. - -2. PCRE is built as a shared library by default. - -3. There is support for POSIX classes such as [:alpha:]. - -5. There is an experimental recursion feature. - ----------------------------------------------------------------------------- - IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00 - -Please note that there has been a change in the API such that a larger -ovector is required at matching time, to provide some additional workspace. -The new man page has details. This change was necessary in order to support -some of the new functionality in Perl 5.005. - - IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00 - -Another (I hope this is the last!) change has been made to the API for the -pcre_compile() function. An additional argument has been added to make it -possible to pass over a pointer to character tables built in the current -locale by pcre_maketables(). To use the default tables, this new argument -should be passed as NULL. - - IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05 - -Yet another (and again I hope this really is the last) change has been made -to the API for the pcre_exec() function. An additional argument has been -added to make it possible to start the match other than at the start of the -subject string. This is important if there are lookbehinds. The new man -page has the details, but if you just want to convert existing programs, all -you need to do is to stick in a new fifth argument to pcre_exec(), with a -value of zero.
For example, change - - pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize) -to - pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize) - -**** diff --git a/pcre/doc/pcre.3 b/pcre/doc/pcre.3 deleted file mode 100644 index bb812f47..00000000 --- a/pcre/doc/pcre.3 +++ /dev/null @@ -1,1810 +0,0 @@ -.TH PCRE 3 -.SH NAME -pcre - Perl-compatible regular expressions. -.SH SYNOPSIS -.B #include <pcre.h> -.PP -.SM -.br -.B pcre *pcre_compile(const char *\fIpattern\fR, int \fIoptions\fR, -.ti +5n -.B const char **\fIerrptr\fR, int *\fIerroffset\fR, -.ti +5n -.B const unsigned char *\fItableptr\fR); -.PP -.br -.B pcre_extra *pcre_study(const pcre *\fIcode\fR, int \fIoptions\fR, -.ti +5n -.B const char **\fIerrptr\fR); -.PP -.br -.B int pcre_exec(const pcre *\fIcode\fR, "const pcre_extra *\fIextra\fR," -.ti +5n -.B "const char *\fIsubject\fR," int \fIlength\fR, int \fIstartoffset\fR, -.ti +5n -.B int \fIoptions\fR, int *\fIovector\fR, int \fIovecsize\fR); -.PP -.br -.B int pcre_copy_substring(const char *\fIsubject\fR, int *\fIovector\fR, -.ti +5n -.B int \fIstringcount\fR, int \fIstringnumber\fR, char *\fIbuffer\fR, -.ti +5n -.B int \fIbuffersize\fR); -.PP -.br -.B int pcre_get_substring(const char *\fIsubject\fR, int *\fIovector\fR, -.ti +5n -.B int \fIstringcount\fR, int \fIstringnumber\fR, -.ti +5n -.B const char **\fIstringptr\fR); -.PP -.br -.B int pcre_get_substring_list(const char *\fIsubject\fR, -.ti +5n -.B int *\fIovector\fR, int \fIstringcount\fR, "const char ***\fIlistptr\fR);" -.PP -.br -.B void pcre_free_substring(const char *\fIstringptr\fR); -.PP -.br -.B void pcre_free_substring_list(const char **\fIstringptr\fR); -.PP -.br -.B const unsigned char *pcre_maketables(void); -.PP -.br -.B int pcre_fullinfo(const pcre *\fIcode\fR, "const pcre_extra *\fIextra\fR," -.ti +5n -.B int \fIwhat\fR, void *\fIwhere\fR); -.PP -.br -.B int pcre_info(const pcre *\fIcode\fR, int *\fIoptptr\fR, int -.B *\fIfirstcharptr\fR); -.PP -.br -.B char
*pcre_version(void); -.PP -.br -.B void *(*pcre_malloc)(size_t); -.PP -.br -.B void (*pcre_free)(void *); - - - -.SH DESCRIPTION -The PCRE library is a set of functions that implement regular expression -pattern matching using the same syntax and semantics as Perl 5, with just a few -differences (see below). The current implementation corresponds to Perl 5.005, -with some additional features from later versions. This includes some -experimental, incomplete support for UTF-8 encoded strings. Details of exactly -what is and what is not supported are given below. - -PCRE has its own native API, which is described in this document. There is also -a set of wrapper functions that correspond to the POSIX regular expression API. -These are described in the \fBpcreposix\fR documentation. - -The native API function prototypes are defined in the header file \fBpcre.h\fR, -and on Unix systems the library itself is called \fBlibpcre.a\fR, so can be -accessed by adding \fB-lpcre\fR to the command for linking an application which -calls it. The header file defines the macros PCRE_MAJOR and PCRE_MINOR to -contain the major and minor release numbers for the library. Applications can -use these to include support for different releases. - -The functions \fBpcre_compile()\fR, \fBpcre_study()\fR, and \fBpcre_exec()\fR -are used for compiling and matching regular expressions. - -The functions \fBpcre_copy_substring()\fR, \fBpcre_get_substring()\fR, and -\fBpcre_get_substring_list()\fR are convenience functions for extracting -captured substrings from a matched subject string; \fBpcre_free_substring()\fR -and \fBpcre_free_substring_list()\fR are also provided, to free the memory used -for extracted strings. - -The function \fBpcre_maketables()\fR is used (optionally) to build a set of -character tables in the current locale for passing to \fBpcre_compile()\fR. 
- -The function \fBpcre_fullinfo()\fR is used to find out information about a -compiled pattern; \fBpcre_info()\fR is an obsolete version which returns only -some of the available information, but is retained for backwards compatibility. -The function \fBpcre_version()\fR returns a pointer to a string containing the -version of PCRE and its date of release. - -The global variables \fBpcre_malloc\fR and \fBpcre_free\fR initially contain -the entry points of the standard \fBmalloc()\fR and \fBfree()\fR functions -respectively. PCRE calls the memory management functions via these variables, -so a calling program can replace them if it wishes to intercept the calls. This -should be done before calling any PCRE functions. - - -.SH MULTI-THREADING -The PCRE functions can be used in multi-threading applications, with the -proviso that the memory management functions pointed to by \fBpcre_malloc\fR -and \fBpcre_free\fR are shared by all threads. - -The compiled form of a regular expression is not altered during matching, so -the same compiled pattern can safely be used by several threads at once. - - -.SH COMPILING A PATTERN -The function \fBpcre_compile()\fR is called to compile a pattern into an -internal form. The pattern is a C string terminated by a binary zero, and -is passed in the argument \fIpattern\fR. A pointer to a single block of memory -that is obtained via \fBpcre_malloc\fR is returned. This contains the -compiled code and related data. The \fBpcre\fR type is defined for this for -convenience, but in fact \fBpcre\fR is just a typedef for \fBvoid\fR, since the -contents of the block are not externally defined. It is up to the caller to -free the memory when it is no longer required. 
-.PP -The size of a compiled pattern is roughly proportional to the length of the -pattern string, except that each character class (other than those containing -just a single character, negated or not) requires 33 bytes, and repeat -quantifiers with a minimum greater than one or a bounded maximum cause the -relevant portions of the compiled pattern to be replicated. -.PP -The \fIoptions\fR argument contains independent bits that affect the -compilation. It should be zero if no options are required. Some of the options, -in particular, those that are compatible with Perl, can also be set and unset -from within the pattern (see the detailed description of regular expressions -below). For these options, the contents of the \fIoptions\fR argument specifies -their initial settings at the start of compilation and execution. The -PCRE_ANCHORED option can be set at the time of matching as well as at compile -time. -.PP -If \fIerrptr\fR is NULL, \fBpcre_compile()\fR returns NULL immediately. -Otherwise, if compilation of a pattern fails, \fBpcre_compile()\fR returns -NULL, and sets the variable pointed to by \fIerrptr\fR to point to a textual -error message. The offset from the start of the pattern to the character where -the error was discovered is placed in the variable pointed to by -\fIerroffset\fR, which must not be NULL. If it is, an immediate error is given. -.PP -If the final argument, \fItableptr\fR, is NULL, PCRE uses a default set of -character tables which are built when it is compiled, using the default C -locale. Otherwise, \fItableptr\fR must be the result of a call to -\fBpcre_maketables()\fR. See the section on locale support below. -.PP -The following option bits are defined in the header file: - - PCRE_ANCHORED - -If this bit is set, the pattern is forced to be "anchored", that is, it is -constrained to match only at the start of the string which is being searched -(the "subject string"). 
This effect can also be achieved by appropriate -constructs in the pattern itself, which is the only way to do it in Perl. - - PCRE_CASELESS - -If this bit is set, letters in the pattern match both upper and lower case -letters. It is equivalent to Perl's /i option. - - PCRE_DOLLAR_ENDONLY - -If this bit is set, a dollar metacharacter in the pattern matches only at the -end of the subject string. Without this option, a dollar also matches -immediately before the final character if it is a newline (but not before any -other newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is -set. There is no equivalent to this option in Perl. - - PCRE_DOTALL - -If this bit is set, a dot metacharacter in the pattern matches all characters, -including newlines. Without it, newlines are excluded. This option is -equivalent to Perl's /s option. A negative class such as [^a] always matches a -newline character, independent of the setting of this option. - - PCRE_EXTENDED - -If this bit is set, whitespace data characters in the pattern are totally -ignored except when escaped or inside a character class, and characters between -an unescaped # outside a character class and the next newline character, -inclusive, are also ignored. This is equivalent to Perl's /x option, and makes -it possible to include comments inside complicated patterns. Note, however, -that this applies only to data characters. Whitespace characters may never -appear within special character sequences in a pattern, for example within the -sequence (?( which introduces a conditional subpattern. - - PCRE_EXTRA - -This option was invented in order to turn on additional functionality of PCRE -that is incompatible with Perl, but it is currently of very little use. When -set, any backslash in a pattern that is followed by a letter that has no -special meaning causes an error, thus reserving these combinations for future -expansion.
By default, as in Perl, a backslash followed by a letter with no -special meaning is treated as a literal. There are at present no other features -controlled by this option. It can also be set by a (?X) option setting within a -pattern. - - PCRE_MULTILINE - -By default, PCRE treats the subject string as consisting of a single "line" of -characters (even if it actually contains several newlines). The "start of line" -metacharacter (^) matches only at the start of the string, while the "end of -line" metacharacter ($) matches only at the end of the string, or before a -terminating newline (unless PCRE_DOLLAR_ENDONLY is set). This is the same as -Perl. - -When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs -match immediately following or immediately before any newline in the subject -string, respectively, as well as at the very start and end. This is equivalent -to Perl's /m option. If there are no "\\n" characters in a subject string, or -no occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no -effect. - - PCRE_UNGREEDY - -This option inverts the "greediness" of the quantifiers so that they are not -greedy by default, but become greedy if followed by "?". It is not compatible -with Perl. It can also be set by a (?U) option setting within the pattern. - - PCRE_UTF8 - -This option causes PCRE to regard both the pattern and the subject as strings -of UTF-8 characters instead of just byte strings. However, it is available only -if PCRE has been built to include UTF-8 support. If not, the use of this option -provokes an error. Support for UTF-8 is new, experimental, and incomplete. -Details of exactly what it entails are given below. - - -.SH STUDYING A PATTERN -When a pattern is going to be used several times, it is worth spending more -time analyzing it in order to speed up the time taken for matching.
The -function \fBpcre_study()\fR takes a pointer to a compiled pattern as its first -argument, and returns a pointer to a \fBpcre_extra\fR block (another \fBvoid\fR -typedef) containing additional information about the pattern; this can be -passed to \fBpcre_exec()\fR. If no additional information is available, NULL -is returned. - -The second argument contains option bits. At present, no options are defined -for \fBpcre_study()\fR, and this argument should always be zero. - -The third argument for \fBpcre_study()\fR is a pointer to an error message. If -studying succeeds (even if no data is returned), the variable it points to is -set to NULL. Otherwise it points to a textual error message. - -At present, studying a pattern is useful only for non-anchored patterns that do -not have a single fixed starting character. A bitmap of possible starting -characters is created. - - -.SH LOCALE SUPPORT -PCRE handles caseless matching, and determines whether characters are letters, -digits, or whatever, by reference to a set of tables. The library contains a -default set of tables which is created in the default C locale when PCRE is -compiled. This is used when the final argument of \fBpcre_compile()\fR is NULL, -and is sufficient for many applications. - -An alternative set of tables can, however, be supplied. Such tables are built -by calling the \fBpcre_maketables()\fR function, which has no arguments, in the -relevant locale. The result can then be passed to \fBpcre_compile()\fR as often -as necessary. For example, to build and use tables that are appropriate for the -French locale (where accented characters with codes greater than 128 are -treated as letters), the following code could be used: - - setlocale(LC_CTYPE, "fr"); - tables = pcre_maketables(); - re = pcre_compile(..., tables); - -The tables are built in memory that is obtained via \fBpcre_malloc\fR. 
The -pointer that is passed to \fBpcre_compile\fR is saved with the compiled -pattern, and the same tables are used via this pointer by \fBpcre_study()\fR -and \fBpcre_exec()\fR. Thus for any single pattern, compilation, studying and -matching all happen in the same locale, but different patterns can be compiled -in different locales. It is the caller's responsibility to ensure that the -memory containing the tables remains available for as long as it is needed. - - -.SH INFORMATION ABOUT A PATTERN -The \fBpcre_fullinfo()\fR function returns information about a compiled -pattern. It replaces the obsolete \fBpcre_info()\fR function, which is -nevertheless retained for backwards compatibility (and is documented below). - -The first argument for \fBpcre_fullinfo()\fR is a pointer to the compiled -pattern. The second argument is the result of \fBpcre_study()\fR, or NULL if -the pattern was not studied. The third argument specifies which piece of -information is required, while the fourth argument is a pointer to a variable -to receive the data. The yield of the function is zero for success, or one of -the following negative numbers: - - PCRE_ERROR_NULL the argument \fIcode\fR was NULL - the argument \fIwhere\fR was NULL - PCRE_ERROR_BADMAGIC the "magic number" was not found - PCRE_ERROR_BADOPTION the value of \fIwhat\fR was invalid - -The possible values for the third argument are defined in \fBpcre.h\fR, and are -as follows: - - PCRE_INFO_OPTIONS - -Return a copy of the options with which the pattern was compiled. The fourth -argument should point to an \fBunsigned long int\fR variable. These option bits -are those specified in the call to \fBpcre_compile()\fR, modified by any -top-level option settings within the pattern itself, and with the PCRE_ANCHORED -bit forcibly set if the form of the pattern implies that it can match only at -the start of a subject string.
- - PCRE_INFO_SIZE - -Return the size of the compiled pattern, that is, the value that was passed as -the argument to \fBpcre_malloc()\fR when PCRE was getting memory in which to -place the compiled data. The fourth argument should point to a \fBsize_t\fR -variable. - - PCRE_INFO_CAPTURECOUNT - -Return the number of capturing subpatterns in the pattern. The fourth argument -should point to an \fBint\fR variable. - - PCRE_INFO_BACKREFMAX - -Return the number of the highest back reference in the pattern. The fourth -argument should point to an \fBint\fR variable. Zero is returned if there are -no back references. - - PCRE_INFO_FIRSTCHAR - -Return information about the first character of any matched string, for a -non-anchored pattern. If there is a fixed first character, e.g. from a pattern -such as (cat|cow|coyote), it is returned in the integer pointed to by -\fIwhere\fR. Otherwise, if either - -(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch -starts with "^", or - -(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set -(if it were set, the pattern would be anchored), - --1 is returned, indicating that the pattern matches only at the start of a -subject string or after any "\\n" within the string. Otherwise -2 is returned. -For anchored patterns, -2 is returned. - - PCRE_INFO_FIRSTTABLE - -If the pattern was studied, and this resulted in the construction of a 256-bit -table indicating a fixed set of characters for the first character in any -matching string, a pointer to the table is returned. Otherwise NULL is -returned. The fourth argument should point to an \fBunsigned char *\fR -variable. - - PCRE_INFO_LASTLITERAL - -For a non-anchored pattern, return the value of the rightmost literal character -which must exist in any matched string, other than at its start. The fourth -argument should point to an \fBint\fR variable. If there is no such character, -or if the pattern is anchored, -1 is returned.
For example, for the pattern -/a\\d+z\\d+/ the returned value is 'z'. - -The \fBpcre_info()\fR function is now obsolete because its interface is too -restrictive to return all the available data about a compiled pattern. New -programs should use \fBpcre_fullinfo()\fR instead. The yield of -\fBpcre_info()\fR is the number of capturing subpatterns, or one of the -following negative numbers: - - PCRE_ERROR_NULL the argument \fIcode\fR was NULL - PCRE_ERROR_BADMAGIC the "magic number" was not found - -If the \fIoptptr\fR argument is not NULL, a copy of the options with which the -pattern was compiled is placed in the integer it points to (see -PCRE_INFO_OPTIONS above). - -If the pattern is not anchored and the \fIfirstcharptr\fR argument is not NULL, -it is used to pass back information about the first character of any matched -string (see PCRE_INFO_FIRSTCHAR above). - - -.SH MATCHING A PATTERN -The function \fBpcre_exec()\fR is called to match a subject string against a -pre-compiled pattern, which is passed in the \fIcode\fR argument. If the -pattern has been studied, the result of the study should be passed in the -\fIextra\fR argument. Otherwise this must be NULL. - -The PCRE_ANCHORED option can be passed in the \fIoptions\fR argument, whose -unused bits must be zero. However, if a pattern was compiled with -PCRE_ANCHORED, or turned out to be anchored by virtue of its contents, it -cannot be made unanchored at matching time. - -There are also three further options that can be set only at matching time: - - PCRE_NOTBOL - -The first character of the string is not the beginning of a line, so the -circumflex metacharacter should not match before it. Setting this without -PCRE_MULTILINE (at compile time) causes circumflex never to match. - - PCRE_NOTEOL - -The end of the string is not the end of a line, so the dollar metacharacter -should not match it nor (except in multiline mode) a newline immediately before -it.
Setting this without PCRE_MULTILINE (at compile time) causes dollar never -to match. - - PCRE_NOTEMPTY - -An empty string is not considered to be a valid match if this option is set. If -there are alternatives in the pattern, they are tried. If all the alternatives -match the empty string, the entire match fails. For example, if the pattern - - a?b? - -is applied to a string not beginning with "a" or "b", it matches the empty -string at the start of the subject. With PCRE_NOTEMPTY set, this match is not -valid, so PCRE searches further into the string for occurrences of "a" or "b". - -Perl has no direct equivalent of PCRE_NOTEMPTY, but it does make a special case -of a pattern match of the empty string within its \fBsplit()\fR function, and -when using the /g modifier. It is possible to emulate Perl's behaviour after -matching a null string by first trying the match again at the same offset with -PCRE_NOTEMPTY set, and then if that fails by advancing the starting offset (see -below) and trying an ordinary match again. - -The subject string is passed as a pointer in \fIsubject\fR, a length in -\fIlength\fR, and a starting offset in \fIstartoffset\fR. Unlike the pattern -string, it may contain binary zero characters. When the starting offset is -zero, the search for a match starts at the beginning of the subject, and this -is by far the most common case. - -A non-zero starting offset is useful when searching for another match in the -same subject by calling \fBpcre_exec()\fR again after a previous success. -Setting \fIstartoffset\fR differs from just passing over a shortened string and -setting PCRE_NOTBOL in the case of a pattern that begins with any kind of -lookbehind. For example, consider the pattern - - \\Biss\\B - -which finds occurrences of "iss" in the middle of words. (\\B matches only if -the current position in the subject is not a word boundary.) When applied to -the string "Mississipi" the first call to \fBpcre_exec()\fR finds the first -occurrence. 
If \fBpcre_exec()\fR is called again with just the remainder of the -subject, namely "issipi", it does not match, because \\B is always false at the -start of the subject, which is deemed to be a word boundary. However, if -\fBpcre_exec()\fR is passed the entire string again, but with \fIstartoffset\fR -set to 4, it finds the second occurrence of "iss" because it is able to look -behind the starting point to discover that it is preceded by a letter. - -If a non-zero starting offset is passed when the pattern is anchored, one -attempt to match at the given offset is tried. This can only succeed if the -pattern does not require the match to be at the start of the subject. - -In general, a pattern matches a certain portion of the subject, and in -addition, further substrings from the subject may be picked out by parts of the -pattern. Following the usage in Jeffrey Friedl's book, this is called -"capturing" in what follows, and the phrase "capturing subpattern" is used for -a fragment of a pattern that picks out a substring. PCRE supports several other -kinds of parenthesized subpattern that do not cause substrings to be captured. - -Captured substrings are returned to the caller via a vector of integer offsets -whose address is passed in \fIovector\fR. The number of elements in the vector -is passed in \fIovecsize\fR. The first two-thirds of the vector is used to pass -back captured substrings, each substring using a pair of integers. The -remaining third of the vector is used as workspace by \fBpcre_exec()\fR while -matching capturing subpatterns, and is not available for passing back -information. The length passed in \fIovecsize\fR should always be a multiple of -three. If it is not, it is rounded down. - -When a match has been successful, information about captured substrings is -returned in pairs of integers, starting at the beginning of \fIovector\fR, and -continuing up to two-thirds of its length at the most. 
The first element of a -pair is set to the offset of the first character in a substring, and the second -is set to the offset of the first character after the end of a substring. The -first pair, \fIovector[0]\fR and \fIovector[1]\fR, identify the portion of the -subject string matched by the entire pattern. The next pair is used for the -first capturing subpattern, and so on. The value returned by \fBpcre_exec()\fR -is the number of pairs that have been set. If there are no capturing -subpatterns, the return value from a successful match is 1, indicating that -just the first pair of offsets has been set. - -Some convenience functions are provided for extracting the captured substrings -as separate strings. These are described in the following section. - -It is possible for a capturing subpattern number \fIn+1\fR to match some -part of the subject when subpattern \fIn\fR has not been used at all. For -example, if the string "abc" is matched against the pattern (a|(z))(bc) -subpatterns 1 and 3 are matched, but 2 is not. When this happens, both offset -values corresponding to the unused subpattern are set to -1. - -If a capturing subpattern is matched repeatedly, it is the last portion of the -string that it matched that gets returned. - -If the vector is too small to hold all the captured substrings, it is used as -far as possible (up to two-thirds of its length), and the function returns a -value of zero. In particular, if the substring offsets are not of interest, -\fBpcre_exec()\fR may be called with \fIovector\fR passed as NULL and -\fIovecsize\fR as zero. However, if the pattern contains back references and -the \fIovector\fR isn't big enough to remember the related substrings, PCRE has -to get additional memory for use during matching. Thus it is usually advisable -to supply an \fIovector\fR. - -Note that \fBpcre_info()\fR can be used to find out how many capturing -subpatterns there are in a compiled pattern.
The smallest size for -\fIovector\fR that will allow for \fIn\fR captured substrings in addition to -the offsets of the substring matched by the whole pattern is (\fIn\fR+1)*3. - -If \fBpcre_exec()\fR fails, it returns a negative number. The following are -defined in the header file: - - PCRE_ERROR_NOMATCH (-1) - -The subject string did not match the pattern. - - PCRE_ERROR_NULL (-2) - -Either \fIcode\fR or \fIsubject\fR was passed as NULL, or \fIovector\fR was -NULL and \fIovecsize\fR was not zero. - - PCRE_ERROR_BADOPTION (-3) - -An unrecognized bit was set in the \fIoptions\fR argument. - - PCRE_ERROR_BADMAGIC (-4) - -PCRE stores a 4-byte "magic number" at the start of the compiled code, to catch -the case when it is passed a junk pointer. This is the error it gives when the -magic number isn't present. - - PCRE_ERROR_UNKNOWN_NODE (-5) - -While running the pattern match, an unknown item was encountered in the -compiled pattern. This error could be caused by a bug in PCRE or by overwriting -of the compiled pattern. - - PCRE_ERROR_NOMEMORY (-6) - -If a pattern contains back references, but the \fIovector\fR that is passed to -\fBpcre_exec()\fR is not big enough to remember the referenced substrings, PCRE -gets a block of memory at the start of matching to use for this purpose. If the -call via \fBpcre_malloc()\fR fails, this error is given. The memory is freed at -the end of matching. - - -.SH EXTRACTING CAPTURED SUBSTRINGS -Captured substrings can be accessed directly by using the offsets returned by -\fBpcre_exec()\fR in \fIovector\fR. For convenience, the functions -\fBpcre_copy_substring()\fR, \fBpcre_get_substring()\fR, and -\fBpcre_get_substring_list()\fR are provided for extracting captured substrings -as new, separate, zero-terminated strings. A substring that contains a binary -zero is correctly extracted and has a further zero added on the end, but the -result does not, of course, function as a C string. 
- -The first three arguments are the same for all three functions: \fIsubject\fR -is the subject string which has just been successfully matched, \fIovector\fR -is a pointer to the vector of integer offsets that was passed to -\fBpcre_exec()\fR, and \fIstringcount\fR is the number of substrings that -were captured by the match, including the substring that matched the entire -regular expression. This is the value returned by \fBpcre_exec\fR if it -is greater than zero. If \fBpcre_exec()\fR returned zero, indicating that it -ran out of space in \fIovector\fR, the value passed as \fIstringcount\fR should -be the size of the vector divided by three. - -The functions \fBpcre_copy_substring()\fR and \fBpcre_get_substring()\fR -extract a single substring, whose number is given as \fIstringnumber\fR. A -value of zero extracts the substring that matched the entire pattern, while -higher values extract the captured substrings. For \fBpcre_copy_substring()\fR, -the string is placed in \fIbuffer\fR, whose length is given by -\fIbuffersize\fR, while for \fBpcre_get_substring()\fR a new block of memory is -obtained via \fBpcre_malloc\fR, and its address is returned via -\fIstringptr\fR. The yield of the function is the length of the string, not -including the terminating zero, or one of - - PCRE_ERROR_NOMEMORY (-6) - -The buffer was too small for \fBpcre_copy_substring()\fR, or the attempt to get -memory failed for \fBpcre_get_substring()\fR. - - PCRE_ERROR_NOSUBSTRING (-7) - -There is no substring whose number is \fIstringnumber\fR. - -The \fBpcre_get_substring_list()\fR function extracts all available substrings -and builds a list of pointers to them. All this is done in a single block of -memory which is obtained via \fBpcre_malloc\fR. The address of the memory block -is returned via \fIlistptr\fR, which is also the start of the list of string -pointers. The end of the list is marked by a NULL pointer. 
The yield of the -function is zero if all went well, or - - PCRE_ERROR_NOMEMORY (-6) - -if the attempt to get the memory block failed. - -When any of these functions encounter a substring that is unset, which can -happen when capturing subpattern number \fIn+1\fR matches some part of the -subject, but subpattern \fIn\fR has not been used at all, they return an empty -string. This can be distinguished from a genuine zero-length substring by -inspecting the appropriate offset in \fIovector\fR, which is negative for unset -substrings. - -The two convenience functions \fBpcre_free_substring()\fR and -\fBpcre_free_substring_list()\fR can be used to free the memory returned by -a previous call of \fBpcre_get_substring()\fR or -\fBpcre_get_substring_list()\fR, respectively. They do nothing more than call -the function pointed to by \fBpcre_free\fR, which of course could be called -directly from a C program. However, PCRE is used in some situations where it is -linked via a special interface to another programming language which cannot use -\fBpcre_free\fR directly; it is for these cases that the functions are -provided. - - -.SH LIMITATIONS -There are some size limitations in PCRE but it is hoped that they will never in -practice be relevant. -The maximum length of a compiled pattern is 65539 (sic) bytes. -All values in repeating quantifiers must be less than 65536. -The maximum number of capturing subpatterns is 99. -The maximum number of all parenthesized subpatterns, including capturing -subpatterns, assertions, and other types of subpattern, is 200. - -The maximum length of a subject string is the largest positive number that an -integer variable can hold. However, PCRE uses recursion to handle subpatterns -and indefinite repetition. This means that the available stack space may limit -the size of a subject string that can be processed by certain patterns. - - -.SH DIFFERENCES FROM PERL -The differences described here are with respect to Perl 5.005. - -1. 
By default, a whitespace character is any character that the C library -function \fBisspace()\fR recognizes, though it is possible to compile PCRE with -alternative character type tables. Normally \fBisspace()\fR matches space, -formfeed, newline, carriage return, horizontal tab, and vertical tab. Perl 5 -no longer includes vertical tab in its set of whitespace characters. The \\v -escape that was in the Perl documentation for a long time was never in fact -recognized. However, the character itself was treated as whitespace at least -up to 5.002. In 5.004 and 5.005 it does not match \\s. - -2. PCRE does not allow repeat quantifiers on lookahead assertions. Perl permits -them, but they do not mean what you might think. For example, (?!a){3} does -not assert that the next three characters are not "a". It just asserts that the -next character is not "a" three times. - -3. Capturing subpatterns that occur inside negative lookahead assertions are -counted, but their entries in the offsets vector are never set. Perl sets its -numerical variables from any such patterns that are matched before the -assertion fails to match something (thereby succeeding), but only if the -negative lookahead assertion contains just one branch. - -4. Though binary zero characters are supported in the subject string, they are -not allowed in a pattern string because it is passed as a normal C string, -terminated by zero. The escape sequence "\\0" can be used in the pattern to -represent a binary zero. - -5. The following Perl escape sequences are not supported: \\l, \\u, \\L, \\U, -\\E, \\Q. In fact these are implemented by Perl's general string-handling and -are not part of its pattern matching engine. - -6. The Perl \\G assertion is not supported as it is not relevant to single -pattern matches. - -7. Fairly obviously, PCRE does not support the (?{code}) and (?p{code}) -constructions. However, there is some experimental support for recursive -patterns using the non-Perl item (?R). - -8. 
There are at the time of writing some oddities in Perl 5.005_02 concerned -with the settings of captured strings when part of a pattern is repeated. For -example, matching "aba" against the pattern /^(a(b)?)+$/ sets $2 to the value -"b", but matching "aabbaa" against /^(aa(bb)?)+$/ leaves $2 unset. However, if -the pattern is changed to /^(aa(b(b))?)+$/ then $2 (and $3) are set. - -In Perl 5.004 $2 is set in both cases, and that is also true of PCRE. If in the -future Perl changes to a consistent state that is different, PCRE may change to -follow. - -9. Another as yet unresolved discrepancy is that in Perl 5.005_02 the pattern -/^(a)?(?(1)a|b)+$/ matches the string "a", whereas in PCRE it does not. -However, in both Perl and PCRE /^(a)?a/ matched against "a" leaves $1 unset. - -10. PCRE provides some extensions to the Perl regular expression facilities: - -(a) Although lookbehind assertions must match fixed length strings, each -alternative branch of a lookbehind assertion can match a different length of -string. Perl 5.005 requires them all to have the same length. - -(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $ meta- -character matches only at the very end of the string. - -(c) If PCRE_EXTRA is set, a backslash followed by a letter with no special -meaning is faulted. - -(d) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is -inverted, that is, by default they are not greedy, but if followed by a -question mark they are. - -(e) PCRE_ANCHORED can be used to force a pattern to be tried only at the start -of the subject. - -(f) The PCRE_NOTBOL, PCRE_NOTEOL, and PCRE_NOTEMPTY options for -\fBpcre_exec()\fR have no Perl equivalents. - -(g) The (?R) construct allows for recursive pattern matching (Perl 5.6 can do -this using the (?p{code}) construct, which PCRE cannot of course support.) - - -.SH REGULAR EXPRESSION DETAILS -The syntax and semantics of the regular expressions supported by PCRE are -described below. 
Regular expressions are also described in the Perl -documentation and in a number of other books, some of which have copious -examples. Jeffrey Friedl's "Mastering Regular Expressions", published by -O'Reilly (ISBN 1-56592-257), covers them in great detail. - -The description here is intended as reference documentation. The basic -operation of PCRE is on strings of bytes. However, there is the beginnings of -some support for UTF-8 character strings. To use this support you must -configure PCRE to include it, and then call \fBpcre_compile()\fR with the -PCRE_UTF8 option. How this affects the pattern matching is described in the -final section of this document. - -A regular expression is a pattern that is matched against a subject string from -left to right. Most characters stand for themselves in a pattern, and match the -corresponding characters in the subject. As a trivial example, the pattern - - The quick brown fox - -matches a portion of a subject string that is identical to itself. The power of -regular expressions comes from the ability to include alternatives and -repetitions in the pattern. These are encoded in the pattern by the use of -\fImeta-characters\fR, which do not stand for themselves but instead are -interpreted in some special way. - -There are two different sets of meta-characters: those that are recognized -anywhere in the pattern except within square brackets, and those that are -recognized in square brackets. Outside square brackets, the meta-characters are -as follows: - - \\ general escape character with several uses - ^ assert start of subject (or line, in multiline mode) - $ assert end of subject (or line, in multiline mode) - . match any character except newline (by default) - [ start character class definition - | start of alternative branch - ( start subpattern - ) end subpattern - ? 
extends the meaning of ( - also 0 or 1 quantifier - also quantifier minimizer - * 0 or more quantifier - + 1 or more quantifier - { start min/max quantifier - -Part of a pattern that is in square brackets is called a "character class". In -a character class the only meta-characters are: - - \\ general escape character - ^ negate the class, but only if the first character - - indicates character range - ] terminates the character class - -The following sections describe the use of each of the meta-characters. - - -.SH BACKSLASH -The backslash character has several uses. Firstly, if it is followed by a -non-alphameric character, it takes away any special meaning that character may -have. This use of backslash as an escape character applies both inside and -outside character classes. - -For example, if you want to match a "*" character, you write "\\*" in the -pattern. This applies whether or not the following character would otherwise be -interpreted as a meta-character, so it is always safe to precede a -non-alphameric with "\\" to specify that it stands for itself. In particular, -if you want to match a backslash, you write "\\\\". - -If a pattern is compiled with the PCRE_EXTENDED option, whitespace in the -pattern (other than in a character class) and characters between a "#" outside -a character class and the next newline character are ignored. An escaping -backslash can be used to include a whitespace or "#" character as part of the -pattern. - -A second use of backslash provides a way of encoding non-printing characters -in patterns in a visible manner. 
There is no restriction on the appearance of -non-printing characters, apart from the binary zero that terminates a pattern, -but when a pattern is being prepared by text editing, it is usually easier to -use one of the following escape sequences than the binary character it -represents: - - \\a alarm, that is, the BEL character (hex 07) - \\cx "control-x", where x is any character - \\e escape (hex 1B) - \\f formfeed (hex 0C) - \\n newline (hex 0A) - \\r carriage return (hex 0D) - \\t tab (hex 09) - \\xhh character with hex code hh - \\ddd character with octal code ddd, or backreference - -The precise effect of "\\cx" is as follows: if "x" is a lower case letter, it -is converted to upper case. Then bit 6 of the character (hex 40) is inverted. -Thus "\\cz" becomes hex 1A, but "\\c{" becomes hex 3B, while "\\c;" becomes hex -7B. - -After "\\x", up to two hexadecimal digits are read (letters can be in upper or -lower case). - -After "\\0" up to two further octal digits are read. In both cases, if there -are fewer than two digits, just those that are present are used. Thus the -sequence "\\0\\x\\07" specifies two binary zeros followed by a BEL character. -Make sure you supply two digits after the initial zero if the character that -follows is itself an octal digit. - -The handling of a backslash followed by a digit other than 0 is complicated. -Outside a character class, PCRE reads it and any following digits as a decimal -number. If the number is less than 10, or if there have been at least that many -previous capturing left parentheses in the expression, the entire sequence is -taken as a \fIback reference\fR. A description of how this works is given -later, following the discussion of parenthesized subpatterns. 
- -Inside a character class, or if the decimal number is greater than 9 and there -have not been that many capturing subpatterns, PCRE re-reads up to three octal -digits following the backslash, and generates a single byte from the least -significant 8 bits of the value. Any subsequent digits stand for themselves. -For example: - - \\040 is another way of writing a space - \\40 is the same, provided there are fewer than 40 - previous capturing subpatterns - \\7 is always a back reference - \\11 might be a back reference, or another way of - writing a tab - \\011 is always a tab - \\0113 is a tab followed by the character "3" - \\113 is the character with octal code 113 (since there - can be no more than 99 back references) - \\377 is a byte consisting entirely of 1 bits - \\81 is either a back reference, or a binary zero - followed by the two characters "8" and "1" - -Note that octal values of 100 or greater must not be introduced by a leading -zero, because no more than three octal digits are ever read. - -All the sequences that define a single byte value can be used both inside and -outside character classes. In addition, inside a character class, the sequence -"\\b" is interpreted as the backspace character (hex 08). Outside a character -class it has a different meaning (see below). - -The third use of backslash is for specifying generic character types: - - \\d any decimal digit - \\D any character that is not a decimal digit - \\s any whitespace character - \\S any character that is not a whitespace character - \\w any "word" character - \\W any "non-word" character - -Each pair of escape sequences partitions the complete set of characters into -two disjoint sets. Any given character matches one, and only one, of each pair. - -A "word" character is any letter or digit or the underscore character, that is, -any character which can be part of a Perl "word". 
The definition of letters and -digits is controlled by PCRE's character tables, and may vary if locale- -specific matching is taking place (see "Locale support" above). For example, in -the "fr" (French) locale, some character codes greater than 128 are used for -accented letters, and these are matched by \\w. - -These character type sequences can appear both inside and outside character -classes. They each match one character of the appropriate type. If the current -matching point is at the end of the subject string, all of them fail, since -there is no character to match. - -The fourth use of backslash is for certain simple assertions. An assertion -specifies a condition that has to be met at a particular point in a match, -without consuming any characters from the subject string. The use of -subpatterns for more complicated assertions is described below. The backslashed -assertions are - - \\b word boundary - \\B not a word boundary - \\A start of subject (independent of multiline mode) - \\Z end of subject or newline at end (independent of multiline mode) - \\z end of subject (independent of multiline mode) - -These assertions may not appear in character classes (but note that "\\b" has a -different meaning, namely the backspace character, inside a character class). - -A word boundary is a position in the subject string where the current character -and the previous character do not both match \\w or \\W (i.e. one matches -\\w and the other matches \\W), or the start or end of the string if the -first or last character matches \\w, respectively. - -The \\A, \\Z, and \\z assertions differ from the traditional circumflex and -dollar (described below) in that they only ever match at the very start and end -of the subject string, whatever options are set. They are not affected by the -PCRE_NOTBOL or PCRE_NOTEOL options. If the \fIstartoffset\fR argument of -\fBpcre_exec()\fR is non-zero, \\A can never match. 
The difference between \\Z -and \\z is that \\Z matches before a newline that is the last character of the -string as well as at the end of the string, whereas \\z matches only at the -end. - - -.SH CIRCUMFLEX AND DOLLAR -Outside a character class, in the default matching mode, the circumflex -character is an assertion which is true only if the current matching point is -at the start of the subject string. If the \fIstartoffset\fR argument of -\fBpcre_exec()\fR is non-zero, circumflex can never match. Inside a character -class, circumflex has an entirely different meaning (see below). - -Circumflex need not be the first character of the pattern if a number of -alternatives are involved, but it should be the first thing in each alternative -in which it appears if the pattern is ever to match that branch. If all -possible alternatives start with a circumflex, that is, if the pattern is -constrained to match only at the start of the subject, it is said to be an -"anchored" pattern. (There are also other constructs that can cause a pattern -to be anchored.) - -A dollar character is an assertion which is true only if the current matching -point is at the end of the subject string, or immediately before a newline -character that is the last character in the string (by default). Dollar need -not be the last character of the pattern if a number of alternatives are -involved, but it should be the last item in any branch in which it appears. -Dollar has no special meaning in a character class. - -The meaning of dollar can be changed so that it matches only at the very end of -the string, by setting the PCRE_DOLLAR_ENDONLY option at compile or matching -time. This does not affect the \\Z assertion. - -The meanings of the circumflex and dollar characters are changed if the -PCRE_MULTILINE option is set. 
When this is the case, they match immediately -after and immediately before an internal "\\n" character, respectively, in -addition to matching at the start and end of the subject string. For example, -the pattern /^abc$/ matches the subject string "def\\nabc" in multiline mode, -but not otherwise. Consequently, patterns that are anchored in single line mode -because all branches start with "^" are not anchored in multiline mode, and a -match for circumflex is possible when the \fIstartoffset\fR argument of -\fBpcre_exec()\fR is non-zero. The PCRE_DOLLAR_ENDONLY option is ignored if -PCRE_MULTILINE is set. - -Note that the sequences \\A, \\Z, and \\z can be used to match the start and -end of the subject in both modes, and if all branches of a pattern start with -\\A it is always anchored, whether PCRE_MULTILINE is set or not. - - -.SH FULL STOP (PERIOD, DOT) -Outside a character class, a dot in the pattern matches any one character in -the subject, including a non-printing character, but not (by default) newline. -If the PCRE_DOTALL option is set, dots match newlines as well. The handling of -dot is entirely independent of the handling of circumflex and dollar, the only -relationship being that they both involve newline characters. Dot has no -special meaning in a character class. - - -.SH SQUARE BRACKETS -An opening square bracket introduces a character class, terminated by a closing -square bracket. A closing square bracket on its own is not special. If a -closing square bracket is required as a member of the class, it should be the -first data character in the class (after an initial circumflex, if present) or -escaped with a backslash. - -A character class matches a single character in the subject; the character must -be in the set of characters defined by the class, unless the first character in -the class is a circumflex, in which case the subject character must not be in -the set defined by the class.
If a circumflex is actually required as a member -of the class, ensure it is not the first character, or escape it with a -backslash. - -For example, the character class [aeiou] matches any lower case vowel, while -[^aeiou] matches any character that is not a lower case vowel. Note that a -circumflex is just a convenient notation for specifying the characters which -are in the class by enumerating those that are not. It is not an assertion: it -still consumes a character from the subject string, and fails if the current -pointer is at the end of the string. - -When caseless matching is set, any letters in a class represent both their -upper case and lower case versions, so for example, a caseless [aeiou] matches -"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a -caseful version would. - -The newline character is never treated in any special way in character classes, -whatever the setting of the PCRE_DOTALL or PCRE_MULTILINE options is. A class -such as [^a] will always match a newline. - -The minus (hyphen) character can be used to specify a range of characters in a -character class. For example, [d-m] matches any letter between d and m, -inclusive. If a minus character is required in a class, it must be escaped with -a backslash or appear in a position where it cannot be interpreted as -indicating a range, typically as the first or last character in the class. - -It is not possible to have the literal character "]" as the end character of a -range. A pattern such as [W-]46] is interpreted as a class of two characters -("W" and "-") followed by a literal string "46]", so it would match "W46]" or -"-46]". However, if the "]" is escaped with a backslash it is interpreted as -the end of range, so [W-\\]46] is interpreted as a single class containing a -range followed by two separate characters. The octal or hexadecimal -representation of "]" can also be used to end a range. - -Ranges operate in ASCII collating sequence. 
They can also be used for -characters specified numerically, for example [\\000-\\037]. If a range that -includes letters is used when caseless matching is set, it matches the letters -in either case. For example, [W-c] is equivalent to [][\\^_`wxyzabc], matched -caselessly, and if character tables for the "fr" locale are in use, -[\\xc8-\\xcb] matches accented E characters in both cases. - -The character types \\d, \\D, \\s, \\S, \\w, and \\W may also appear in a -character class, and add the characters that they match to the class. For -example, [\\dABCDEF] matches any hexadecimal digit. A circumflex can -conveniently be used with the upper case character types to specify a more -restricted set of characters than the matching lower case type. For example, -the class [^\\W_] matches any letter or digit, but not underscore. - -All non-alphameric characters other than \\, -, ^ (at the start) and the -terminating ] are non-special in character classes, but it does no harm if they -are escaped. - - -.SH POSIX CHARACTER CLASSES -Perl 5.6 (not yet released at the time of writing) is going to support the -POSIX notation for character classes, which uses names enclosed by [: and :] -within the enclosing square brackets. PCRE supports this notation. For example, - - [01[:alpha:]%] - -matches "0", "1", any alphabetic character, or "%". The supported class names -are - - alnum letters and digits - alpha letters - ascii character codes 0 - 127 - cntrl control characters - digit decimal digits (same as \\d) - graph printing characters, excluding space - lower lower case letters - print printing characters, including space - punct printing characters, excluding letters and digits - space white space (same as \\s) - upper upper case letters - word "word" characters (same as \\w) - xdigit hexadecimal digits - -The names "ascii" and "word" are Perl extensions. Another Perl extension is -negation, which is indicated by a ^ character after the colon. 
For example, - - [12[:^digit:]] - -matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX -syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not -supported, and an error is given if they are encountered. - - -.SH VERTICAL BAR -Vertical bar characters are used to separate alternative patterns. For example, -the pattern - - gilbert|sullivan - -matches either "gilbert" or "sullivan". Any number of alternatives may appear, -and an empty alternative is permitted (matching the empty string). -The matching process tries each alternative in turn, from left to right, -and the first one that succeeds is used. If the alternatives are within a -subpattern (defined below), "succeeds" means matching the rest of the main -pattern as well as the alternative in the subpattern. - - -.SH INTERNAL OPTION SETTING -The settings of PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and PCRE_EXTENDED -can be changed from within the pattern by a sequence of Perl option letters -enclosed between "(?" and ")". The option letters are - - i for PCRE_CASELESS - m for PCRE_MULTILINE - s for PCRE_DOTALL - x for PCRE_EXTENDED - -For example, (?im) sets caseless, multiline matching. It is also possible to -unset these options by preceding the letter with a hyphen, and a combined -setting and unsetting such as (?im-sx), which sets PCRE_CASELESS and -PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED, is also -permitted. If a letter appears both before and after the hyphen, the option is -unset. - -The scope of these option changes depends on where in the pattern the setting -occurs. For settings that are outside any subpattern (defined below), the -effect is the same as if the options were set or unset at the start of -matching. The following patterns all behave in exactly the same way: - - (?i)abc - a(?i)bc - ab(?i)c - abc(?i) - -which in turn is the same as compiling the pattern abc with PCRE_CASELESS set.
-In other words, such "top level" settings apply to the whole pattern (unless -there are other changes inside subpatterns). If there is more than one setting -of the same option at top level, the rightmost setting is used. - -If an option change occurs inside a subpattern, the effect is different. This -is a change of behaviour in Perl 5.005. An option change inside a subpattern -affects only that part of the subpattern that follows it, so - - (a(?i)b)c - -matches abc and aBc and no other strings (assuming PCRE_CASELESS is not used). -By this means, options can be made to have different settings in different -parts of the pattern. Any changes made in one alternative do carry on -into subsequent branches within the same subpattern. For example, - - (a(?i)b|c) - -matches "ab", "aB", "c", and "C", even though when matching "C" the first -branch is abandoned before the option setting. This is because the effects of -option settings happen at compile time. There would be some very weird -behaviour otherwise. - -The PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA can be changed in the -same way as the Perl-compatible options by using the characters U and X -respectively. The (?X) flag setting is special in that it must always occur -earlier in the pattern than any of the additional features it turns on, even -when it is at top level. It is best put at the start. - - -.SH SUBPATTERNS -Subpatterns are delimited by parentheses (round brackets), which can be nested. -Marking part of a pattern as a subpattern does two things: - -1. It localizes a set of alternatives. For example, the pattern - - cat(aract|erpillar|) - -matches one of the words "cat", "cataract", or "caterpillar". Without the -parentheses, it would match "cataract", "erpillar" or the empty string. - -2. It sets up the subpattern as a capturing subpattern (as defined above). 
-When the whole pattern matches, that portion of the subject string that matched -the subpattern is passed back to the caller via the \fIovector\fR argument of -\fBpcre_exec()\fR. Opening parentheses are counted from left to right (starting -from 1) to obtain the numbers of the capturing subpatterns. - -For example, if the string "the red king" is matched against the pattern - - the ((red|white) (king|queen)) - -the captured substrings are "red king", "red", and "king", and are numbered 1, -2, and 3. - -The fact that plain parentheses fulfil two functions is not always helpful. -There are often times when a grouping subpattern is required without a -capturing requirement. If an opening parenthesis is followed by "?:", the -subpattern does not do any capturing, and is not counted when computing the -number of any subsequent capturing subpatterns. For example, if the string "the -white queen" is matched against the pattern - - the ((?:red|white) (king|queen)) - -the captured substrings are "white queen" and "queen", and are numbered 1 and -2. The maximum number of captured substrings is 99, and the maximum number of -all subpatterns, both capturing and non-capturing, is 200. - -As a convenient shorthand, if any option settings are required at the start of -a non-capturing subpattern, the option letters may appear between the "?" and -the ":". Thus the two patterns - - (?i:saturday|sunday) - (?:(?i)saturday|sunday) - -match exactly the same set of strings. Because alternative branches are tried -from left to right, and options are not reset until the end of the subpattern -is reached, an option setting in one branch does affect subsequent branches, so -the above patterns match "SUNDAY" as well as "Saturday". - - -.SH REPETITION -Repetition is specified by quantifiers, which can follow any of the following -items: - - a single character, possibly escaped - the . 
metacharacter - a character class - a back reference (see next section) - a parenthesized subpattern (unless it is an assertion - see below) - -The general repetition quantifier specifies a minimum and maximum number of -permitted matches, by giving the two numbers in curly brackets (braces), -separated by a comma. The numbers must be less than 65536, and the first must -be less than or equal to the second. For example: - - z{2,4} - -matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special -character. If the second number is omitted, but the comma is present, there is -no upper limit; if the second number and the comma are both omitted, the -quantifier specifies an exact number of required matches. Thus - - [aeiou]{3,} - -matches at least 3 successive vowels, but may match many more, while - - \\d{8} - -matches exactly 8 digits. An opening curly bracket that appears in a position -where a quantifier is not allowed, or one that does not match the syntax of a -quantifier, is taken as a literal character. For example, {,6} is not a -quantifier, but a literal string of four characters. - -The quantifier {0} is permitted, causing the expression to behave as if the -previous item and the quantifier were not present. - -For convenience (and historical compatibility) the three most common -quantifiers have single-character abbreviations: - - * is equivalent to {0,} - + is equivalent to {1,} - ? is equivalent to {0,1} - -It is possible to construct infinite loops by following a subpattern that can -match no characters with a quantifier that has no upper limit, for example: - - (a?)* - -Earlier versions of Perl and PCRE used to give an error at compile time for -such patterns. However, because there are cases where this can be useful, such -patterns are now accepted, but if any repetition of the subpattern does in fact -match no characters, the loop is forcibly broken. 
- -By default, the quantifiers are "greedy", that is, they match as much as -possible (up to the maximum number of permitted times), without causing the -rest of the pattern to fail. The classic example of where this gives problems -is in trying to match comments in C programs. These appear between the -sequences /* and */ and within the sequence, individual * and / characters may -appear. An attempt to match C comments by applying the pattern - - /\\*.*\\*/ - -to the string - - /* first comment */ not comment /* second comment */ - -fails, because it matches the entire string owing to the greediness of the .* -item. - -However, if a quantifier is followed by a question mark, it ceases to be -greedy, and instead matches the minimum number of times possible, so the -pattern - - /\\*.*?\\*/ - -does the right thing with the C comments. The meaning of the various -quantifiers is not otherwise changed, just the preferred number of matches. -Do not confuse this use of question mark with its use as a quantifier in its -own right. Because it has two uses, it can sometimes appear doubled, as in - - \\d??\\d - -which matches one digit by preference, but can match two if that is the only -way the rest of the pattern matches. - -If the PCRE_UNGREEDY option is set (an option which is not available in Perl), -the quantifiers are not greedy by default, but individual ones can be made -greedy by following them with a question mark. In other words, it inverts the -default behaviour. - -When a parenthesized subpattern is quantified with a minimum repeat count that -is greater than 1 or with a limited maximum, more store is required for the -compiled pattern, in proportion to the size of the minimum or maximum. - -If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equivalent -to Perl's /s) is set, thus allowing the .
to match newlines, the pattern is -implicitly anchored, because whatever follows will be tried against every -character position in the subject string, so there is no point in retrying the -overall match at any position after the first. PCRE treats such a pattern as -though it were preceded by \\A. In cases where it is known that the subject -string contains no newlines, it is worth setting PCRE_DOTALL when the pattern -begins with .* in order to obtain this optimization, or alternatively using ^ -to indicate anchoring explicitly. - -When a capturing subpattern is repeated, the value captured is the substring -that matched the final iteration. For example, after - - (tweedle[dume]{3}\\s*)+ - -has matched "tweedledum tweedledee" the value of the captured substring is -"tweedledee". However, if there are nested capturing subpatterns, the -corresponding captured values may have been set in previous iterations. For -example, after - - /(a|(b))+/ - -matches "aba" the value of the second captured substring is "b". - - -.SH BACK REFERENCES -Outside a character class, a backslash followed by a digit greater than 0 (and -possibly further digits) is a back reference to a capturing subpattern earlier -(i.e. to its left) in the pattern, provided there have been that many previous -capturing left parentheses. - -However, if the decimal number following the backslash is less than 10, it is -always taken as a back reference, and causes an error only if there are not -that many capturing left parentheses in the entire pattern. In other words, the -parentheses that are referenced need not be to the left of the reference for -numbers less than 10. See the section entitled "Backslash" above for further -details of the handling of digits following a backslash. - -A back reference matches whatever actually matched the capturing subpattern in -the current subject string, rather than anything matching the subpattern -itself. 
So the pattern - - (sens|respons)e and \\1ibility - -matches "sense and sensibility" and "response and responsibility", but not -"sense and responsibility". If caseful matching is in force at the time of the -back reference, the case of letters is relevant. For example, - - ((?i)rah)\\s+\\1 - -matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original -capturing subpattern is matched caselessly. - -There may be more than one back reference to the same subpattern. If a -subpattern has not actually been used in a particular match, any back -references to it always fail. For example, the pattern - - (a|(bc))\\2 - -always fails if it starts to match "a" rather than "bc". Because there may be -up to 99 back references, all digits following the backslash are taken -as part of a potential back reference number. If the pattern continues with a -digit character, some delimiter must be used to terminate the back reference. -If the PCRE_EXTENDED option is set, this can be whitespace. Otherwise an empty -comment can be used. - -A back reference that occurs inside the parentheses to which it refers fails -when the subpattern is first used, so, for example, (a\\1) never matches. -However, such references can be useful inside repeated subpatterns. For -example, the pattern - - (a|b\\1)+ - -matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of -the subpattern, the back reference matches the character string corresponding -to the previous iteration. In order for this to work, the pattern must be such -that the first iteration does not need to match the back reference. This can be -done using alternation, as in the example above, or by a quantifier with a -minimum of zero. - - -.SH ASSERTIONS -An assertion is a test on the characters following or preceding the current -matching point that does not actually consume any characters. The simple -assertions coded as \\b, \\B, \\A, \\Z, \\z, ^ and $ are described above. 
More -complicated assertions are coded as subpatterns. There are two kinds: those -that look ahead of the current position in the subject string, and those that -look behind it. - -An assertion subpattern is matched in the normal way, except that it does not -cause the current matching position to be changed. Lookahead assertions start -with (?= for positive assertions and (?! for negative assertions. For example, - - \\w+(?=;) - -matches a word followed by a semicolon, but does not include the semicolon in -the match, and - - foo(?!bar) - -matches any occurrence of "foo" that is not followed by "bar". Note that the -apparently similar pattern - - (?!foo)bar - -does not find an occurrence of "bar" that is preceded by something other than -"foo"; it finds any occurrence of "bar" whatsoever, because the assertion -(?!foo) is always true when the next three characters are "bar". A -lookbehind assertion is needed to achieve this effect. - -Lookbehind assertions start with (?<= for positive assertions and (?<! for -negative assertions. - -A once-only subpattern is written using another kind of special parenthesis, -starting with (?> as in this example: - - (?>\\d+)bar - -This kind of parenthesis "locks up" the part of the pattern it contains once -it has matched, and a failure further into the pattern is prevented from -backtracking into it. Backtracking past it to previous items, however, works as -normal. - -An alternative description is that a subpattern of this type matches the string -of characters that an identical standalone pattern would match, if anchored at -the current point in the subject string. - -Once-only subpatterns are not capturing subpatterns. Simple cases such as the -above example can be thought of as a maximizing repeat that must swallow -everything it can. So, while both \\d+ and \\d+? are prepared to adjust the -number of digits they match in order to make the rest of the pattern match, -(?>\\d+) can only match an entire sequence of digits. - -This construction can of course contain arbitrarily complicated subpatterns, -and it can be nested.
- -Once-only subpatterns can be used in conjunction with lookbehind assertions to -specify efficient matching at the end of the subject string. Consider a simple -pattern such as - - abcd$ - -when applied to a long string which does not match. Because matching proceeds -from left to right, PCRE will look for each "a" in the subject and then see if -what follows matches the rest of the pattern. If the pattern is specified as - - ^.*abcd$ - -the initial .* matches the entire string at first, but when this fails (because -there is no following "a"), it backtracks to match all but the last character, -then all but the last two characters, and so on. Once again the search for "a" -covers the entire string, from right to left, so we are no better off. However, -if the pattern is written as - - ^(?>.*)(?<=abcd) - -there can be no backtracking for the .* item; it can match only the entire -string. The subsequent lookbehind assertion does a single test on the last four -characters. If it fails, the match fails immediately. For long strings, this -approach makes a significant difference to the processing time. - -When a pattern contains an unlimited repeat inside a subpattern that can itself -be repeated an unlimited number of times, the use of a once-only subpattern is -the only way to avoid some failing matches taking a very long time indeed. -The pattern - - (\\D+|<\\d+>)*[!?] - -matches an unlimited number of substrings that either consist of non-digits, or -digits enclosed in <>, followed by either ! or ?. When it matches, it runs -quickly. However, if it is applied to - - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa - -it takes a long time before reporting failure. This is because the string can -be divided between the two repeats in a large number of ways, and all have to -be tried. (The example used [!?] rather than a single character at the end, -because both PCRE and Perl have an optimization that allows for fast failure -when a single character is used. 
They remember the last single character that -is required for a match, and fail early if it is not present in the string.) -If the pattern is changed to - - ((?>\\D+)|<\\d+>)*[!?] - -sequences of non-digits cannot be broken, and failure happens quickly. - - -.SH CONDITIONAL SUBPATTERNS -It is possible to cause the matching process to obey a subpattern -conditionally or to choose between two alternative subpatterns, depending on -the result of an assertion, or whether a previous capturing subpattern matched -or not. The two possible forms of conditional subpattern are - - (?(condition)yes-pattern) - (?(condition)yes-pattern|no-pattern) - -If the condition is satisfied, the yes-pattern is used; otherwise the -no-pattern (if present) is used. If there are more than two alternatives in the -subpattern, a compile-time error occurs. - -There are two kinds of condition. If the text between the parentheses consists -of a sequence of digits, the condition is satisfied if the capturing subpattern -of that number has previously matched. The number must be greater than zero. -Consider the following pattern, which contains non-significant white space to -make it more readable (assume the PCRE_EXTENDED option) and to divide it into -three parts for ease of discussion: - - ( \\( )? [^()]+ (?(1) \\) ) - -The first part matches an optional opening parenthesis, and if that -character is present, sets it as the first captured substring. The second part -matches one or more characters that are not parentheses. The third part is a -conditional subpattern that tests whether the first set of parentheses matched -or not. If they did, that is, if the subject started with an opening parenthesis, -the condition is true, and so the yes-pattern is executed and a closing -parenthesis is required. Otherwise, since no-pattern is not present, the -subpattern matches nothing. In other words, this pattern matches a sequence of -non-parentheses, optionally enclosed in parentheses.
- -If the condition is not a sequence of digits, it must be an assertion. This may -be a positive or negative lookahead or lookbehind assertion. Consider this -pattern, again containing non-significant white space, and with the two -alternatives on the second line: - - (?(?=[^a-z]*[a-z]) - \\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} ) - -The condition is a positive lookahead assertion that matches an optional -sequence of non-letters followed by a letter. In other words, it tests for the -presence of at least one letter in the subject. If a letter is found, the -subject is matched against the first alternative; otherwise it is matched -against the second. This pattern matches strings in one of the two forms -dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits. - - -.SH COMMENTS -The sequence (?# marks the start of a comment which continues up to the next -closing parenthesis. Nested parentheses are not permitted. The characters -that make up a comment play no part in the pattern matching at all. - -If the PCRE_EXTENDED option is set, an unescaped # character outside a -character class introduces a comment that continues up to the next newline -character in the pattern. - - -.SH RECURSIVE PATTERNS -Consider the problem of matching a string in parentheses, allowing for -unlimited nested parentheses. Without the use of recursion, the best that can -be done is to use a pattern that matches up to some fixed depth of nesting. It -is not possible to handle an arbitrary nesting depth. Perl 5.6 has provided an -experimental facility that allows regular expressions to recurse (amongst other -things). It does this by interpolating Perl code in the expression at run time, -and the code can refer to the expression itself. 
A Perl pattern to solve the -parentheses problem can be created like this: - - $re = qr{\\( (?: (?>[^()]+) | (?p{$re}) )* \\)}x; - -The (?p{...}) item interpolates Perl code at run time, and in this case refers -recursively to the pattern in which it appears. Obviously, PCRE cannot support -the interpolation of Perl code. Instead, the special item (?R) is provided for -the specific case of recursion. This PCRE pattern solves the parentheses -problem (assume the PCRE_EXTENDED option is set so that white space is -ignored): - - \\( ( (?>[^()]+) | (?R) )* \\) - -First it matches an opening parenthesis. Then it matches any number of -substrings which can either be a sequence of non-parentheses, or a recursive -match of the pattern itself (i.e. a correctly parenthesized substring). Finally -there is a closing parenthesis. - -This particular example pattern contains nested unlimited repeats, and so the -use of a once-only subpattern for matching strings of non-parentheses is -important when applying the pattern to strings that do not match. For example, -when it is applied to - - (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() - -it yields "no match" quickly. However, if a once-only subpattern is not used, -the match runs for a very long time indeed because there are so many different -ways the + and * repeats can carve up the subject, and all have to be tested -before failure can be reported. - -The values set for any capturing subpatterns are those from the outermost level -of the recursion at which the subpattern value is set. If the pattern above is -matched against - - (ab(cd)ef) - -the value for the capturing parentheses is "ef", which is the last value taken -on at the top level. If additional parentheses are added, giving - - \\( ( ( (?>[^()]+) | (?R) )* ) \\) - ^ ^ - ^ ^ -the string they capture is "ab(cd)ef", the contents of the top level -parentheses. 
If there are more than 15 capturing parentheses in a pattern, PCRE -has to obtain extra memory to store data during a recursion, which it does by -using \fBpcre_malloc\fR, freeing it via \fBpcre_free\fR afterwards. If no -memory can be obtained, it saves data for the first 15 capturing parentheses -only, as there is no way to give an out-of-memory error from within a -recursion. - - -.SH PERFORMANCE -Certain items that may appear in patterns are more efficient than others. It is -more efficient to use a character class like [aeiou] than a set of alternatives -such as (a|e|i|o|u). In general, the simplest construction that provides the -required behaviour is usually the most efficient. Jeffrey Friedl's book -contains a lot of discussion about optimizing regular expressions for efficient -performance. - -When a pattern begins with .* and the PCRE_DOTALL option is set, the pattern is -implicitly anchored by PCRE, since it can match only at the start of a subject -string. However, if PCRE_DOTALL is not set, PCRE cannot make this optimization, -because the . metacharacter does not then match a newline, and if the subject -string contains newlines, the pattern may match from the character immediately -following one of them instead of from the very start. For example, the pattern - - (.*) second - -matches the subject "first\\nand second" (where \\n stands for a newline -character) with the first captured substring being "and". In order to do this, -PCRE has to retry the match starting after every newline in the subject. - -If you are using such a pattern with subject strings that do not contain -newlines, the best performance is obtained by setting PCRE_DOTALL, or starting -the pattern with ^.* to indicate explicit anchoring. That saves PCRE from -having to scan along the subject looking for a newline to restart at. - -Beware of patterns that contain nested indefinite repeats. These can take a -long time to run when applied to a string that does not match. 
Consider the -pattern fragment - - (a+)* - -This can match "aaaa" in 33 different ways, and this number increases very -rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4 -times, and for each of those cases other than 0, the + repeats can match -different numbers of times.) When the remainder of the pattern is such that the -entire match is going to fail, PCRE has in principle to try every possible -variation, and this can take an extremely long time. - -An optimization catches some of the more simple cases such as - - (a+)*b - -where a literal character follows. Before embarking on the standard matching -procedure, PCRE checks that there is a "b" later in the subject string, and if -there is not, it fails the match immediately. However, when there is no -following literal this optimization cannot be used. You can see the difference -by comparing the behaviour of - - (a+)*\\d - -with the pattern above. The former gives a failure almost instantly when -applied to a whole line of "a" characters, whereas the latter takes an -appreciable time with strings longer than about 20 characters. - - -.SH UTF-8 SUPPORT -Starting at release 3.3, PCRE has some support for character strings encoded -in the UTF-8 format. This is incomplete, and is regarded as experimental. In -order to use it, you must configure PCRE to include UTF-8 support in the code, -and, in addition, you must call \fBpcre_compile()\fR with the PCRE_UTF8 option -flag. When you do this, both the pattern and any subject strings that are -matched against it are treated as UTF-8 strings instead of just strings of -bytes, but only in the cases that are mentioned below. - -If you compile PCRE with UTF-8 support, but do not use it at run time, the -library will be a bit bigger, but the additional run time overhead is limited -to testing the PCRE_UTF8 flag in several places, so should not be very large. - -PCRE assumes that the strings it is given contain valid UTF-8 codes. 
It does -not diagnose invalid UTF-8 strings. If you pass invalid UTF-8 strings to PCRE, -the results are undefined. - -Running with PCRE_UTF8 set causes these changes in the way PCRE works: - -1. In a pattern, the escape sequence \\x{...}, where the contents of the braces -is a string of hexadecimal digits, is interpreted as a UTF-8 character whose -code number is the given hexadecimal number, for example: \\x{1234}. This -inserts from one to six literal bytes into the pattern, using the UTF-8 -encoding. If a non-hexadecimal digit appears between the braces, the item is -not recognized. - -2. The original hexadecimal escape sequence, \\xhh, generates a two-byte UTF-8 -character if its value is greater than 127. - -3. Repeat quantifiers are NOT correctly handled if they follow a multibyte -character. For example, \\x{100}* and \\xc3+ do not work. If you want to -repeat such characters, you must enclose them in non-capturing parentheses, -for example (?:\\x{100}), at present. - -4. The dot metacharacter matches one UTF-8 character instead of a single byte. - -5. Unlike literal UTF-8 characters, the dot metacharacter followed by a -repeat quantifier does operate correctly on UTF-8 characters instead of -single bytes. - -4. Although the \\x{...} escape is permitted in a character class, characters -whose values are greater than 255 cannot be included in a class. - -5. A class is matched against a UTF-8 character instead of just a single byte, -but it can match only characters whose values are less than 256. Characters -with greater values always fail to match a class. - -6. Repeated classes work correctly on multiple characters. - -7. Classes containing just a single character whose value is greater than 127 -(but less than 256), for example, [\\x80] or [^\\x{93}], do not work because -these are optimized into single byte matches. In the first case, of course, -the class brackets are just redundant. - -8. 
Lookbehind assertions move backwards in the subject by a fixed number of -characters instead of a fixed number of bytes. Simple cases have been tested -to work correctly, but there may be hidden gotchas herein. - -9. The character types such as \\d and \\w do not work correctly with UTF-8 -characters. They continue to test a single byte. - -10. Anything not explicitly mentioned here continues to work in bytes rather -than in characters. - -The following UTF-8 features of Perl 5.6 are not implemented: - -1. The escape sequence \\C to match a single byte. - -2. The use of Unicode tables and properties and escapes \\p, \\P, and \\X. - -.SH AUTHOR -Philip Hazel -.br -University Computing Service, -.br -New Museums Site, -.br -Cambridge CB2 3QG, England. -.br -Phone: +44 1223 334714 - -Last updated: 28 August 2000, -.br - the 250th anniversary of the death of J.S. Bach. -.br -Copyright (c) 1997-2000 University of Cambridge. diff --git a/pcre/doc/pcre.html b/pcre/doc/pcre.html deleted file mode 100644 index b12b2126..00000000 --- a/pcre/doc/pcre.html +++ /dev/null @@ -1,2397 +0,0 @@ - - -pcre specification - - -

pcre specification

-This HTML document has been generated automatically from the original man page. -If there is any nonsense in it, please consult the man page in case the -conversion went wrong. - -
  • NAME -

    -pcre - Perl-compatible regular expressions. -

    -
  • SYNOPSIS -

    -#include <pcre.h> -

    -

    -pcre *pcre_compile(const char *pattern, int options, -const char **errptr, int *erroffset, -const unsigned char *tableptr); -

    -

    -pcre_extra *pcre_study(const pcre *code, int options, -const char **errptr); -

    -

    -int pcre_exec(const pcre *code, const pcre_extra *extra, -const char *subject, int length, int startoffset, -int options, int *ovector, int ovecsize); -

    -

    -int pcre_copy_substring(const char *subject, int *ovector, -int stringcount, int stringnumber, char *buffer, -int buffersize); -

    -

    -int pcre_get_substring(const char *subject, int *ovector, -int stringcount, int stringnumber, -const char **stringptr); -

    -

    -int pcre_get_substring_list(const char *subject, -int *ovector, int stringcount, const char ***listptr); -

    -

    -void pcre_free_substring(const char *stringptr); -

    -

    -void pcre_free_substring_list(const char **stringptr); -

    -

    -const unsigned char *pcre_maketables(void); -

    -

    -int pcre_fullinfo(const pcre *code, const pcre_extra *extra, -int what, void *where); -

    -

    -int pcre_info(const pcre *code, int *optptr, int -*firstcharptr); -

    -

    -char *pcre_version(void); -

    -

    -void *(*pcre_malloc)(size_t); -

    -

    -void (*pcre_free)(void *); -

    -
  • DESCRIPTION -

    -The PCRE library is a set of functions that implement regular expression -pattern matching using the same syntax and semantics as Perl 5, with just a few -differences (see below). The current implementation corresponds to Perl 5.005, -with some additional features from later versions. This includes some -experimental, incomplete support for UTF-8 encoded strings. Details of exactly -what is and what is not supported are given below. -

    -

    -PCRE has its own native API, which is described in this document. There is also -a set of wrapper functions that correspond to the POSIX regular expression API. -These are described in the pcreposix documentation. -

    -

    -The native API function prototypes are defined in the header file pcre.h, -and on Unix systems the library itself is called libpcre.a, so can be -accessed by adding -lpcre to the command for linking an application which -calls it. The header file defines the macros PCRE_MAJOR and PCRE_MINOR to -contain the major and minor release numbers for the library. Applications can -use these to include support for different releases. -

    -

    -The functions pcre_compile(), pcre_study(), and pcre_exec() -are used for compiling and matching regular expressions. -

    -

    -The functions pcre_copy_substring(), pcre_get_substring(), and -pcre_get_substring_list() are convenience functions for extracting -captured substrings from a matched subject string; pcre_free_substring() -and pcre_free_substring_list() are also provided, to free the memory used -for extracted strings. -

    -

    -The function pcre_maketables() is used (optionally) to build a set of -character tables in the current locale for passing to pcre_compile(). -

    -

    -The function pcre_fullinfo() is used to find out information about a -compiled pattern; pcre_info() is an obsolete version which returns only -some of the available information, but is retained for backwards compatibility. -The function pcre_version() returns a pointer to a string containing the -version of PCRE and its date of release. -

    -

    -The global variables pcre_malloc and pcre_free initially contain -the entry points of the standard malloc() and free() functions -respectively. PCRE calls the memory management functions via these variables, -so a calling program can replace them if it wishes to intercept the calls. This -should be done before calling any PCRE functions. -

    -
  • MULTI-THREADING -

    -The PCRE functions can be used in multi-threading applications, with the -proviso that the memory management functions pointed to by pcre_malloc -and pcre_free are shared by all threads. -

    -

    -The compiled form of a regular expression is not altered during matching, so -the same compiled pattern can safely be used by several threads at once. -

    -
  • COMPILING A PATTERN -

    -The function pcre_compile() is called to compile a pattern into an -internal form. The pattern is a C string terminated by a binary zero, and -is passed in the argument pattern. A pointer to a single block of memory -that is obtained via pcre_malloc is returned. This contains the -compiled code and related data. The pcre type is defined for this for -convenience, but in fact pcre is just a typedef for void, since the -contents of the block are not externally defined. It is up to the caller to -free the memory when it is no longer required. -

    -

    -The size of a compiled pattern is roughly proportional to the length of the -pattern string, except that each character class (other than those containing -just a single character, negated or not) requires 33 bytes, and repeat -quantifiers with a minimum greater than one or a bounded maximum cause the -relevant portions of the compiled pattern to be replicated. -

    -

    -The options argument contains independent bits that affect the -compilation. It should be zero if no options are required. Some of the options, -in particular, those that are compatible with Perl, can also be set and unset -from within the pattern (see the detailed description of regular expressions -below). For these options, the contents of the options argument specifies -their initial settings at the start of compilation and execution. The -PCRE_ANCHORED option can be set at the time of matching as well as at compile -time. -

    -

    -If errptr is NULL, pcre_compile() returns NULL immediately. -Otherwise, if compilation of a pattern fails, pcre_compile() returns -NULL, and sets the variable pointed to by errptr to point to a textual -error message. The offset from the start of the pattern to the character where -the error was discovered is placed in the variable pointed to by -erroffset, which must not be NULL. If it is, an immediate error is given. -

    -

    -If the final argument, tableptr, is NULL, PCRE uses a default set of -character tables which are built when it is compiled, using the default C -locale. Otherwise, tableptr must be the result of a call to -pcre_maketables(). See the section on locale support below. -

    -

    -The following option bits are defined in the header file: -

    -

    -

    -  PCRE_ANCHORED
    -
    -

    -

    -If this bit is set, the pattern is forced to be "anchored", that is, it is -constrained to match only at the start of the string which is being searched -(the "subject string"). This effect can also be achieved by appropriate -constructs in the pattern itself, which is the only way to do it in Perl. -

    -

    -

    -  PCRE_CASELESS
    -
    -

    -

    -If this bit is set, letters in the pattern match both upper and lower case -letters. It is equivalent to Perl's /i option. -

    -

    -

    -  PCRE_DOLLAR_ENDONLY
    -
    -

    -

    -If this bit is set, a dollar metacharacter in the pattern matches only at the -end of the subject string. Without this option, a dollar also matches -immediately before the final character if it is a newline (but not before any -other newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is -set. There is no equivalent to this option in Perl. -

    -

    -

    -  PCRE_DOTALL
    -
    -

    -

    -If this bit is set, a dot metacharacter in the pattern matches all characters, -including newlines. Without it, newlines are excluded. This option is -equivalent to Perl's /s option. A negative class such as [^a] always matches a -newline character, independent of the setting of this option. -

    -

    -

    -  PCRE_EXTENDED
    -
    -

    -

    -If this bit is set, whitespace data characters in the pattern are totally -ignored except when escaped or inside a character class, and characters between -an unescaped # outside a character class and the next newline character, -inclusive, are also ignored. This is equivalent to Perl's /x option, and makes -it possible to include comments inside complicated patterns. Note, however, -that this applies only to data characters. Whitespace characters may never -appear within special character sequences in a pattern, for example within the -sequence (?( which introduces a conditional subpattern. -

    -

    -

    -  PCRE_EXTRA
    -
    -

    -

    -This option was invented in order to turn on additional functionality of PCRE -that is incompatible with Perl, but it is currently of very little use. When -set, any backslash in a pattern that is followed by a letter that has no -special meaning causes an error, thus reserving these combinations for future -expansion. By default, as in Perl, a backslash followed by a letter with no -special meaning is treated as a literal. There are at present no other features -controlled by this option. It can also be set by a (?X) option setting within a -pattern. -

    -

    -

    -  PCRE_MULTILINE
    -
    -

    -

    -By default, PCRE treats the subject string as consisting of a single "line" of -characters (even if it actually contains several newlines). The "start of line" -metacharacter (^) matches only at the start of the string, while the "end of -line" metacharacter ($) matches only at the end of the string, or before a -terminating newline (unless PCRE_DOLLAR_ENDONLY is set). This is the same as -Perl. -

    -

    -When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs -match immediately following or immediately before any newline in the subject -string, respectively, as well as at the very start and end. This is equivalent -to Perl's /m option. If there are no "\n" characters in a subject string, or -no occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no -effect. -

    -

    -

    -  PCRE_UNGREEDY
    -
    -

    -

    -This option inverts the "greediness" of the quantifiers so that they are not -greedy by default, but become greedy if followed by "?". It is not compatible -with Perl. It can also be set by a (?U) option setting within the pattern. -

    -

    -

    -  PCRE_UTF8
    -
    -

    -

    -This option causes PCRE to regard both the pattern and the subject as strings -of UTF-8 characters instead of just byte strings. However, it is available only -if PCRE has been built to include UTF-8 support. If not, the use of this option -provokes an error. Support for UTF-8 is new, experimental, and incomplete. -Details of exactly what it entails are given below. -

    -
  • STUDYING A PATTERN -

    -When a pattern is going to be used several times, it is worth spending more -time analyzing it in order to speed up the time taken for matching. The -function pcre_study() takes a pointer to a compiled pattern as its first -argument, and returns a pointer to a pcre_extra block (another void -typedef) containing additional information about the pattern; this can be -passed to pcre_exec(). If no additional information is available, NULL -is returned. -

    -

    -The second argument contains option bits. At present, no options are defined -for pcre_study(), and this argument should always be zero. -

    -

    -The third argument for pcre_study() is a pointer to an error message. If -studying succeeds (even if no data is returned), the variable it points to is -set to NULL. Otherwise it points to a textual error message. -

    -

    -At present, studying a pattern is useful only for non-anchored patterns that do -not have a single fixed starting character. A bitmap of possible starting -characters is created. -

    -
  • LOCALE SUPPORT -

    -PCRE handles caseless matching, and determines whether characters are letters, -digits, or whatever, by reference to a set of tables. The library contains a -default set of tables which is created in the default C locale when PCRE is -compiled. This is used when the final argument of pcre_compile() is NULL, -and is sufficient for many applications. -

    -

    -An alternative set of tables can, however, be supplied. Such tables are built -by calling the pcre_maketables() function, which has no arguments, in the -relevant locale. The result can then be passed to pcre_compile() as often -as necessary. For example, to build and use tables that are appropriate for the -French locale (where accented characters with codes greater than 128 are -treated as letters), the following code could be used: -

    -

    -

    -  setlocale(LC_CTYPE, "fr");
    -  tables = pcre_maketables();
    -  re = pcre_compile(..., tables);
    -
    -

    -

    -The tables are built in memory that is obtained via pcre_malloc. The -pointer that is passed to pcre_compile is saved with the compiled -pattern, and the same tables are used via this pointer by pcre_study() -and pcre_exec(). Thus for any single pattern, compilation, studying and -matching all happen in the same locale, but different patterns can be compiled -in different locales. It is the caller's responsibility to ensure that the -memory containing the tables remains available for as long as it is needed. -

    -
  • INFORMATION ABOUT A PATTERN -

    -The pcre_fullinfo() function returns information about a compiled -pattern. It replaces the obsolete pcre_info() function, which is -nevertheless retained for backwards compatibility (and is documented below). -

    -

    -The first argument for pcre_fullinfo() is a pointer to the compiled -pattern. The second argument is the result of pcre_study(), or NULL if -the pattern was not studied. The third argument specifies which piece of -information is required, while the fourth argument is a pointer to a variable -to receive the data. The yield of the function is zero for success, or one of -the following negative numbers: -

    -

    -

    -  PCRE_ERROR_NULL       the argument code was NULL
    -                        the argument where was NULL
    -  PCRE_ERROR_BADMAGIC   the "magic number" was not found
    -  PCRE_ERROR_BADOPTION  the value of what was invalid
    -
    -

    -

    -The possible values for the third argument are defined in pcre.h, and are -as follows: -

    -

    -

    -  PCRE_INFO_OPTIONS
    -
    -

    -

    -Return a copy of the options with which the pattern was compiled. The fourth -argument should point to an unsigned long int variable. These option bits -are those specified in the call to pcre_compile(), modified by any -top-level option settings within the pattern itself, and with the PCRE_ANCHORED -bit forcibly set if the form of the pattern implies that it can match only at -the start of a subject string. -

    -

    -

    -  PCRE_INFO_SIZE
    -
    -

    -

    -Return the size of the compiled pattern, that is, the value that was passed as -the argument to pcre_malloc() when PCRE was getting memory in which to -place the compiled data. The fourth argument should point to a size_t -variable. -

    -

    -

    -  PCRE_INFO_CAPTURECOUNT
    -
    -

    -

    -Return the number of capturing subpatterns in the pattern. The fourth argument -should point to an int variable. -

    -

    -

    -  PCRE_INFO_BACKREFMAX
    -
    -

    -

    -Return the number of the highest back reference in the pattern. The fourth -argument should point to an int variable. Zero is returned if there are -no back references. -

    -

    -

    -  PCRE_INFO_FIRSTCHAR
    -
    -

    -

    -Return information about the first character of any matched string, for a -non-anchored pattern. If there is a fixed first character, e.g. from a pattern -such as (cat|cow|coyote), it is returned in the integer pointed to by -where. Otherwise, if either -

    -

    -(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch -starts with "^", or -

    -

    -(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set -(if it were set, the pattern would be anchored), -

    -

    --1 is returned, indicating that the pattern matches only at the start of a -subject string or after any "\n" within the string. Otherwise -2 is returned. -For anchored patterns, -2 is returned. -

    -

    -

    -  PCRE_INFO_FIRSTTABLE
    -
    -

    -

    -If the pattern was studied, and this resulted in the construction of a 256-bit -table indicating a fixed set of characters for the first character in any -matching string, a pointer to the table is returned. Otherwise NULL is -returned. The fourth argument should point to an unsigned char * -variable. -

    -

    -

    -  PCRE_INFO_LASTLITERAL
    -
    -

    -

    -For a non-anchored pattern, return the value of the rightmost literal character -which must exist in any matched string, other than at its start. The fourth -argument should point to an int variable. If there is no such character, -or if the pattern is anchored, -1 is returned. For example, for the pattern -/a\d+z\d+/ the returned value is 'z'. -

    -

    -The pcre_info() function is now obsolete because its interface is too -restrictive to return all the available data about a compiled pattern. New -programs should use pcre_fullinfo() instead. The yield of -pcre_info() is the number of capturing subpatterns, or one of the -following negative numbers: -

    -

    -

    -  PCRE_ERROR_NULL       the argument code was NULL
    -  PCRE_ERROR_BADMAGIC   the "magic number" was not found
    -
    -

    -

    -If the optptr argument is not NULL, a copy of the options with which the -pattern was compiled is placed in the integer it points to (see -PCRE_INFO_OPTIONS above). -

    -

    -If the pattern is not anchored and the firstcharptr argument is not NULL, -it is used to pass back information about the first character of any matched -string (see PCRE_INFO_FIRSTCHAR above). -

    -
  • MATCHING A PATTERN -

    -The function pcre_exec() is called to match a subject string against a -pre-compiled pattern, which is passed in the code argument. If the -pattern has been studied, the result of the study should be passed in the -extra argument. Otherwise this must be NULL. -

    -

    -The PCRE_ANCHORED option can be passed in the options argument, whose -unused bits must be zero. However, if a pattern was compiled with -PCRE_ANCHORED, or turned out to be anchored by virtue of its contents, it -cannot be made unanchored at matching time. -

    -

    -There are also three further options that can be set only at matching time: -

    -

    -

    -  PCRE_NOTBOL
    -
    -

    -

    -The first character of the string is not the beginning of a line, so the -circumflex metacharacter should not match before it. Setting this without -PCRE_MULTILINE (at compile time) causes circumflex never to match. -

    -

    -

    -  PCRE_NOTEOL
    -
    -

    -

    -The end of the string is not the end of a line, so the dollar metacharacter -should not match it nor (except in multiline mode) a newline immediately before -it. Setting this without PCRE_MULTILINE (at compile time) causes dollar never -to match. -

    -

    -

    -  PCRE_NOTEMPTY
    -
    -

    -

    -An empty string is not considered to be a valid match if this option is set. If -there are alternatives in the pattern, they are tried. If all the alternatives -match the empty string, the entire match fails. For example, if the pattern -

    -

    -

    -  a?b?
    -
    -

    -

    -is applied to a string not beginning with "a" or "b", it matches the empty -string at the start of the subject. With PCRE_NOTEMPTY set, this match is not -valid, so PCRE searches further into the string for occurrences of "a" or "b". -

    -

    -Perl has no direct equivalent of PCRE_NOTEMPTY, but it does make a special case -of a pattern match of the empty string within its split() function, and -when using the /g modifier. It is possible to emulate Perl's behaviour after -matching a null string by first trying the match again at the same offset with -PCRE_NOTEMPTY set, and then if that fails by advancing the starting offset (see -below) and trying an ordinary match again. -

    -

    -The subject string is passed as a pointer in subject, a length in -length, and a starting offset in startoffset. Unlike the pattern -string, it may contain binary zero characters. When the starting offset is -zero, the search for a match starts at the beginning of the subject, and this -is by far the most common case. -

    -

    -A non-zero starting offset is useful when searching for another match in the -same subject by calling pcre_exec() again after a previous success. -Setting startoffset differs from just passing over a shortened string and -setting PCRE_NOTBOL in the case of a pattern that begins with any kind of -lookbehind. For example, consider the pattern -

    -

    -

    -  \Biss\B
    -
    -

    -

    -which finds occurrences of "iss" in the middle of words. (\B matches only if -the current position in the subject is not a word boundary.) When applied to -the string "Mississippi" the first call to pcre_exec() finds the first -occurrence. If pcre_exec() is called again with just the remainder of the -subject, namely "issippi", it does not match, because \B is always false at the -start of the subject, which is deemed to be a word boundary. However, if -pcre_exec() is passed the entire string again, but with startoffset -set to 4, it finds the second occurrence of "iss" because it is able to look -behind the starting point to discover that it is preceded by a letter. -

    -

    -If a non-zero starting offset is passed when the pattern is anchored, one -attempt to match at the given offset is tried. This can only succeed if the -pattern does not require the match to be at the start of the subject. -

    -

    -In general, a pattern matches a certain portion of the subject, and in -addition, further substrings from the subject may be picked out by parts of the -pattern. Following the usage in Jeffrey Friedl's book, this is called -"capturing" in what follows, and the phrase "capturing subpattern" is used for -a fragment of a pattern that picks out a substring. PCRE supports several other -kinds of parenthesized subpattern that do not cause substrings to be captured. -

    -

    -Captured substrings are returned to the caller via a vector of integer offsets -whose address is passed in ovector. The number of elements in the vector -is passed in ovecsize. The first two-thirds of the vector is used to pass -back captured substrings, each substring using a pair of integers. The -remaining third of the vector is used as workspace by pcre_exec() while -matching capturing subpatterns, and is not available for passing back -information. The length passed in ovecsize should always be a multiple of -three. If it is not, it is rounded down. -

    -

    -When a match has been successful, information about captured substrings is -returned in pairs of integers, starting at the beginning of ovector, and -continuing up to two-thirds of its length at the most. The first element of a -pair is set to the offset of the first character in a substring, and the second -is set to the offset of the first character after the end of a substring. The -first pair, ovector[0] and ovector[1], identify the portion of the -subject string matched by the entire pattern. The next pair is used for the -first capturing subpattern, and so on. The value returned by pcre_exec() -is the number of pairs that have been set. If there are no capturing -subpatterns, the return value from a successful match is 1, indicating that -just the first pair of offsets has been set. -

    -

    -Some convenience functions are provided for extracting the captured substrings -as separate strings. These are described in the following section. -

    -

    -It is possible for a capturing subpattern number n+1 to match some -part of the subject when subpattern n has not been used at all. For -example, if the string "abc" is matched against the pattern (a|(z))(bc), -subpatterns 1 and 3 are matched, but 2 is not. When this happens, both offset -values corresponding to the unused subpattern are set to -1. -

    -

    -If a capturing subpattern is matched repeatedly, it is the last portion of the -string that it matched that gets returned. -

    -

    -If the vector is too small to hold all the captured substrings, it is used as -far as possible (up to two-thirds of its length), and the function returns a -value of zero. In particular, if the substring offsets are not of interest, -pcre_exec() may be called with ovector passed as NULL and -ovecsize as zero. However, if the pattern contains back references and -the ovector isn't big enough to remember the related substrings, PCRE has -to get additional memory for use during matching. Thus it is usually advisable -to supply an ovector. -

    -

    -Note that pcre_info() can be used to find out how many capturing -subpatterns there are in a compiled pattern. The smallest size for -ovector that will allow for n captured substrings in addition to -the offsets of the substring matched by the whole pattern is (n+1)*3. -

    -

    -If pcre_exec() fails, it returns a negative number. The following are -defined in the header file: -

    -

    -

    -  PCRE_ERROR_NOMATCH        (-1)
    -
    -

    -

    -The subject string did not match the pattern. -

    -

    -

    -  PCRE_ERROR_NULL           (-2)
    -
    -

    -

    -Either code or subject was passed as NULL, or ovector was -NULL and ovecsize was not zero. -

    -

    -

    -  PCRE_ERROR_BADOPTION      (-3)
    -
    -

    -

    -An unrecognized bit was set in the options argument. -

    -

    -

    -  PCRE_ERROR_BADMAGIC       (-4)
    -
    -

    -

    -PCRE stores a 4-byte "magic number" at the start of the compiled code, to catch -the case when it is passed a junk pointer. This is the error it gives when the -magic number isn't present. -

    -

    -

    -  PCRE_ERROR_UNKNOWN_NODE   (-5)
    -
    -

    -

    -While running the pattern match, an unknown item was encountered in the -compiled pattern. This error could be caused by a bug in PCRE or by overwriting -of the compiled pattern. -

    -

    -

    -  PCRE_ERROR_NOMEMORY       (-6)
    -
    -

    -

    -If a pattern contains back references, but the ovector that is passed to -pcre_exec() is not big enough to remember the referenced substrings, PCRE -gets a block of memory at the start of matching to use for this purpose. If the -call via pcre_malloc() fails, this error is given. The memory is freed at -the end of matching. -

    -
  • EXTRACTING CAPTURED SUBSTRINGS -

    -Captured substrings can be accessed directly by using the offsets returned by -pcre_exec() in ovector. For convenience, the functions -pcre_copy_substring(), pcre_get_substring(), and -pcre_get_substring_list() are provided for extracting captured substrings -as new, separate, zero-terminated strings. A substring that contains a binary -zero is correctly extracted and has a further zero added on the end, but the -result does not, of course, function as a C string. -

    -

    -The first three arguments are the same for all three functions: subject -is the subject string which has just been successfully matched, ovector -is a pointer to the vector of integer offsets that was passed to -pcre_exec(), and stringcount is the number of substrings that -were captured by the match, including the substring that matched the entire -regular expression. This is the value returned by pcre_exec if it -is greater than zero. If pcre_exec() returned zero, indicating that it -ran out of space in ovector, the value passed as stringcount should -be the size of the vector divided by three. -

    -

    -The functions pcre_copy_substring() and pcre_get_substring() -extract a single substring, whose number is given as stringnumber. A -value of zero extracts the substring that matched the entire pattern, while -higher values extract the captured substrings. For pcre_copy_substring(), -the string is placed in buffer, whose length is given by -buffersize, while for pcre_get_substring() a new block of memory is -obtained via pcre_malloc, and its address is returned via -stringptr. The yield of the function is the length of the string, not -including the terminating zero, or one of -

    -

    -

    -  PCRE_ERROR_NOMEMORY       (-6)
    -
    -

    -

    -The buffer was too small for pcre_copy_substring(), or the attempt to get -memory failed for pcre_get_substring(). -

    -

    -

    -  PCRE_ERROR_NOSUBSTRING    (-7)
    -
    -

    -

    -There is no substring whose number is stringnumber. -

    -

    -The pcre_get_substring_list() function extracts all available substrings -and builds a list of pointers to them. All this is done in a single block of -memory which is obtained via pcre_malloc. The address of the memory block -is returned via listptr, which is also the start of the list of string -pointers. The end of the list is marked by a NULL pointer. The yield of the -function is zero if all went well, or -

    -

    -

    -  PCRE_ERROR_NOMEMORY       (-6)
    -
    -

    -

    -if the attempt to get the memory block failed. -

    -

    -When any of these functions encounters a substring that is unset, which can -happen when capturing subpattern number n+1 matches some part of the -subject, but subpattern n has not been used at all, it returns an empty -string. This can be distinguished from a genuine zero-length substring by -inspecting the appropriate offset in ovector, which is negative for unset -substrings. -

    -

    -The two convenience functions pcre_free_substring() and -pcre_free_substring_list() can be used to free the memory returned by -a previous call of pcre_get_substring() or -pcre_get_substring_list(), respectively. They do nothing more than call -the function pointed to by pcre_free, which of course could be called -directly from a C program. However, PCRE is used in some situations where it is -linked via a special interface to another programming language which cannot use -pcre_free directly; it is for these cases that the functions are -provided. -

    -
  • LIMITATIONS -

    -There are some size limitations in PCRE but it is hoped that they will never in -practice be relevant. -The maximum length of a compiled pattern is 65539 (sic) bytes. -All values in repeating quantifiers must be less than 65536. -The maximum number of capturing subpatterns is 99. -The maximum number of all parenthesized subpatterns, including capturing -subpatterns, assertions, and other types of subpattern, is 200. -

    -

    -The maximum length of a subject string is the largest positive number that an -integer variable can hold. However, PCRE uses recursion to handle subpatterns -and indefinite repetition. This means that the available stack space may limit -the size of a subject string that can be processed by certain patterns. -

    -
  • DIFFERENCES FROM PERL -

    -The differences described here are with respect to Perl 5.005. -

    -

    -1. By default, a whitespace character is any character that the C library -function isspace() recognizes, though it is possible to compile PCRE with -alternative character type tables. Normally isspace() matches space, -formfeed, newline, carriage return, horizontal tab, and vertical tab. Perl 5 -no longer includes vertical tab in its set of whitespace characters. The \v -escape that was in the Perl documentation for a long time was never in fact -recognized. However, the character itself was treated as whitespace at least -up to 5.002. In 5.004 and 5.005 it does not match \s. -

    -

    -2. PCRE does not allow repeat quantifiers on lookahead assertions. Perl permits -them, but they do not mean what you might think. For example, (?!a){3} does -not assert that the next three characters are not "a". It just asserts that the -next character is not "a" three times. -

    -

    -3. Capturing subpatterns that occur inside negative lookahead assertions are -counted, but their entries in the offsets vector are never set. Perl sets its -numerical variables from any such patterns that are matched before the -assertion fails to match something (thereby succeeding), but only if the -negative lookahead assertion contains just one branch. -

    -

    -4. Though binary zero characters are supported in the subject string, they are -not allowed in a pattern string because it is passed as a normal C string, -terminated by zero. The escape sequence "\0" can be used in the pattern to -represent a binary zero. -

    -

    -5. The following Perl escape sequences are not supported: \l, \u, \L, \U, -\E, \Q. In fact these are implemented by Perl's general string-handling and -are not part of its pattern matching engine. -

    -

    -6. The Perl \G assertion is not supported as it is not relevant to single -pattern matches. -

    -

    -7. Fairly obviously, PCRE does not support the (?{code}) and (?p{code}) -constructions. However, there is some experimental support for recursive -patterns using the non-Perl item (?R). -

    -

    -8. There are at the time of writing some oddities in Perl 5.005_02 concerned -with the settings of captured strings when part of a pattern is repeated. For -example, matching "aba" against the pattern /^(a(b)?)+$/ sets $2 to the value -"b", but matching "aabbaa" against /^(aa(bb)?)+$/ leaves $2 unset. However, if -the pattern is changed to /^(aa(b(b))?)+$/ then $2 (and $3) are set. -

    -

    -In Perl 5.004 $2 is set in both cases, and that is also true of PCRE. If in the -future Perl changes to a consistent state that is different, PCRE may change to -follow. -

    -

    -9. Another as yet unresolved discrepancy is that in Perl 5.005_02 the pattern -/^(a)?(?(1)a|b)+$/ matches the string "a", whereas in PCRE it does not. -However, in both Perl and PCRE /^(a)?a/ matched against "a" leaves $1 unset. -

    -

    -10. PCRE provides some extensions to the Perl regular expression facilities: -

    -

    -(a) Although lookbehind assertions must match fixed length strings, each -alternative branch of a lookbehind assertion can match a different length of -string. Perl 5.005 requires them all to have the same length. -

    -

    -(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $ meta- -character matches only at the very end of the string. -

    -

    -(c) If PCRE_EXTRA is set, a backslash followed by a letter with no special -meaning is faulted. -

    -

    -(d) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is -inverted, that is, by default they are not greedy, but if followed by a -question mark they are. -

    -

    -(e) PCRE_ANCHORED can be used to force a pattern to be tried only at the start -of the subject. -

    -

    -(f) The PCRE_NOTBOL, PCRE_NOTEOL, and PCRE_NOTEMPTY options for -pcre_exec() have no Perl equivalents. -

    -

    -(g) The (?R) construct allows for recursive pattern matching (Perl 5.6 can do -this using the (?p{code}) construct, which PCRE cannot of course support.) -

    -
  • REGULAR EXPRESSION DETAILS -

    -The syntax and semantics of the regular expressions supported by PCRE are -described below. Regular expressions are also described in the Perl -documentation and in a number of other books, some of which have copious -examples. Jeffrey Friedl's "Mastering Regular Expressions", published by -O'Reilly (ISBN 1-56592-257-3), covers them in great detail. -

    -

    -The description here is intended as reference documentation. The basic -operation of PCRE is on strings of bytes. However, there are the beginnings of -some support for UTF-8 character strings. To use this support you must -configure PCRE to include it, and then call pcre_compile() with the -PCRE_UTF8 option. How this affects the pattern matching is described in the -final section of this document. -

    -

    -A regular expression is a pattern that is matched against a subject string from -left to right. Most characters stand for themselves in a pattern, and match the -corresponding characters in the subject. As a trivial example, the pattern -

    -

    -

    -  The quick brown fox
    -
    -

    -

    -matches a portion of a subject string that is identical to itself. The power of -regular expressions comes from the ability to include alternatives and -repetitions in the pattern. These are encoded in the pattern by the use of -meta-characters, which do not stand for themselves but instead are -interpreted in some special way. -

    -

    -There are two different sets of meta-characters: those that are recognized -anywhere in the pattern except within square brackets, and those that are -recognized in square brackets. Outside square brackets, the meta-characters are -as follows: -

    -

    -

    -  \      general escape character with several uses
    -  ^      assert start of subject (or line, in multiline mode)
    -  $      assert end of subject (or line, in multiline mode)
    -  .      match any character except newline (by default)
    -  [      start character class definition
    -  |      start of alternative branch
    -  (      start subpattern
    -  )      end subpattern
    -  ?      extends the meaning of (
    -         also 0 or 1 quantifier
    -         also quantifier minimizer
    -  *      0 or more quantifier
    -  +      1 or more quantifier
    -  {      start min/max quantifier
    -
    -

    -

    -Part of a pattern that is in square brackets is called a "character class". In -a character class the only meta-characters are: -

    -

    -

    -  \      general escape character
    -  ^      negate the class, but only if the first character
    -  -      indicates character range
    -  ]      terminates the character class
    -
    -

    -

    -The following sections describe the use of each of the meta-characters. -

    -
  • BACKSLASH -

    -The backslash character has several uses. Firstly, if it is followed by a -non-alphameric character, it takes away any special meaning that character may -have. This use of backslash as an escape character applies both inside and -outside character classes. -

    -

    -For example, if you want to match a "*" character, you write "\*" in the -pattern. This applies whether or not the following character would otherwise be -interpreted as a meta-character, so it is always safe to precede a -non-alphameric with "\" to specify that it stands for itself. In particular, -if you want to match a backslash, you write "\\". -

    -

    -If a pattern is compiled with the PCRE_EXTENDED option, whitespace in the -pattern (other than in a character class) and characters between a "#" outside -a character class and the next newline character are ignored. An escaping -backslash can be used to include a whitespace or "#" character as part of the -pattern. -

    -

    -A second use of backslash provides a way of encoding non-printing characters -in patterns in a visible manner. There is no restriction on the appearance of -non-printing characters, apart from the binary zero that terminates a pattern, -but when a pattern is being prepared by text editing, it is usually easier to -use one of the following escape sequences than the binary character it -represents: -

    -

    -

    -  \a     alarm, that is, the BEL character (hex 07)
    -  \cx    "control-x", where x is any character
    -  \e     escape (hex 1B)
    -  \f     formfeed (hex 0C)
    -  \n     newline (hex 0A)
    -  \r     carriage return (hex 0D)
    -  \t     tab (hex 09)
    -  \xhh   character with hex code hh
    -  \ddd   character with octal code ddd, or backreference
    -
    -

    -

    -The precise effect of "\cx" is as follows: if "x" is a lower case letter, it -is converted to upper case. Then bit 6 of the character (hex 40) is inverted. -Thus "\cz" becomes hex 1A, but "\c{" becomes hex 3B, while "\c;" becomes hex -7B. -

    -

    -After "\x", up to two hexadecimal digits are read (letters can be in upper or -lower case). -

    -

    -After "\0" up to two further octal digits are read. In both cases, if there -are fewer than two digits, just those that are present are used. Thus the -sequence "\0\x\07" specifies two binary zeros followed by a BEL character. -Make sure you supply two digits after the initial zero if the character that -follows is itself an octal digit. -

    -

    -The handling of a backslash followed by a digit other than 0 is complicated. -Outside a character class, PCRE reads it and any following digits as a decimal -number. If the number is less than 10, or if there have been at least that many -previous capturing left parentheses in the expression, the entire sequence is -taken as a back reference. A description of how this works is given -later, following the discussion of parenthesized subpatterns. -

    -

    -Inside a character class, or if the decimal number is greater than 9 and there -have not been that many capturing subpatterns, PCRE re-reads up to three octal -digits following the backslash, and generates a single byte from the least -significant 8 bits of the value. Any subsequent digits stand for themselves. -For example: -

    -

    -

    -  \040   is another way of writing a space
    -  \40    is the same, provided there are fewer than 40
    -            previous capturing subpatterns
    -  \7     is always a back reference
    -  \11    might be a back reference, or another way of
    -            writing a tab
    -  \011   is always a tab
    -  \0113  is a tab followed by the character "3"
    -  \113   is the character with octal code 113 (since there
    -            can be no more than 99 back references)
    -  \377   is a byte consisting entirely of 1 bits
    -  \81    is either a back reference, or a binary zero
    -            followed by the two characters "8" and "1"
    -
    -

    -

    -Note that octal values of 100 or greater must not be introduced by a leading -zero, because no more than three octal digits are ever read. -

    -

    -All the sequences that define a single byte value can be used both inside and -outside character classes. In addition, inside a character class, the sequence -"\b" is interpreted as the backspace character (hex 08). Outside a character -class it has a different meaning (see below). -

    -

    -The third use of backslash is for specifying generic character types: -

    -

    -

    -  \d     any decimal digit
    -  \D     any character that is not a decimal digit
    -  \s     any whitespace character
    -  \S     any character that is not a whitespace character
    -  \w     any "word" character
    -  \W     any "non-word" character
    -
    -

    -

    -Each pair of escape sequences partitions the complete set of characters into -two disjoint sets. Any given character matches one, and only one, of each pair. -

    -

    -A "word" character is any letter or digit or the underscore character, that is, -any character which can be part of a Perl "word". The definition of letters and -digits is controlled by PCRE's character tables, and may vary if locale- -specific matching is taking place (see "Locale support" above). For example, in -the "fr" (French) locale, some character codes greater than 128 are used for -accented letters, and these are matched by \w. -

    -

    -These character type sequences can appear both inside and outside character -classes. They each match one character of the appropriate type. If the current -matching point is at the end of the subject string, all of them fail, since -there is no character to match. -

    -

    -The fourth use of backslash is for certain simple assertions. An assertion -specifies a condition that has to be met at a particular point in a match, -without consuming any characters from the subject string. The use of -subpatterns for more complicated assertions is described below. The backslashed -assertions are -

    -

    -

    -  \b     word boundary
    -  \B     not a word boundary
    -  \A     start of subject (independent of multiline mode)
    -  \Z     end of subject or newline at end (independent of multiline mode)
    -  \z     end of subject (independent of multiline mode)
    -
    -

    -

    -These assertions may not appear in character classes (but note that "\b" has a -different meaning, namely the backspace character, inside a character class). -

    -

    -A word boundary is a position in the subject string where the current character -and the previous character do not both match \w or \W (i.e. one matches -\w and the other matches \W), or the start or end of the string if the -first or last character matches \w, respectively. -

    -

    -The \A, \Z, and \z assertions differ from the traditional circumflex and -dollar (described below) in that they only ever match at the very start and end -of the subject string, whatever options are set. They are not affected by the -PCRE_NOTBOL or PCRE_NOTEOL options. If the startoffset argument of -pcre_exec() is non-zero, \A can never match. The difference between \Z -and \z is that \Z matches before a newline that is the last character of the -string as well as at the end of the string, whereas \z matches only at the -end. -

    -
  • CIRCUMFLEX AND DOLLAR -

    -Outside a character class, in the default matching mode, the circumflex -character is an assertion which is true only if the current matching point is -at the start of the subject string. If the startoffset argument of -pcre_exec() is non-zero, circumflex can never match. Inside a character -class, circumflex has an entirely different meaning (see below). -

    -

    -Circumflex need not be the first character of the pattern if a number of -alternatives are involved, but it should be the first thing in each alternative -in which it appears if the pattern is ever to match that branch. If all -possible alternatives start with a circumflex, that is, if the pattern is -constrained to match only at the start of the subject, it is said to be an -"anchored" pattern. (There are also other constructs that can cause a pattern -to be anchored.) -

    -

    -A dollar character is an assertion which is true only if the current matching -point is at the end of the subject string, or immediately before a newline -character that is the last character in the string (by default). Dollar need -not be the last character of the pattern if a number of alternatives are -involved, but it should be the last item in any branch in which it appears. -Dollar has no special meaning in a character class. -

    -

    -The meaning of dollar can be changed so that it matches only at the very end of -the string, by setting the PCRE_DOLLAR_ENDONLY option at compile or matching -time. This does not affect the \Z assertion. -

    -

    -The meanings of the circumflex and dollar characters are changed if the -PCRE_MULTILINE option is set. When this is the case, they match immediately -after and immediately before an internal "\n" character, respectively, in -addition to matching at the start and end of the subject string. For example, -the pattern /^abc$/ matches the subject string "def\nabc" in multiline mode, -but not otherwise. Consequently, patterns that are anchored in single line mode -because all branches start with "^" are not anchored in multiline mode, and a -match for circumflex is possible when the startoffset argument of -pcre_exec() is non-zero. The PCRE_DOLLAR_ENDONLY option is ignored if -PCRE_MULTILINE is set. -

    -

    -Note that the sequences \A, \Z, and \z can be used to match the start and -end of the subject in both modes, and if all branches of a pattern start with -\A it is always anchored, whether PCRE_MULTILINE is set or not. -

    -
  • FULL STOP (PERIOD, DOT) -

    -Outside a character class, a dot in the pattern matches any one character in -the subject, including a non-printing character, but not (by default) newline. -If the PCRE_DOTALL option is set, dots match newlines as well. The handling of -dot is entirely independent of the handling of circumflex and dollar, the only -relationship being that they both involve newline characters. Dot has no -special meaning in a character class. -

    -
  • SQUARE BRACKETS -

    -An opening square bracket introduces a character class, terminated by a closing -square bracket. A closing square bracket on its own is not special. If a -closing square bracket is required as a member of the class, it should be the -first data character in the class (after an initial circumflex, if present) or -escaped with a backslash. -

    -

    -A character class matches a single character in the subject; the character must -be in the set of characters defined by the class, unless the first character in -the class is a circumflex, in which case the subject character must not be in -the set defined by the class. If a circumflex is actually required as a member -of the class, ensure it is not the first character, or escape it with a -backslash. -

    -

    -For example, the character class [aeiou] matches any lower case vowel, while -[^aeiou] matches any character that is not a lower case vowel. Note that a -circumflex is just a convenient notation for specifying the characters which -are in the class by enumerating those that are not. It is not an assertion: it -still consumes a character from the subject string, and fails if the current -pointer is at the end of the string. -

    -

    -When caseless matching is set, any letters in a class represent both their -upper case and lower case versions, so for example, a caseless [aeiou] matches -"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a -caseful version would. -

    -

    -The newline character is never treated in any special way in character classes, -whatever the setting of the PCRE_DOTALL or PCRE_MULTILINE options is. A class -such as [^a] will always match a newline. -

    -

    -The minus (hyphen) character can be used to specify a range of characters in a -character class. For example, [d-m] matches any letter between d and m, -inclusive. If a minus character is required in a class, it must be escaped with -a backslash or appear in a position where it cannot be interpreted as -indicating a range, typically as the first or last character in the class. -

    -

    -It is not possible to have the literal character "]" as the end character of a -range. A pattern such as [W-]46] is interpreted as a class of two characters -("W" and "-") followed by a literal string "46]", so it would match "W46]" or -"-46]". However, if the "]" is escaped with a backslash it is interpreted as -the end of range, so [W-\]46] is interpreted as a single class containing a -range followed by two separate characters. The octal or hexadecimal -representation of "]" can also be used to end a range. -

    -

    -Ranges operate in ASCII collating sequence. They can also be used for -characters specified numerically, for example [\000-\037]. If a range that -includes letters is used when caseless matching is set, it matches the letters -in either case. For example, [W-c] is equivalent to [][\^_`wxyzabc], matched -caselessly, and if character tables for the "fr" locale are in use, -[\xc8-\xcb] matches accented E characters in both cases. -

    -

    -The character types \d, \D, \s, \S, \w, and \W may also appear in a -character class, and add the characters that they match to the class. For -example, [\dABCDEF] matches any hexadecimal digit. A circumflex can -conveniently be used with the upper case character types to specify a more -restricted set of characters than the matching lower case type. For example, -the class [^\W_] matches any letter or digit, but not underscore. -

    -

    -All non-alphameric characters other than \, -, ^ (at the start) and the -terminating ] are non-special in character classes, but it does no harm if they -are escaped. -

    -
  • POSIX CHARACTER CLASSES -

    -Perl 5.6 (not yet released at the time of writing) is going to support the -POSIX notation for character classes, which uses names enclosed by [: and :] -within the enclosing square brackets. PCRE supports this notation. For example, -

    -

    -

    -  [01[:alpha:]%]
    -
    -

    -

    -matches "0", "1", any alphabetic character, or "%". The supported class names -are -

    -

    -

    -  alnum    letters and digits
    -  alpha    letters
    -  ascii    character codes 0 - 127
    -  cntrl    control characters
    -  digit    decimal digits (same as \d)
    -  graph    printing characters, excluding space
    -  lower    lower case letters
    -  print    printing characters, including space
    -  punct    printing characters, excluding letters and digits
    -  space    white space (same as \s)
    -  upper    upper case letters
    -  word     "word" characters (same as \w)
    -  xdigit   hexadecimal digits
    -
    -

    -

    -The names "ascii" and "word" are Perl extensions. Another Perl extension is -negation, which is indicated by a ^ character after the colon. For example, -

    -

    -

    -  [12[:^digit:]]
    -
    -

    -

    -matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX -syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not -supported, and an error is given if they are encountered. -

    -
  • VERTICAL BAR -

    -Vertical bar characters are used to separate alternative patterns. For example, -the pattern -

    -

    -

    -  gilbert|sullivan
    -
    -

    -

    -matches either "gilbert" or "sullivan". Any number of alternatives may appear, -and an empty alternative is permitted (matching the empty string). -The matching process tries each alternative in turn, from left to right, -and the first one that succeeds is used. If the alternatives are within a -subpattern (defined below), "succeeds" means matching the rest of the main -pattern as well as the alternative in the subpattern. -

    -
  • INTERNAL OPTION SETTING -

    -The settings of PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and PCRE_EXTENDED -can be changed from within the pattern by a sequence of Perl option letters -enclosed between "(?" and ")". The option letters are -

    -

    -

    -  i  for PCRE_CASELESS
    -  m  for PCRE_MULTILINE
    -  s  for PCRE_DOTALL
    -  x  for PCRE_EXTENDED
    -
    -

    -

    -For example, (?im) sets caseless, multiline matching. It is also possible to -unset these options by preceding the letter with a hyphen, and a combined -setting and unsetting such as (?im-sx), which sets PCRE_CASELESS and -PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED, is also -permitted. If a letter appears both before and after the hyphen, the option is -unset. -

    -

    -The scope of these option changes depends on where in the pattern the setting -occurs. For settings that are outside any subpattern (defined below), the -effect is the same as if the options were set or unset at the start of -matching. The following patterns all behave in exactly the same way: -

    -

    -

    -  (?i)abc
    -  a(?i)bc
    -  ab(?i)c
    -  abc(?i)
    -
    -

    -

    -which in turn is the same as compiling the pattern abc with PCRE_CASELESS set. -In other words, such "top level" settings apply to the whole pattern (unless -there are other changes inside subpatterns). If there is more than one setting -of the same option at top level, the rightmost setting is used. -

    -

    -If an option change occurs inside a subpattern, the effect is different. This -is a change of behaviour in Perl 5.005. An option change inside a subpattern -affects only that part of the subpattern that follows it, so -

    -

    -

    -  (a(?i)b)c
    -
    -

    -

    -matches abc and aBc and no other strings (assuming PCRE_CASELESS is not used). -By this means, options can be made to have different settings in different -parts of the pattern. Any changes made in one alternative do carry on -into subsequent branches within the same subpattern. For example, -

    -

    -

    -  (a(?i)b|c)
    -
    -

    -

    -matches "ab", "aB", "c", and "C", even though when matching "C" the first -branch is abandoned before the option setting. This is because the effects of -option settings happen at compile time. There would be some very weird -behaviour otherwise. -

    -

    -The PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA can be changed in the -same way as the Perl-compatible options by using the characters U and X -respectively. The (?X) flag setting is special in that it must always occur -earlier in the pattern than any of the additional features it turns on, even -when it is at top level. It is best put at the start. -

    -
  • SUBPATTERNS -

    -Subpatterns are delimited by parentheses (round brackets), which can be nested. -Marking part of a pattern as a subpattern does two things: -

    -

    -1. It localizes a set of alternatives. For example, the pattern -

    -

    -

    -  cat(aract|erpillar|)
    -
    -

    -

    -matches one of the words "cat", "cataract", or "caterpillar". Without the -parentheses, it would match "cataract", "erpillar" or the empty string. -

    -

    -2. It sets up the subpattern as a capturing subpattern (as defined above). -When the whole pattern matches, that portion of the subject string that matched -the subpattern is passed back to the caller via the ovector argument of -pcre_exec(). Opening parentheses are counted from left to right (starting -from 1) to obtain the numbers of the capturing subpatterns. -

    -

    -For example, if the string "the red king" is matched against the pattern -

    -

    -

    -  the ((red|white) (king|queen))
    -
    -

    -

    -the captured substrings are "red king", "red", and "king", and are numbered 1, -2, and 3. -

    -

    -The fact that plain parentheses fulfil two functions is not always helpful. -There are often times when a grouping subpattern is required without a -capturing requirement. If an opening parenthesis is followed by "?:", the -subpattern does not do any capturing, and is not counted when computing the -number of any subsequent capturing subpatterns. For example, if the string "the -white queen" is matched against the pattern -

    -

    -

    -  the ((?:red|white) (king|queen))
    -
    -

    -

    -the captured substrings are "white queen" and "queen", and are numbered 1 and -2. The maximum number of captured substrings is 99, and the maximum number of -all subpatterns, both capturing and non-capturing, is 200. -

    -

    -As a convenient shorthand, if any option settings are required at the start of -a non-capturing subpattern, the option letters may appear between the "?" and -the ":". Thus the two patterns -

    -

    -

    -  (?i:saturday|sunday)
    -  (?:(?i)saturday|sunday)
    -
    -

    -

    -match exactly the same set of strings. Because alternative branches are tried -from left to right, and options are not reset until the end of the subpattern -is reached, an option setting in one branch does affect subsequent branches, so -the above patterns match "SUNDAY" as well as "Saturday". -

    -
  • REPETITION -

    -Repetition is specified by quantifiers, which can follow any of the following -items: -

    -

    -

    -  a single character, possibly escaped
    -  the . metacharacter
    -  a character class
    -  a back reference (see next section)
    -  a parenthesized subpattern (unless it is an assertion - see below)
    -
    -

    -

    -The general repetition quantifier specifies a minimum and maximum number of -permitted matches, by giving the two numbers in curly brackets (braces), -separated by a comma. The numbers must be less than 65536, and the first must -be less than or equal to the second. For example: -

    -

    -

    -  z{2,4}
    -
    -

    -

    -matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special -character. If the second number is omitted, but the comma is present, there is -no upper limit; if the second number and the comma are both omitted, the -quantifier specifies an exact number of required matches. Thus -

    -

    -

    -  [aeiou]{3,}
    -
    -

    -

    -matches at least 3 successive vowels, but may match many more, while -

    -

    -

    -  \d{8}
    -
    -

    -

    -matches exactly 8 digits. An opening curly bracket that appears in a position -where a quantifier is not allowed, or one that does not match the syntax of a -quantifier, is taken as a literal character. For example, {,6} is not a -quantifier, but a literal string of four characters. -

    -

    -The quantifier {0} is permitted, causing the expression to behave as if the -previous item and the quantifier were not present. -

    -

    -For convenience (and historical compatibility) the three most common -quantifiers have single-character abbreviations: -

    -

    -

    -  *    is equivalent to {0,}
    -  +    is equivalent to {1,}
    -  ?    is equivalent to {0,1}
    -
    -

    -

    -It is possible to construct infinite loops by following a subpattern that can -match no characters with a quantifier that has no upper limit, for example: -

    -

    -

    -  (a?)*
    -
    -

    -

    -Earlier versions of Perl and PCRE used to give an error at compile time for -such patterns. However, because there are cases where this can be useful, such -patterns are now accepted, but if any repetition of the subpattern does in fact -match no characters, the loop is forcibly broken. -

    -

    -By default, the quantifiers are "greedy", that is, they match as much as -possible (up to the maximum number of permitted times), without causing the -rest of the pattern to fail. The classic example of where this gives problems -is in trying to match comments in C programs. These appear between the -sequences /* and */ and within the sequence, individual * and / characters may -appear. An attempt to match C comments by applying the pattern -

    -

    -

    -  /\*.*\*/
    -
    -

    -

    -to the string -

    -

    -

    -  /* first comment */  not comment  /* second comment */
    -
    -

    -

    -fails, because it matches the entire string owing to the greediness of the .* -item. -

    -

    -However, if a quantifier is followed by a question mark, it ceases to be -greedy, and instead matches the minimum number of times possible, so the -pattern -

    -

    -

    -  /\*.*?\*/
    -
    -

    -

    -does the right thing with the C comments. The meaning of the various -quantifiers is not otherwise changed, just the preferred number of matches. -Do not confuse this use of question mark with its use as a quantifier in its -own right. Because it has two uses, it can sometimes appear doubled, as in -

    -

    -

    -  \d??\d
    -
    -

    -

    -which matches one digit by preference, but can match two if that is the only -way the rest of the pattern matches. -

    -

    -If the PCRE_UNGREEDY option is set (an option which is not available in Perl), -the quantifiers are not greedy by default, but individual ones can be made -greedy by following them with a question mark. In other words, it inverts the -default behaviour. -

    -

    -When a parenthesized subpattern is quantified with a minimum repeat count that -is greater than 1 or with a limited maximum, more store is required for the -compiled pattern, in proportion to the size of the minimum or maximum. -

    -

    -If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equivalent -to Perl's /s) is set, thus allowing the . to match newlines, the pattern is -implicitly anchored, because whatever follows will be tried against every -character position in the subject string, so there is no point in retrying the -overall match at any position after the first. PCRE treats such a pattern as -though it were preceded by \A. In cases where it is known that the subject -string contains no newlines, it is worth setting PCRE_DOTALL when the pattern -begins with .* in order to obtain this optimization, or alternatively using ^ -to indicate anchoring explicitly. -

    -

    -When a capturing subpattern is repeated, the value captured is the substring -that matched the final iteration. For example, after -

    -

    -

    -  (tweedle[dume]{3}\s*)+
    -
    -

    -

    -has matched "tweedledum tweedledee" the value of the captured substring is -"tweedledee". However, if there are nested capturing subpatterns, the -corresponding captured values may have been set in previous iterations. For -example, after -

    -

    -

    -  /(a|(b))+/
    -
    -

    -

    -matches "aba" the value of the second captured substring is "b". -

    -
  • BACK REFERENCES -

    -Outside a character class, a backslash followed by a digit greater than 0 (and -possibly further digits) is a back reference to a capturing subpattern earlier -(i.e. to its left) in the pattern, provided there have been that many previous -capturing left parentheses. -

    -

    -However, if the decimal number following the backslash is less than 10, it is -always taken as a back reference, and causes an error only if there are not -that many capturing left parentheses in the entire pattern. In other words, the -parentheses that are referenced need not be to the left of the reference for -numbers less than 10. See the section entitled "Backslash" above for further -details of the handling of digits following a backslash. -

    -

    -A back reference matches whatever actually matched the capturing subpattern in -the current subject string, rather than anything matching the subpattern -itself. So the pattern -

    -

    -

    -  (sens|respons)e and \1ibility
    -
    -

    -

    -matches "sense and sensibility" and "response and responsibility", but not -"sense and responsibility". If caseful matching is in force at the time of the -back reference, the case of letters is relevant. For example, -

    -

    -

    -  ((?i)rah)\s+\1
    -
    -

    -

    -matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original -capturing subpattern is matched caselessly. -

    -

    -There may be more than one back reference to the same subpattern. If a -subpattern has not actually been used in a particular match, any back -references to it always fail. For example, the pattern -

    -

    -

    -  (a|(bc))\2
    -
    -

    -

    -always fails if it starts to match "a" rather than "bc". Because there may be -up to 99 back references, all digits following the backslash are taken -as part of a potential back reference number. If the pattern continues with a -digit character, some delimiter must be used to terminate the back reference. -If the PCRE_EXTENDED option is set, this can be whitespace. Otherwise an empty -comment can be used. -

    -

    -A back reference that occurs inside the parentheses to which it refers fails -when the subpattern is first used, so, for example, (a\1) never matches. -However, such references can be useful inside repeated subpatterns. For -example, the pattern -

    -

    -

    -  (a|b\1)+
    -
    -

    -

    -matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of -the subpattern, the back reference matches the character string corresponding -to the previous iteration. In order for this to work, the pattern must be such -that the first iteration does not need to match the back reference. This can be -done using alternation, as in the example above, or by a quantifier with a -minimum of zero. -

    -
  • ASSERTIONS -

    -An assertion is a test on the characters following or preceding the current -matching point that does not actually consume any characters. The simple -assertions coded as \b, \B, \A, \Z, \z, ^ and $ are described above. More -complicated assertions are coded as subpatterns. There are two kinds: those -that look ahead of the current position in the subject string, and those that -look behind it. -

    -

    -An assertion subpattern is matched in the normal way, except that it does not -cause the current matching position to be changed. Lookahead assertions start -with (?= for positive assertions and (?! for negative assertions. For example, -

    -

    -

    -  \w+(?=;)
    -
    -

    -

    -matches a word followed by a semicolon, but does not include the semicolon in -the match, and -

    -

    -

    -  foo(?!bar)
    -
    -

    -

    -matches any occurrence of "foo" that is not followed by "bar". Note that the -apparently similar pattern -

    -

    -

    -  (?!foo)bar
    -
    -

    -

    -does not find an occurrence of "bar" that is preceded by something other than -"foo"; it finds any occurrence of "bar" whatsoever, because the assertion -(?!foo) is always true when the next three characters are "bar". A -lookbehind assertion is needed to achieve this effect. -

    -

    -Lookbehind assertions start with (?<= for positive assertions and (?<! for -negative assertions. For example, -

    -

    -

    -  (?<!foo)bar
    -
    -

    -

    -does find an occurrence of "bar" that is not preceded by "foo". The contents of -a lookbehind assertion are restricted such that all the strings it matches must -have a fixed length. However, if there are several alternatives, they do not -all have to have the same fixed length. Thus -

    -

    -

    -  (?<=bullock|donkey)
    -
    -

    -

    -is permitted, but -

    -

    -

    -  (?<!dogs?|cats?)
    -
    -

    -

    -causes an error at compile time. Branches that match different length strings -are permitted only at the top level of a lookbehind assertion. This is an -extension compared with Perl 5.005, which requires all branches to match the -same length of string. An assertion such as -

    -

    -

    -  (?<=ab(c|de))
    -
    -

    -

    -is not permitted, because its single top-level branch can match two different -lengths, but it is acceptable if rewritten to use two top-level branches: -

    -

    -

    -  (?<=abc|abde)
    -
    -

    -

    -The implementation of lookbehind assertions is, for each alternative, to -temporarily move the current position back by the fixed width and then try to -match. If there are insufficient characters before the current position, the -match is deemed to fail. Lookbehinds in conjunction with once-only subpatterns -can be particularly useful for matching at the ends of strings; an example is -given at the end of the section on once-only subpatterns. -

    -

    -Several assertions (of any sort) may occur in succession. For example, -

    -

    -

    -  (?<=\d{3})(?<!999)foo
    -
    -

    -

    -matches "foo" preceded by three digits that are not "999". Notice that each of -the assertions is applied independently at the same point in the subject -string. First there is a check that the previous three characters are all -digits, and then there is a check that the same three characters are not "999". -This pattern does not match "foo" preceded by six characters, the first -three of which are digits and the last three of which are not "999". For example, it -doesn't match "123abcfoo". A pattern to do that is -

    -

    -

    -  (?<=\d{3}...)(?<!999)foo
    -
    -

    -

    -This time the first assertion looks at the preceding six characters, checking -that the first three are digits, and then the second assertion checks that the -preceding three characters are not "999". -

    -

    -Assertions can be nested in any combination. For example, -

    -

    -

    -  (?<=(?<!foo)bar)baz
    -
    -

    -

    -matches an occurrence of "baz" that is preceded by "bar" which in turn is not -preceded by "foo", while -

    -

    -

    -  (?<=\d{3}(?!999)...)foo
    -
    -

    -

    -is another pattern which matches "foo" preceded by three digits and any three -characters that are not "999". -

    -

    -Assertion subpatterns are not capturing subpatterns, and may not be repeated, -because it makes no sense to assert the same thing several times. If any kind -of assertion contains capturing subpatterns within it, these are counted for -the purposes of numbering the capturing subpatterns in the whole pattern. -However, substring capturing is carried out only for positive assertions, -because it does not make sense for negative assertions. -

    -

    -Assertions count towards the maximum of 200 parenthesized subpatterns. -

    -
  • ONCE-ONLY SUBPATTERNS -

    -With both maximizing and minimizing repetition, failure of what follows -normally causes the repeated item to be re-evaluated to see if a different -number of repeats allows the rest of the pattern to match. Sometimes it is -useful to prevent this, either to change the nature of the match, or to cause -it to fail earlier than it otherwise might, when the author of the pattern knows -there is no point in carrying on. -

    -

    -Consider, for example, the pattern \d+foo when applied to the subject line -

    -

    -

    -  123456bar
    -
    -

    -

    -After matching all 6 digits and then failing to match "foo", the normal -action of the matcher is to try again with only 5 digits matching the \d+ -item, and then with 4, and so on, before ultimately failing. Once-only -subpatterns provide the means for specifying that once a portion of the pattern -has matched, it is not to be re-evaluated in this way, so the matcher would -give up immediately on failing to match "foo" the first time. The notation is -another kind of special parenthesis, starting with (?> as in this example: -

    -

    -

    -  (?>\d+)foo
    -
    -

    -

    -This kind of parenthesis "locks up" the part of the pattern it contains once -it has matched, and a failure further into the pattern is prevented from -backtracking into it. Backtracking past it to previous items, however, works as -normal. -

    -

    -An alternative description is that a subpattern of this type matches the string -of characters that an identical standalone pattern would match, if anchored at -the current point in the subject string. -

    -

    -Once-only subpatterns are not capturing subpatterns. Simple cases such as the -above example can be thought of as a maximizing repeat that must swallow -everything it can. So, while both \d+ and \d+? are prepared to adjust the -number of digits they match in order to make the rest of the pattern match, -(?>\d+) can only match an entire sequence of digits. -

    -

    -This construction can of course contain arbitrarily complicated subpatterns, -and it can be nested. -

    -

    -Once-only subpatterns can be used in conjunction with lookbehind assertions to -specify efficient matching at the end of the subject string. Consider a simple -pattern such as -

    -

    -

    -  abcd$
    -
    -

    -

    -when applied to a long string which does not match. Because matching proceeds -from left to right, PCRE will look for each "a" in the subject and then see if -what follows matches the rest of the pattern. If the pattern is specified as -

    -

    -

    -  ^.*abcd$
    -
    -

    -

    -the initial .* matches the entire string at first, but when this fails (because -there is no following "a"), it backtracks to match all but the last character, -then all but the last two characters, and so on. Once again the search for "a" -covers the entire string, from right to left, so we are no better off. However, -if the pattern is written as -

    -

    -

    -  ^(?>.*)(?<=abcd)
    -
    -

    -

    -there can be no backtracking for the .* item; it can match only the entire -string. The subsequent lookbehind assertion does a single test on the last four -characters. If it fails, the match fails immediately. For long strings, this -approach makes a significant difference to the processing time. -

    -

    -When a pattern contains an unlimited repeat inside a subpattern that can itself -be repeated an unlimited number of times, the use of a once-only subpattern is -the only way to avoid some failing matches taking a very long time indeed. -The pattern -

    -

    -

    -  (\D+|<\d+>)*[!?]
    -
    -

    -

    -matches an unlimited number of substrings that either consist of non-digits, or -digits enclosed in <>, followed by either ! or ?. When it matches, it runs -quickly. However, if it is applied to -

    -

    -

    -  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
    -
    -

    -

    -it takes a long time before reporting failure. This is because the string can -be divided between the two repeats in a large number of ways, and all have to -be tried. (The example used [!?] rather than a single character at the end, -because both PCRE and Perl have an optimization that allows for fast failure -when a single character is used. They remember the last single character that -is required for a match, and fail early if it is not present in the string.) -If the pattern is changed to -

    -

    -

    -  ((?>\D+)|<\d+>)*[!?]
    -
    -

    -

    -sequences of non-digits cannot be broken, and failure happens quickly. -

    -
  • CONDITIONAL SUBPATTERNS -

    -It is possible to cause the matching process to obey a subpattern -conditionally or to choose between two alternative subpatterns, depending on -the result of an assertion, or whether a previous capturing subpattern matched -or not. The two possible forms of conditional subpattern are -

    -

    -

    -  (?(condition)yes-pattern)
    -  (?(condition)yes-pattern|no-pattern)
    -
    -

    -

    -If the condition is satisfied, the yes-pattern is used; otherwise the -no-pattern (if present) is used. If there are more than two alternatives in the -subpattern, a compile-time error occurs. -

    -

    -There are two kinds of condition. If the text between the parentheses consists -of a sequence of digits, the condition is satisfied if the capturing subpattern -of that number has previously matched. The number must be greater than zero. -Consider the following pattern, which contains non-significant white space to -make it more readable (assume the PCRE_EXTENDED option) and to divide it into -three parts for ease of discussion: -

    -

    -

    -  ( \( )?    [^()]+    (?(1) \) )
    -
    -

    -

    -The first part matches an optional opening parenthesis, and if that -character is present, sets it as the first captured substring. The second part -matches one or more characters that are not parentheses. The third part is a -conditional subpattern that tests whether the first set of parentheses matched -or not. If they did, that is, if the subject started with an opening parenthesis, -the condition is true, and so the yes-pattern is executed and a closing -parenthesis is required. Otherwise, since no-pattern is not present, the -subpattern matches nothing. In other words, this pattern matches a sequence of -non-parentheses, optionally enclosed in parentheses. -

    -

    -If the condition is not a sequence of digits, it must be an assertion. This may -be a positive or negative lookahead or lookbehind assertion. Consider this -pattern, again containing non-significant white space, and with the two -alternatives on the second line: -

    -

    -

    -  (?(?=[^a-z]*[a-z])
    -  \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} )
    -
    -

    -

    -The condition is a positive lookahead assertion that matches an optional -sequence of non-letters followed by a letter. In other words, it tests for the -presence of at least one letter in the subject. If a letter is found, the -subject is matched against the first alternative; otherwise it is matched -against the second. This pattern matches strings in one of the two forms -dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits. -

    -
  • COMMENTS -

    -The sequence (?# marks the start of a comment which continues up to the next -closing parenthesis. Nested parentheses are not permitted. The characters -that make up a comment play no part in the pattern matching at all. -

    -

    -If the PCRE_EXTENDED option is set, an unescaped # character outside a -character class introduces a comment that continues up to the next newline -character in the pattern. -

    -
  • RECURSIVE PATTERNS -

    -Consider the problem of matching a string in parentheses, allowing for -unlimited nested parentheses. Without the use of recursion, the best that can -be done is to use a pattern that matches up to some fixed depth of nesting. It -is not possible to handle an arbitrary nesting depth. Perl 5.6 has provided an -experimental facility that allows regular expressions to recurse (amongst other -things). It does this by interpolating Perl code in the expression at run time, -and the code can refer to the expression itself. A Perl pattern to solve the -parentheses problem can be created like this: -

    -

    -

    -  $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x;
    -
    -

    -

    -The (?p{...}) item interpolates Perl code at run time, and in this case refers -recursively to the pattern in which it appears. Obviously, PCRE cannot support -the interpolation of Perl code. Instead, the special item (?R) is provided for -the specific case of recursion. This PCRE pattern solves the parentheses -problem (assume the PCRE_EXTENDED option is set so that white space is -ignored): -

    -

    -

    -  \( ( (?>[^()]+) | (?R) )* \)
    -
    -

    -

    -First it matches an opening parenthesis. Then it matches any number of -substrings which can either be a sequence of non-parentheses, or a recursive -match of the pattern itself (i.e. a correctly parenthesized substring). Finally -there is a closing parenthesis. -

    -

    -This particular example pattern contains nested unlimited repeats, and so the -use of a once-only subpattern for matching strings of non-parentheses is -important when applying the pattern to strings that do not match. For example, -when it is applied to -

    -

    -

    -  (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
    -
    -

    -

    -it yields "no match" quickly. However, if a once-only subpattern is not used, -the match runs for a very long time indeed because there are so many different -ways the + and * repeats can carve up the subject, and all have to be tested -before failure can be reported. -

    -

    -The values set for any capturing subpatterns are those from the outermost level -of the recursion at which the subpattern value is set. If the pattern above is -matched against -

    -

    -

    -  (ab(cd)ef)
    -
    -

    -

    -the value for the capturing parentheses is "ef", which is the last value taken -on at the top level. If additional parentheses are added, giving -

    -

    -

    -  \( ( ( (?>[^()]+) | (?R) )* ) \)
    -     ^                        ^
    -     ^                        ^
    -
    -the string they capture is "ab(cd)ef", the contents of the top level -parentheses. If there are more than 15 capturing parentheses in a pattern, PCRE -has to obtain extra memory to store data during a recursion, which it does by -using pcre_malloc, freeing it via pcre_free afterwards. If no -memory can be obtained, it saves data for the first 15 capturing parentheses -only, as there is no way to give an out-of-memory error from within a -recursion. -

    -
  • PERFORMANCE -

    -Certain items that may appear in patterns are more efficient than others. It is -more efficient to use a character class like [aeiou] than a set of alternatives -such as (a|e|i|o|u). In general, the simplest construction that provides the -required behaviour is usually the most efficient. Jeffrey Friedl's book -contains a lot of discussion about optimizing regular expressions for efficient -performance. -

    -

    -When a pattern begins with .* and the PCRE_DOTALL option is set, the pattern is -implicitly anchored by PCRE, since it can match only at the start of a subject -string. However, if PCRE_DOTALL is not set, PCRE cannot make this optimization, -because the . metacharacter does not then match a newline, and if the subject -string contains newlines, the pattern may match from the character immediately -following one of them instead of from the very start. For example, the pattern -

    -

    -

    -  (.*) second
    -
    -

    -

    -matches the subject "first\nand second" (where \n stands for a newline -character) with the first captured substring being "and". In order to do this, -PCRE has to retry the match starting after every newline in the subject. -

    -

    -If you are using such a pattern with subject strings that do not contain -newlines, the best performance is obtained by setting PCRE_DOTALL, or starting -the pattern with ^.* to indicate explicit anchoring. That saves PCRE from -having to scan along the subject looking for a newline to restart at. -

    -

    -Beware of patterns that contain nested indefinite repeats. These can take a -long time to run when applied to a string that does not match. Consider the -pattern fragment -

    -

    -

    -  (a+)*
    -
    -

    -

    -This can match "aaaa" in 33 different ways, and this number increases very -rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4 -times, and for each of those cases other than 0, the + repeats can match -different numbers of times.) When the remainder of the pattern is such that the -entire match is going to fail, PCRE has in principle to try every possible -variation, and this can take an extremely long time. -

    -

    -An optimization catches some of the more simple cases such as -

    -

    -

    -  (a+)*b
    -
    -

    -

    -where a literal character follows. Before embarking on the standard matching -procedure, PCRE checks that there is a "b" later in the subject string, and if -there is not, it fails the match immediately. However, when there is no -following literal this optimization cannot be used. You can see the difference -by comparing the behaviour of -

    -

    -

    -  (a+)*\d
    -
    -

    -

    -with the pattern above. The pattern above (ending in the literal "b") gives a failure almost instantly when -applied to a whole line of "a" characters, whereas the one ending in \d takes an -appreciable time with strings longer than about 20 characters. -

    -
  • UTF-8 SUPPORT -

    -Starting at release 3.3, PCRE has some support for character strings encoded -in the UTF-8 format. This is incomplete, and is regarded as experimental. In -order to use it, you must configure PCRE to include UTF-8 support in the code, -and, in addition, you must call pcre_compile() with the PCRE_UTF8 option -flag. When you do this, both the pattern and any subject strings that are -matched against it are treated as UTF-8 strings instead of just strings of -bytes, but only in the cases that are mentioned below. -

    -

    -If you compile PCRE with UTF-8 support, but do not use it at run time, the -library will be a bit bigger, but the additional run time overhead is limited -to testing the PCRE_UTF8 flag in several places, so should not be very large. -

    -

    -PCRE assumes that the strings it is given contain valid UTF-8 codes. It does -not diagnose invalid UTF-8 strings. If you pass invalid UTF-8 strings to PCRE, -the results are undefined. -

    -

    -Running with PCRE_UTF8 set causes these changes in the way PCRE works: -

    -

    -1. In a pattern, the escape sequence \x{...}, where the contents of the braces -is a string of hexadecimal digits, is interpreted as a UTF-8 character whose -code number is the given hexadecimal number, for example: \x{1234}. This -inserts from one to six literal bytes into the pattern, using the UTF-8 -encoding. If a non-hexadecimal digit appears between the braces, the item is -not recognized. -

    -

    -2. The original hexadecimal escape sequence, \xhh, generates a two-byte UTF-8 -character if its value is greater than 127. -

    -

    -3. Repeat quantifiers are NOT correctly handled if they follow a multibyte -character. For example, \x{100}* and \xc3+ do not work. If you want to -repeat such characters, you must enclose them in non-capturing parentheses, -for example (?:\x{100}), at present. -

    -

    -4. The dot metacharacter matches one UTF-8 character instead of a single byte. -

    -

    -5. Unlike literal UTF-8 characters, the dot metacharacter followed by a -repeat quantifier does operate correctly on UTF-8 characters instead of -single bytes. -

    -

    -6. Although the \x{...} escape is permitted in a character class, characters -whose values are greater than 255 cannot be included in a class. -

    -

    -7. A class is matched against a UTF-8 character instead of just a single byte, -but it can match only characters whose values are less than 256. Characters -with greater values always fail to match a class. -

    -

    -8. Repeated classes work correctly on multiple characters. -

    -

    -9. Classes containing just a single character whose value is greater than 127 -(but less than 256), for example, [\x80] or [^\x{93}], do not work because -these are optimized into single byte matches. In the first case, of course, -the class brackets are just redundant. -

    -

    -10. Lookbehind assertions move backwards in the subject by a fixed number of -characters instead of a fixed number of bytes. Simple cases have been tested -to work correctly, but there may be hidden gotchas herein. -

    -

    -11. The character types such as \d and \w do not work correctly with UTF-8 -characters. They continue to test a single byte. -

    -

    -12. Anything not explicitly mentioned here continues to work in bytes rather -than in characters. -

    -

    -The following UTF-8 features of Perl 5.6 are not implemented: -

    -

    -1. The escape sequence \C to match a single byte. -

    -

    -2. The use of Unicode tables and properties and escapes \p, \P, and \X. -

    -
  • AUTHOR -

    -Philip Hazel <ph10@cam.ac.uk> -
    -University Computing Service, -
    -New Museums Site, -
    -Cambridge CB2 3QG, England. -
    -Phone: +44 1223 334714 -

    -

    -Last updated: 28 August 2000, -
    -

    -  the 250th anniversary of the death of J.S. Bach.
    -
    -
    -Copyright (c) 1997-2000 University of Cambridge. diff --git a/pcre/doc/pcre.txt b/pcre/doc/pcre.txt deleted file mode 100644 index 1db4b537..00000000 --- a/pcre/doc/pcre.txt +++ /dev/null @@ -1,2125 +0,0 @@ -NAME - pcre - Perl-compatible regular expressions. - - - -SYNOPSIS - #include - - pcre *pcre_compile(const char *pattern, int options, - const char **errptr, int *erroffset, - const unsigned char *tableptr); - - pcre_extra *pcre_study(const pcre *code, int options, - const char **errptr); - - int pcre_exec(const pcre *code, const pcre_extra *extra, - const char *subject, int length, int startoffset, - int options, int *ovector, int ovecsize); - - int pcre_copy_substring(const char *subject, int *ovector, - int stringcount, int stringnumber, char *buffer, - int buffersize); - - int pcre_get_substring(const char *subject, int *ovector, - int stringcount, int stringnumber, - const char **stringptr); - - int pcre_get_substring_list(const char *subject, - int *ovector, int stringcount, const char ***listptr); - - void pcre_free_substring(const char *stringptr); - - void pcre_free_substring_list(const char **stringptr); - - const unsigned char *pcre_maketables(void); - - int pcre_fullinfo(const pcre *code, const pcre_extra *extra, - int what, void *where); - - int pcre_info(const pcre *code, int *optptr, *firstcharptr); - - char *pcre_version(void); - - void *(*pcre_malloc)(size_t); - - void (*pcre_free)(void *); - - - - -DESCRIPTION - The PCRE library is a set of functions that implement regu- - lar expression pattern matching using the same syntax and - semantics as Perl 5, with just a few differences (see - - below). The current implementation corresponds to Perl - 5.005, with some additional features from later versions. - This includes some experimental, incomplete support for - UTF-8 encoded strings. Details of exactly what is and what - is not supported are given below. - - PCRE has its own native API, which is described in this - document. 
There is also a set of wrapper functions that - correspond to the POSIX regular expression API. These are - described in the pcreposix documentation. - - The native API function prototypes are defined in the header - file pcre.h, and on Unix systems the library itself is - called libpcre.a, so can be accessed by adding -lpcre to the - command for linking an application which calls it. The - header file defines the macros PCRE_MAJOR and PCRE_MINOR to - contain the major and minor release numbers for the library. - Applications can use these to include support for different - releases. - - The functions pcre_compile(), pcre_study(), and pcre_exec() - are used for compiling and matching regular expressions. - - The functions pcre_copy_substring(), pcre_get_substring(), - and pcre_get_substring_list() are convenience functions for - extracting captured substrings from a matched subject - string; pcre_free_substring() and pcre_free_substring_list() - are also provided, to free the memory used for extracted - strings. - - The function pcre_maketables() is used (optionally) to build - a set of character tables in the current locale for passing - to pcre_compile(). - - The function pcre_fullinfo() is used to find out information - about a compiled pattern; pcre_info() is an obsolete version - which returns only some of the available information, but is - retained for backwards compatibility. The function - pcre_version() returns a pointer to a string containing the - version of PCRE and its date of release. - - The global variables pcre_malloc and pcre_free initially - contain the entry points of the standard malloc() and free() - functions respectively. PCRE calls the memory management - functions via these variables, so a calling program can - replace them if it wishes to intercept the calls. This - should be done before calling any PCRE functions. 
- - - -MULTI-THREADING - The PCRE functions can be used in multi-threading - - - - - -SunOS 5.8 Last change: 2 - - - - applications, with the proviso that the memory management - functions pointed to by pcre_malloc and pcre_free are shared - by all threads. - - The compiled form of a regular expression is not altered - during matching, so the same compiled pattern can safely be - used by several threads at once. - - - -COMPILING A PATTERN - The function pcre_compile() is called to compile a pattern - into an internal form. The pattern is a C string terminated - by a binary zero, and is passed in the argument pattern. A - pointer to a single block of memory that is obtained via - pcre_malloc is returned. This contains the compiled code and - related data. The pcre type is defined for this for conveni- - ence, but in fact pcre is just a typedef for void, since the - contents of the block are not externally defined. It is up - to the caller to free the memory when it is no longer - required. - - The size of a compiled pattern is roughly proportional to - the length of the pattern string, except that each character - class (other than those containing just a single character, - negated or not) requires 33 bytes, and repeat quantifiers - with a minimum greater than one or a bounded maximum cause - the relevant portions of the compiled pattern to be repli- - cated. - - The options argument contains independent bits that affect - the compilation. It should be zero if no options are - required. Some of the options, in particular, those that are - compatible with Perl, can also be set and unset from within - the pattern (see the detailed description of regular expres- - sions below). For these options, the contents of the options - argument specifies their initial settings at the start of - compilation and execution. The PCRE_ANCHORED option can be - set at the time of matching as well as at compile time. - - If errptr is NULL, pcre_compile() returns NULL immediately. 
- Otherwise, if compilation of a pattern fails, pcre_compile() - returns NULL, and sets the variable pointed to by errptr to - point to a textual error message. The offset from the start - of the pattern to the character where the error was - discovered is placed in the variable pointed to by - erroffset, which must not be NULL. If it is, an immediate - error is given. - - If the final argument, tableptr, is NULL, PCRE uses a - default set of character tables which are built when it is - compiled, using the default C locale. Otherwise, tableptr - must be the result of a call to pcre_maketables(). See the - section on locale support below. - - The following option bits are defined in the header file: - - PCRE_ANCHORED - - If this bit is set, the pattern is forced to be "anchored", - that is, it is constrained to match only at the start of the - string which is being searched (the "subject string"). This - effect can also be achieved by appropriate constructs in the - pattern itself, which is the only way to do it in Perl. - - PCRE_CASELESS - - If this bit is set, letters in the pattern match both upper - and lower case letters. It is equivalent to Perl's /i - option. - - PCRE_DOLLAR_ENDONLY - - If this bit is set, a dollar metacharacter in the pattern - matches only at the end of the subject string. Without this - option, a dollar also matches immediately before the final - character if it is a newline (but not before any other new- - lines). The PCRE_DOLLAR_ENDONLY option is ignored if - PCRE_MULTILINE is set. There is no equivalent to this option - in Perl. - - PCRE_DOTALL - - If this bit is set, a dot metacharacter in the pattern - matches all characters, including newlines. Without it, new- - lines are excluded. This option is equivalent to Perl's /s - option. A negative class such as [^a] always matches a new- - line character, independent of the setting of this option.
- - PCRE_EXTENDED - - If this bit is set, whitespace data characters in the pat- - tern are totally ignored except when escaped or inside a - character class, and characters between an unescaped # out- - side a character class and the next newline character, - inclusive, are also ignored. This is equivalent to Perl's /x - option, and makes it possible to include comments inside - complicated patterns. Note, however, that this applies only - to data characters. Whitespace characters may never appear - within special character sequences in a pattern, for example - within the sequence (?( which introduces a conditional sub- - pattern. - - PCRE_EXTRA - - This option was invented in order to turn on additional - functionality of PCRE that is incompatible with Perl, but it - is currently of very little use. When set, any backslash in - a pattern that is followed by a letter that has no special - meaning causes an error, thus reserving these combinations - for future expansion. By default, as in Perl, a backslash - followed by a letter with no special meaning is treated as a - literal. There are at present no other features controlled - by this option. It can also be set by a (?X) option setting - within a pattern. - - PCRE_MULTILINE - - By default, PCRE treats the subject string as consisting of - a single "line" of characters (even if it actually contains - several newlines). The "start of line" metacharacter (^) - matches only at the start of the string, while the "end of - line" metacharacter ($) matches only at the end of the - string, or before a terminating newline (unless - PCRE_DOLLAR_ENDONLY is set). This is the same as Perl. - - When PCRE_MULTILINE is set, the "start of line" and "end - of line" constructs match immediately following or immedi- - ately before any newline in the subject string, respec- - tively, as well as at the very start and end. This is - equivalent to Perl's /m option.
If there are no "\n" charac- - ters in a subject string, or no occurrences of ^ or $ in a - pattern, setting PCRE_MULTILINE has no effect. - - PCRE_UNGREEDY - - This option inverts the "greediness" of the quantifiers so - that they are not greedy by default, but become greedy if - followed by "?". It is not compatible with Perl. It can also - be set by a (?U) option setting within the pattern. - - PCRE_UTF8 - - This option causes PCRE to regard both the pattern and the - subject as strings of UTF-8 characters instead of just byte - strings. However, it is available only if PCRE has been - built to include UTF-8 support. If not, the use of this - option provokes an error. Support for UTF-8 is new, experi- - mental, and incomplete. Details of exactly what it entails - are given below. - - - -STUDYING A PATTERN - When a pattern is going to be used several times, it is - worth spending more time analyzing it in order to speed up - the time taken for matching. The function pcre_study() takes - - a pointer to a compiled pattern as its first argument, and - returns a pointer to a pcre_extra block (another void - typedef) containing additional information about the pat- - tern; this can be passed to pcre_exec(). If no additional - information is available, NULL is returned. - - The second argument contains option bits. At present, no - options are defined for pcre_study(), and this argument - should always be zero. - - The third argument for pcre_study() is a pointer to an error - message. If studying succeeds (even if no data is returned), - the variable it points to is set to NULL. Otherwise it - points to a textual error message. - - At present, studying a pattern is useful only for non- - anchored patterns that do not have a single fixed starting - character. A bitmap of possible starting characters is - created. 
- - - -LOCALE SUPPORT - PCRE handles caseless matching, and determines whether char- - acters are letters, digits, or whatever, by reference to a - set of tables. The library contains a default set of tables - which is created in the default C locale when PCRE is com- - piled. This is used when the final argument of - pcre_compile() is NULL, and is sufficient for many applica- - tions. - - An alternative set of tables can, however, be supplied. Such - tables are built by calling the pcre_maketables() function, - which has no arguments, in the relevant locale. The result - can then be passed to pcre_compile() as often as necessary. - For example, to build and use tables that are appropriate - for the French locale (where accented characters with codes - greater than 128 are treated as letters), the following code - could be used: - - setlocale(LC_CTYPE, "fr"); - tables = pcre_maketables(); - re = pcre_compile(..., tables); - - The tables are built in memory that is obtained via - pcre_malloc. The pointer that is passed to pcre_compile is - saved with the compiled pattern, and the same tables are - used via this pointer by pcre_study() and pcre_exec(). Thus - for any single pattern, compilation, studying and matching - all happen in the same locale, but different patterns can be - compiled in different locales. It is the caller's responsi- - bility to ensure that the memory containing the tables - remains available for as long as it is needed. - - - -INFORMATION ABOUT A PATTERN - The pcre_fullinfo() function returns information about a - compiled pattern. It replaces the obsolete pcre_info() func- - tion, which is nevertheless retained for backwards compatibil- - ity (and is documented below). - - The first argument for pcre_fullinfo() is a pointer to the - compiled pattern. The second argument is the result of - pcre_study(), or NULL if the pattern was not studied.
The - third argument specifies which piece of information is - required, while the fourth argument is a pointer to a vari- - able to receive the data. The yield of the function is zero - for success, or one of the following negative numbers: - - PCRE_ERROR_NULL the argument code was NULL - the argument where was NULL - PCRE_ERROR_BADMAGIC the "magic number" was not found - PCRE_ERROR_BADOPTION the value of what was invalid - - The possible values for the third argument are defined in - pcre.h, and are as follows: - - PCRE_INFO_OPTIONS - - Return a copy of the options with which the pattern was com- - piled. The fourth argument should point to an unsigned long - int variable. These option bits are those specified in the - call to pcre_compile(), modified by any top-level option - settings within the pattern itself, and with the - PCRE_ANCHORED bit forcibly set if the form of the pattern - implies that it can match only at the start of a subject - string. - - PCRE_INFO_SIZE - - Return the size of the compiled pattern, that is, the value - that was passed as the argument to pcre_malloc() when PCRE - was getting memory in which to place the compiled data. The - fourth argument should point to a size_t variable. - - PCRE_INFO_CAPTURECOUNT - - Return the number of capturing subpatterns in the pattern. - The fourth argument should point to an int variable. - - PCRE_INFO_BACKREFMAX - - Return the number of the highest back reference in the - pattern. The fourth argument should point to an int vari- - able. Zero is returned if there are no back references. - - PCRE_INFO_FIRSTCHAR - - Return information about the first character of any matched - string, for a non-anchored pattern. If there is a fixed - first character, e.g. from a pattern such as - (cat|cow|coyote), it is returned in the integer pointed to - by where.
Otherwise, if either - - (a) the pattern was compiled with the PCRE_MULTILINE option, - and every branch starts with "^", or - - (b) every branch of the pattern starts with ".*" and - PCRE_DOTALL is not set (if it were set, the pattern would be - anchored), - - -1 is returned, indicating that the pattern matches only at - the start of a subject string or after any "\n" within the - string. Otherwise -2 is returned. For anchored patterns, -2 - is returned. - - PCRE_INFO_FIRSTTABLE - - If the pattern was studied, and this resulted in the con- - struction of a 256-bit table indicating a fixed set of char- - acters for the first character in any matching string, a - pointer to the table is returned. Otherwise NULL is - returned. The fourth argument should point to an unsigned - char * variable. - - PCRE_INFO_LASTLITERAL - - For a non-anchored pattern, return the value of the right- - most literal character which must exist in any matched - string, other than at its start. The fourth argument should - point to an int variable. If there is no such character, or - if the pattern is anchored, -1 is returned. For example, for - the pattern /a\d+z\d+/ the returned value is 'z'. - - The pcre_info() function is now obsolete because its inter- - face is too restrictive to return all the available data - about a compiled pattern. New programs should use - pcre_fullinfo() instead. The yield of pcre_info() is the - number of capturing subpatterns, or one of the following - negative numbers: - - PCRE_ERROR_NULL the argument code was NULL - PCRE_ERROR_BADMAGIC the "magic number" was not found - - If the optptr argument is not NULL, a copy of the options - with which the pattern was compiled is placed in the integer - it points to (see PCRE_INFO_OPTIONS above). - - If the pattern is not anchored and the firstcharptr argument - is not NULL, it is used to pass back information about the - first character of any matched string (see - PCRE_INFO_FIRSTCHAR above). 
- - - -MATCHING A PATTERN - The function pcre_exec() is called to match a subject string - against a pre-compiled pattern, which is passed in the code - argument. If the pattern has been studied, the result of the - study should be passed in the extra argument. Otherwise this - must be NULL. - - The PCRE_ANCHORED option can be passed in the options argu- - ment, whose unused bits must be zero. However, if a pattern - was compiled with PCRE_ANCHORED, or turned out to be - anchored by virtue of its contents, it cannot be made - unanchored at matching time. - - There are also three further options that can be set only at - matching time: - - PCRE_NOTBOL - - The first character of the string is not the beginning of a - line, so the circumflex metacharacter should not match - before it. Setting this without PCRE_MULTILINE (at compile - time) causes circumflex never to match. - - PCRE_NOTEOL - - The end of the string is not the end of a line, so the dol- - lar metacharacter should not match it nor (except in multi- - line mode) a newline immediately before it. Setting this - without PCRE_MULTILINE (at compile time) causes dollar never - to match. - - PCRE_NOTEMPTY - - An empty string is not considered to be a valid match if - this option is set. If there are alternatives in the pat- - tern, they are tried. If all the alternatives match the - empty string, the entire match fails. For example, if the - pattern - - a?b? - - is applied to a string not beginning with "a" or "b", it - matches the empty string at the start of the subject. With - PCRE_NOTEMPTY set, this match is not valid, so PCRE searches - further into the string for occurrences of "a" or "b". - - Perl has no direct equivalent of PCRE_NOTEMPTY, but it does - make a special case of a pattern match of the empty string - within its split() function, and when using the /g modifier.
- It is possible to emulate Perl's behaviour after matching a - null string by first trying the match again at the same - offset with PCRE_NOTEMPTY set, and then if that fails by - advancing the starting offset (see below) and trying an - ordinary match again. - - The subject string is passed as a pointer in subject, a - length in length, and a starting offset in startoffset. - Unlike the pattern string, it may contain binary zero char- - acters. When the starting offset is zero, the search for a - match starts at the beginning of the subject, and this is by - far the most common case. - - A non-zero starting offset is useful when searching for - another match in the same subject by calling pcre_exec() - again after a previous success. Setting startoffset differs - from just passing over a shortened string and setting - PCRE_NOTBOL in the case of a pattern that begins with any - kind of lookbehind. For example, consider the pattern - - \Biss\B - - which finds occurrences of "iss" in the middle of words. (\B - matches only if the current position in the subject is not a - word boundary.) When applied to the string "Mississipi" the - first call to pcre_exec() finds the first occurrence. If - pcre_exec() is called again with just the remainder of the - subject, namely "issipi", it does not match, because \B is - always false at the start of the subject, which is deemed to - be a word boundary. However, if pcre_exec() is passed the - entire string again, but with startoffset set to 4, it finds - the second occurrence of "iss" because it is able to look - behind the starting point to discover that it is preceded by - a letter. - - If a non-zero starting offset is passed when the pattern is - anchored, one attempt to match at the given offset is tried. - This can only succeed if the pattern does not require the - match to be at the start of the subject. 
- - In general, a pattern matches a certain portion of the sub- - ject, and in addition, further substrings from the subject - may be picked out by parts of the pattern. Following the - usage in Jeffrey Friedl's book, this is called "capturing" - in what follows, and the phrase "capturing subpattern" is - used for a fragment of a pattern that picks out a substring. - PCRE supports several other kinds of parenthesized subpat- - tern that do not cause substrings to be captured. - - Captured substrings are returned to the caller via a vector - of integer offsets whose address is passed in ovector. The - number of elements in the vector is passed in ovecsize. The - first two-thirds of the vector is used to pass back captured - substrings, each substring using a pair of integers. The - remaining third of the vector is used as workspace by - pcre_exec() while matching capturing subpatterns, and is not - available for passing back information. The length passed in - ovecsize should always be a multiple of three. If it is not, - it is rounded down. - - When a match has been successful, information about captured - substrings is returned in pairs of integers, starting at the - beginning of ovector, and continuing up to two-thirds of its - length at the most. The first element of a pair is set to - the offset of the first character in a substring, and the - second is set to the offset of the first character after the - end of a substring. The first pair, ovector[0] and ovec- - tor[1], identify the portion of the subject string matched - by the entire pattern. The next pair is used for the first - capturing subpattern, and so on. The value returned by - pcre_exec() is the number of pairs that have been set. If - there are no capturing subpatterns, the return value from a - successful match is 1, indicating that just the first pair - of offsets has been set. - - Some convenience functions are provided for extracting the - captured substrings as separate strings. 
These are described - in the following section. - - It is possible for a capturing subpattern number n+1 to - match some part of the subject when subpattern n has not - been used at all. For example, if the string "abc" is - matched against the pattern (a|(z))(bc) subpatterns 1 and 3 - are matched, but 2 is not. When this happens, both offset - values corresponding to the unused subpattern are set to -1. - - If a capturing subpattern is matched repeatedly, it is the - last portion of the string that it matched that gets - returned. - - If the vector is too small to hold all the captured sub- - strings, it is used as far as possible (up to two-thirds of - its length), and the function returns a value of zero. In - particular, if the substring offsets are not of interest, - pcre_exec() may be called with ovector passed as NULL and - ovecsize as zero. However, if the pattern contains back - references and the ovector isn't big enough to remember the - related substrings, PCRE has to get additional memory for - use during matching. Thus it is usually advisable to supply - an ovector. - - Note that pcre_info() can be used to find out how many cap- - turing subpatterns there are in a compiled pattern. The - smallest size for ovector that will allow for n captured - substrings in addition to the offsets of the substring - matched by the whole pattern is (n+1)*3. - - If pcre_exec() fails, it returns a negative number. The fol- - lowing are defined in the header file: - - PCRE_ERROR_NOMATCH (-1) - - The subject string did not match the pattern. - - PCRE_ERROR_NULL (-2) - - Either code or subject was passed as NULL, or ovector was - NULL and ovecsize was not zero. - - PCRE_ERROR_BADOPTION (-3) - - An unrecognized bit was set in the options argument. - - PCRE_ERROR_BADMAGIC (-4) - - PCRE stores a 4-byte "magic number" at the start of the com- - piled code, to catch the case when it is passed a junk - pointer. This is the error it gives when the magic number - isn't present.
- - PCRE_ERROR_UNKNOWN_NODE (-5) - - While running the pattern match, an unknown item was encoun- - tered in the compiled pattern. This error could be caused by - a bug in PCRE or by overwriting of the compiled pattern. - - PCRE_ERROR_NOMEMORY (-6) - - If a pattern contains back references, but the ovector that - is passed to pcre_exec() is not big enough to remember the - referenced substrings, PCRE gets a block of memory at the - start of matching to use for this purpose. If the call via - pcre_malloc() fails, this error is given. The memory is - freed at the end of matching. - - - -EXTRACTING CAPTURED SUBSTRINGS - Captured substrings can be accessed directly by using the - - - - - -SunOS 5.8 Last change: 12 - - - - offsets returned by pcre_exec() in ovector. For convenience, - the functions pcre_copy_substring(), pcre_get_substring(), - and pcre_get_substring_list() are provided for extracting - captured substrings as new, separate, zero-terminated - strings. A substring that contains a binary zero is - correctly extracted and has a further zero added on the end, - but the result does not, of course, function as a C string. - - The first three arguments are the same for all three func- - tions: subject is the subject string which has just been - successfully matched, ovector is a pointer to the vector of - integer offsets that was passed to pcre_exec(), and - stringcount is the number of substrings that were captured - by the match, including the substring that matched the - entire regular expression. This is the value returned by - pcre_exec if it is greater than zero. If pcre_exec() - returned zero, indicating that it ran out of space in ovec- - tor, the value passed as stringcount should be the size of - the vector divided by three. - - The functions pcre_copy_substring() and pcre_get_substring() - extract a single substring, whose number is given as string- - number. 
A value of zero extracts the substring that matched - the entire pattern, while higher values extract the captured - substrings. For pcre_copy_substring(), the string is placed - in buffer, whose length is given by buffersize, while for - pcre_get_substring() a new block of memory is obtained via - pcre_malloc, and its address is returned via stringptr. The - yield of the function is the length of the string, not - including the terminating zero, or one of - - PCRE_ERROR_NOMEMORY (-6) - - The buffer was too small for pcre_copy_substring(), or the - attempt to get memory failed for pcre_get_substring(). - - PCRE_ERROR_NOSUBSTRING (-7) - - There is no substring whose number is stringnumber. - - The pcre_get_substring_list() function extracts all avail- - able substrings and builds a list of pointers to them. All - this is done in a single block of memory which is obtained - via pcre_malloc. The address of the memory block is returned - via listptr, which is also the start of the list of string - pointers. The end of the list is marked by a NULL pointer. - The yield of the function is zero if all went well, or - - PCRE_ERROR_NOMEMORY (-6) - - if the attempt to get the memory block failed. - - When any of these functions encounter a substring that is - unset, which can happen when capturing subpattern number n+1 - matches some part of the subject, but subpattern n has not - been used at all, they return an empty string. This can be - distinguished from a genuine zero-length substring by - inspecting the appropriate offset in ovector, which is nega- - tive for unset substrings. - - The two convenience functions pcre_free_substring() and - pcre_free_substring_list() can be used to free the memory - returned by a previous call of pcre_get_substring() or - pcre_get_substring_list(), respectively. They do nothing - more than call the function pointed to by pcre_free, which - of course could be called directly from a C program. 
How- - ever, PCRE is used in some situations where it is linked via - a special interface to another programming language which - cannot use pcre_free directly; it is for these cases that - the functions are provided. - - - -LIMITATIONS - There are some size limitations in PCRE but it is hoped that - they will never in practice be relevant. The maximum length - of a compiled pattern is 65539 (sic) bytes. All values in - repeating quantifiers must be less than 65536. The maximum - number of capturing subpatterns is 99. The maximum number - of all parenthesized subpatterns, including capturing sub- - patterns, assertions, and other types of subpattern, is 200. - - The maximum length of a subject string is the largest posi- - tive number that an integer variable can hold. However, PCRE - uses recursion to handle subpatterns and indefinite repeti- - tion. This means that the available stack space may limit - the size of a subject string that can be processed by cer- - tain patterns. - - - -DIFFERENCES FROM PERL - The differences described here are with respect to Perl - 5.005. - - 1. By default, a whitespace character is any character that - the C library function isspace() recognizes, though it is - possible to compile PCRE with alternative character type - tables. Normally isspace() matches space, formfeed, newline, - carriage return, horizontal tab, and vertical tab. Perl 5 no - longer includes vertical tab in its set of whitespace char- - acters. The \v escape that was in the Perl documentation for - a long time was never in fact recognized. However, the char- - acter itself was treated as whitespace at least up to 5.002. - In 5.004 and 5.005 it does not match \s. - - 2. PCRE does not allow repeat quantifiers on lookahead - assertions. Perl permits them, but they do not mean what you - might think. For example, (?!a){3} does not assert that the - next three characters are not "a". It just asserts that the - next character is not "a" three times. - - 3. 
Capturing subpatterns that occur inside negative looka- - head assertions are counted, but their entries in the - offsets vector are never set. Perl sets its numerical vari- - ables from any such patterns that are matched before the - assertion fails to match something (thereby succeeding), but - only if the negative lookahead assertion contains just one - branch. - - 4. Though binary zero characters are supported in the sub- - ject string, they are not allowed in a pattern string - because it is passed as a normal C string, terminated by - zero. The escape sequence "\0" can be used in the pattern to - represent a binary zero. - - 5. The following Perl escape sequences are not supported: - \l, \u, \L, \U, \E, \Q. In fact these are implemented by - Perl's general string-handling and are not part of its pat- - tern matching engine. - - 6. The Perl \G assertion is not supported as it is not - relevant to single pattern matches. - - 7. Fairly obviously, PCRE does not support the (?{code}) and - (?p{code}) constructions. However, there is some experimen- - tal support for recursive patterns using the non-Perl item - (?R). - - 8. There are at the time of writing some oddities in Perl - 5.005_02 concerned with the settings of captured strings - when part of a pattern is repeated. For example, matching - "aba" against the pattern /^(a(b)?)+$/ sets $2 to the value - "b", but matching "aabbaa" against /^(aa(bb)?)+$/ leaves $2 - unset. However, if the pattern is changed to - /^(aa(b(b))?)+$/ then $2 (and $3) are set. - - In Perl 5.004 $2 is set in both cases, and that is also true - of PCRE. If in the future Perl changes to a consistent state - that is different, PCRE may change to follow. - - 9. Another as yet unresolved discrepancy is that in Perl - 5.005_02 the pattern /^(a)?(?(1)a|b)+$/ matches the string - "a", whereas in PCRE it does not. However, in both Perl and - PCRE /^(a)?a/ matched against "a" leaves $1 unset. - - 10. 
PCRE provides some extensions to the Perl regular - expression facilities: - - (a) Although lookbehind assertions must match fixed length - strings, each alternative branch of a lookbehind assertion - can match a different length of string. Perl 5.005 requires - them all to have the same length. - - (b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not - set, the $ meta- character matches only at the very end of - the string. - - (c) If PCRE_EXTRA is set, a backslash followed by a letter - with no special meaning is faulted. - - (d) If PCRE_UNGREEDY is set, the greediness of the repeti- - tion quantifiers is inverted, that is, by default they are - not greedy, but if followed by a question mark they are. - - (e) PCRE_ANCHORED can be used to force a pattern to be tried - only at the start of the subject. - - (f) The PCRE_NOTBOL, PCRE_NOTEOL, and PCRE_NOTEMPTY options - for pcre_exec() have no Perl equivalents. - - (g) The (?R) construct allows for recursive pattern matching - (Perl 5.6 can do this using the (?p{code}) construct, which - PCRE cannot of course support.) - - - -REGULAR EXPRESSION DETAILS - The syntax and semantics of the regular expressions sup- - ported by PCRE are described below. Regular expressions are - also described in the Perl documentation and in a number of - other books, some of which have copious examples. Jeffrey - Friedl's "Mastering Regular Expressions", published by - O'Reilly (ISBN 1-56592-257), covers them in great detail. - - The description here is intended as reference documentation. - The basic operation of PCRE is on strings of bytes. However, - there is the beginnings of some support for UTF-8 character - strings. To use this support you must configure PCRE to - include it, and then call pcre_compile() with the PCRE_UTF8 - option. How this affects the pattern matching is described - in the final section of this document. - - A regular expression is a pattern that is matched against a - subject string from left to right. 
Most characters stand for - themselves in a pattern, and match the corresponding charac- - ters in the subject. As a trivial example, the pattern - - The quick brown fox - - matches a portion of a subject string that is identical to - itself. The power of regular expressions comes from the - ability to include alternatives and repetitions in the pat- - tern. These are encoded in the pattern by the use of meta- - characters, which do not stand for themselves but instead - are interpreted in some special way. - - There are two different sets of meta-characters: those that - are recognized anywhere in the pattern except within square - brackets, and those that are recognized in square brackets. - Outside square brackets, the meta-characters are as follows: - - \ general escape character with several uses - ^ assert start of subject (or line, in multiline - mode) - $ assert end of subject (or line, in multiline mode) - . match any character except newline (by default) - [ start character class definition - | start of alternative branch - ( start subpattern - ) end subpattern - ? extends the meaning of ( - also 0 or 1 quantifier - also quantifier minimizer - * 0 or more quantifier - + 1 or more quantifier - { start min/max quantifier - - Part of a pattern that is in square brackets is called a - "character class". In a character class the only meta- - characters are: - - \ general escape character - ^ negate the class, but only if the first character - - indicates character range - ] terminates the character class - - The following sections describe the use of each of the - meta-characters. - - - -BACKSLASH - The backslash character has several uses. Firstly, if it is - followed by a non-alphameric character, it takes away any - special meaning that character may have. This use of - backslash as an escape character applies both inside and - outside character classes. - - For example, if you want to match a "*" character, you write - "\*" in the pattern. 
This applies whether or not the follow- - ing character would otherwise be interpreted as a meta- - character, so it is always safe to precede a non-alphameric - with "\" to specify that it stands for itself. In particu- - lar, if you want to match a backslash, you write "\\". - - If a pattern is compiled with the PCRE_EXTENDED option, whi- - tespace in the pattern (other than in a character class) and - characters between a "#" outside a character class and the - next newline character are ignored. An escaping backslash - can be used to include a whitespace or "#" character as part - of the pattern. - - A second use of backslash provides a way of encoding non- - printing characters in patterns in a visible manner. There - is no restriction on the appearance of non-printing charac- - ters, apart from the binary zero that terminates a pattern, - but when a pattern is being prepared by text editing, it is - usually easier to use one of the following escape sequences - than the binary character it represents: - - \a alarm, that is, the BEL character (hex 07) - \cx "control-x", where x is any character - \e escape (hex 1B) - \f formfeed (hex 0C) - \n newline (hex 0A) - \r carriage return (hex 0D) - \t tab (hex 09) - \xhh character with hex code hh - \ddd character with octal code ddd, or backreference - - The precise effect of "\cx" is as follows: if "x" is a lower - case letter, it is converted to upper case. Then bit 6 of - the character (hex 40) is inverted. Thus "\cz" becomes hex - 1A, but "\c{" becomes hex 3B, while "\c;" becomes hex 7B. - - After "\x", up to two hexadecimal digits are read (letters - can be in upper or lower case). - - After "\0" up to two further octal digits are read. In both - cases, if there are fewer than two digits, just those that - are present are used. Thus the sequence "\0\x\07" specifies - two binary zeros followed by a BEL character. 
Make sure you - supply two digits after the initial zero if the character - that follows is itself an octal digit. - - The handling of a backslash followed by a digit other than 0 - is complicated. Outside a character class, PCRE reads it - and any following digits as a decimal number. If the number - is less than 10, or if there have been at least that many - previous capturing left parentheses in the expression, the - entire sequence is taken as a back reference. A description - of how this works is given later, following the discussion - of parenthesized subpatterns. - - Inside a character class, or if the decimal number is - greater than 9 and there have not been that many capturing - subpatterns, PCRE re-reads up to three octal digits follow- - ing the backslash, and generates a single byte from the - least significant 8 bits of the value. Any subsequent digits - stand for themselves. For example: - - \040 is another way of writing a space - \40 is the same, provided there are fewer than 40 - previous capturing subpatterns - \7 is always a back reference - \11 might be a back reference, or another way of - writing a tab - \011 is always a tab - \0113 is a tab followed by the character "3" - \113 is the character with octal code 113 (since there - can be no more than 99 back references) - \377 is a byte consisting entirely of 1 bits - \81 is either a back reference, or a binary zero - followed by the two characters "8" and "1" - - Note that octal values of 100 or greater must not be intro- - duced by a leading zero, because no more than three octal - digits are ever read. - - All the sequences that define a single byte value can be - used both inside and outside character classes. In addition, - inside a character class, the sequence "\b" is interpreted - as the backspace character (hex 08). Outside a character - class it has a different meaning (see below). 
- - The third use of backslash is for specifying generic charac- - ter types: - - \d any decimal digit - \D any character that is not a decimal digit - \s any whitespace character - \S any character that is not a whitespace character - \w any "word" character - \W any "non-word" character - - Each pair of escape sequences partitions the complete set of - characters into two disjoint sets. Any given character - matches one, and only one, of each pair. - - A "word" character is any letter or digit or the underscore - character, that is, any character which can be part of a - Perl "word". The definition of letters and digits is con- - trolled by PCRE's character tables, and may vary if locale- - specific matching is taking place (see "Locale support" - above). For example, in the "fr" (French) locale, some char- - acter codes greater than 128 are used for accented letters, - and these are matched by \w. - - These character type sequences can appear both inside and - outside character classes. They each match one character of - the appropriate type. If the current matching point is at - the end of the subject string, all of them fail, since there - is no character to match. - - The fourth use of backslash is for certain simple asser- - tions. An assertion specifies a condition that has to be met - at a particular point in a match, without consuming any - characters from the subject string. The use of subpatterns - for more complicated assertions is described below. The - backslashed assertions are - - \b word boundary - \B not a word boundary - \A start of subject (independent of multiline mode) - \Z end of subject or newline at end (independent of - multiline mode) - \z end of subject (independent of multiline mode) - - These assertions may not appear in character classes (but - note that "\b" has a different meaning, namely the backspace - character, inside a character class). 
- - A word boundary is a position in the subject string where - the current character and the previous character do not both - match \w or \W (i.e. one matches \w and the other matches - \W), or the start or end of the string if the first or last - character matches \w, respectively. - - The \A, \Z, and \z assertions differ from the traditional - circumflex and dollar (described below) in that they only - ever match at the very start and end of the subject string, - whatever options are set. They are not affected by the - PCRE_NOTBOL or PCRE_NOTEOL options. If the startoffset argu- - ment of pcre_exec() is non-zero, \A can never match. The - difference between \Z and \z is that \Z matches before a - newline that is the last character of the string as well as - at the end of the string, whereas \z matches only at the - end. - - - -CIRCUMFLEX AND DOLLAR - Outside a character class, in the default matching mode, the - circumflex character is an assertion which is true only if - the current matching point is at the start of the subject - - string. If the startoffset argument of pcre_exec() is non- - zero, circumflex can never match. Inside a character class, - circumflex has an entirely different meaning (see below). - - Circumflex need not be the first character of the pattern if - a number of alternatives are involved, but it should be the - first thing in each alternative in which it appears if the - pattern is ever to match that branch. If all possible alter- - natives start with a circumflex, that is, if the pattern is - constrained to match only at the start of the subject, it is - said to be an "anchored" pattern. (There are also other con- - structs that can cause a pattern to be anchored.) - - A dollar character is an assertion which is true only if the - current matching point is at the end of the subject string, - or immediately before a newline character that is the last - character in the string (by default). 
Dollar need not be the - last character of the pattern if a number of alternatives - are involved, but it should be the last item in any branch - in which it appears. Dollar has no special meaning in a - character class. - - The meaning of dollar can be changed so that it matches only - at the very end of the string, by setting the - PCRE_DOLLAR_ENDONLY option at compile or matching time. This - does not affect the \Z assertion. - - The meanings of the circumflex and dollar characters are - changed if the PCRE_MULTILINE option is set. When this is - the case, they match immediately after and immediately - before an internal "\n" character, respectively, in addition - to matching at the start and end of the subject string. For - example, the pattern /^abc$/ matches the subject string - "def\nabc" in multiline mode, but not otherwise. Conse- - quently, patterns that are anchored in single line mode - because all branches start with "^" are not anchored in mul- - tiline mode, and a match for circumflex is possible when the - startoffset argument of pcre_exec() is non-zero. The - PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is - set. - - Note that the sequences \A, \Z, and \z can be used to match - the start and end of the subject in both modes, and if all - branches of a pattern start with \A it is always anchored, - whether PCRE_MULTILINE is set or not. - - - -FULL STOP (PERIOD, DOT) - Outside a character class, a dot in the pattern matches any - one character in the subject, including a non-printing char- - acter, but not (by default) newline. If the PCRE_DOTALL - - option is set, dots match newlines as well. The handling of - dot is entirely independent of the handling of circumflex - and dollar, the only relationship being that they both - involve newline characters. Dot has no special meaning in a - character class. - - - -SQUARE BRACKETS - An opening square bracket introduces a character class, ter- - minated by a closing square bracket.
A closing square - bracket on its own is not special. If a closing square - bracket is required as a member of the class, it should be - the first data character in the class (after an initial cir- - cumflex, if present) or escaped with a backslash. - - A character class matches a single character in the subject; - the character must be in the set of characters defined by - the class, unless the first character in the class is a cir- - cumflex, in which case the subject character must not be in - the set defined by the class. If a circumflex is actually - required as a member of the class, ensure it is not the - first character, or escape it with a backslash. - - For example, the character class [aeiou] matches any lower - case vowel, while [^aeiou] matches any character that is not - a lower case vowel. Note that a circumflex is just a con- - venient notation for specifying the characters which are in - the class by enumerating those that are not. It is not an - assertion: it still consumes a character from the subject - string, and fails if the current pointer is at the end of - the string. - - When caseless matching is set, any letters in a class - represent both their upper case and lower case versions, so - for example, a caseless [aeiou] matches "A" as well as "a", - and a caseless [^aeiou] does not match "A", whereas a case- - ful version would. - - The newline character is never treated in any special way in - character classes, whatever the setting of the PCRE_DOTALL - or PCRE_MULTILINE options is. A class such as [^a] will - always match a newline. - - The minus (hyphen) character can be used to specify a range - of characters in a character class. For example, [d-m] - matches any letter between d and m, inclusive. If a minus - character is required in a class, it must be escaped with a - backslash or appear in a position where it cannot be inter- - preted as indicating a range, typically as the first or last - character in the class. 
- - It is not possible to have the literal character "]" as the - end character of a range. A pattern such as [W-]46] is - interpreted as a class of two characters ("W" and "-") fol- - lowed by a literal string "46]", so it would match "W46]" or - "-46]". However, if the "]" is escaped with a backslash it - is interpreted as the end of range, so [W-\]46] is inter- - preted as a single class containing a range followed by two - separate characters. The octal or hexadecimal representation - of "]" can also be used to end a range. - - Ranges operate in ASCII collating sequence. They can also be - used for characters specified numerically, for example - [\000-\037]. If a range that includes letters is used when - caseless matching is set, it matches the letters in either - case. For example, [W-c] is equivalent to [][\^_`wxyzabc], - matched caselessly, and if character tables for the "fr" - locale are in use, [\xc8-\xcb] matches accented E characters - in both cases. - - The character types \d, \D, \s, \S, \w, and \W may also - appear in a character class, and add the characters that - they match to the class. For example, [\dABCDEF] matches any - hexadecimal digit. A circumflex can conveniently be used - with the upper case character types to specify a more res- - tricted set of characters than the matching lower case type. - For example, the class [^\W_] matches any letter or digit, - but not underscore. - - All non-alphameric characters other than \, -, ^ (at the - start) and the terminating ] are non-special in character - classes, but it does no harm if they are escaped. - - - -POSIX CHARACTER CLASSES - Perl 5.6 (not yet released at the time of writing) is going - to support the POSIX notation for character classes, which - uses names enclosed by [: and :] within the enclosing - square brackets. PCRE supports this notation. For example, - - [01[:alpha:]%] - - matches "0", "1", any alphabetic character, or "%". 
The sup- - ported class names are - - alnum letters and digits - alpha letters - ascii character codes 0 - 127 - cntrl control characters - digit decimal digits (same as \d) - graph printing characters, excluding space - lower lower case letters - print printing characters, including space - punct printing characters, excluding letters and digits - space white space (same as \s) - upper upper case letters - word "word" characters (same as \w) - xdigit hexadecimal digits - - The names "ascii" and "word" are Perl extensions. Another - Perl extension is negation, which is indicated by a ^ char- - acter after the colon. For example, - - [12[:^digit:]] - - matches "1", "2", or any non-digit. PCRE (and Perl) also - recognize the POSIX syntax [.ch.] and [=ch=] where "ch" is a - "collating element", but these are not supported, and an - error is given if they are encountered. - - - -VERTICAL BAR - Vertical bar characters are used to separate alternative - patterns. For example, the pattern - - gilbert|sullivan - - matches either "gilbert" or "sullivan". Any number of alter- - natives may appear, and an empty alternative is permitted - (matching the empty string). The matching process tries - each alternative in turn, from left to right, and the first - one that succeeds is used. If the alternatives are within a - subpattern (defined below), "succeeds" means matching the - rest of the main pattern as well as the alternative in the - subpattern. - - - -INTERNAL OPTION SETTING - The settings of PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, - and PCRE_EXTENDED can be changed from within the pattern by - a sequence of Perl option letters enclosed between "(?" and - ")". The option letters are - - i for PCRE_CASELESS - m for PCRE_MULTILINE - s for PCRE_DOTALL - x for PCRE_EXTENDED - - For example, (?im) sets caseless, multiline matching.
It is - also possible to unset these options by preceding the letter - with a hyphen, and a combined setting and unsetting such as - (?im-sx), which sets PCRE_CASELESS and PCRE_MULTILINE while - unsetting PCRE_DOTALL and PCRE_EXTENDED, is also permitted. - If a letter appears both before and after the hyphen, the - option is unset. - - The scope of these option changes depends on where in the - pattern the setting occurs. For settings that are outside - any subpattern (defined below), the effect is the same as if - the options were set or unset at the start of matching. The - following patterns all behave in exactly the same way: - - (?i)abc - a(?i)bc - ab(?i)c - abc(?i) - - which in turn is the same as compiling the pattern abc with - PCRE_CASELESS set. In other words, such "top level" set- - tings apply to the whole pattern (unless there are other - changes inside subpatterns). If there is more than one set- - ting of the same option at top level, the rightmost setting - is used. - - If an option change occurs inside a subpattern, the effect - is different. This is a change of behaviour in Perl 5.005. - An option change inside a subpattern affects only that part - of the subpattern that follows it, so - - (a(?i)b)c - - matches abc and aBc and no other strings (assuming - PCRE_CASELESS is not used). By this means, options can be - made to have different settings in different parts of the - pattern. Any changes made in one alternative do carry on - into subsequent branches within the same subpattern. For - example, - - (a(?i)b|c) - - matches "ab", "aB", "c", and "C", even though when matching - "C" the first branch is abandoned before the option setting. - This is because the effects of option settings happen at - compile time. There would be some very weird behaviour oth- - erwise. - - The PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA can - be changed in the same way as the Perl-compatible options by - using the characters U and X respectively. 
The (?X) flag - setting is special in that it must always occur earlier in - the pattern than any of the additional features it turns on, - even when it is at top level. It is best put at the start. - - - -SUBPATTERNS - Subpatterns are delimited by parentheses (round brackets), - which can be nested. Marking part of a pattern as a subpat- - tern does two things: - - 1. It localizes a set of alternatives. For example, the pat- - tern - - cat(aract|erpillar|) - - matches one of the words "cat", "cataract", or "caterpil- - lar". Without the parentheses, it would match "cataract", - "erpillar" or the empty string. - - 2. It sets up the subpattern as a capturing subpattern (as - defined above). When the whole pattern matches, that por- - tion of the subject string that matched the subpattern is - passed back to the caller via the ovector argument of - pcre_exec(). Opening parentheses are counted from left to - right (starting from 1) to obtain the numbers of the captur- - ing subpatterns. - - For example, if the string "the red king" is matched against - the pattern - - the ((red|white) (king|queen)) - - the captured substrings are "red king", "red", and "king", - and are numbered 1, 2, and 3. - - The fact that plain parentheses fulfil two functions is not - always helpful. There are often times when a grouping sub- - pattern is required without a capturing requirement. If an - opening parenthesis is followed by "?:", the subpattern does - not do any capturing, and is not counted when computing the - number of any subsequent capturing subpatterns. For example, - if the string "the white queen" is matched against the pat- - tern - - the ((?:red|white) (king|queen)) - - the captured substrings are "white queen" and "queen", and - are numbered 1 and 2. The maximum number of captured sub- - strings is 99, and the maximum number of all subpatterns, - both capturing and non-capturing, is 200. 
- - As a convenient shorthand, if any option settings are - required at the start of a non-capturing subpattern, the - option letters may appear between the "?" and the ":". Thus - the two patterns - - (?i:saturday|sunday) - (?:(?i)saturday|sunday) - - match exactly the same set of strings. Because alternative - branches are tried from left to right, and options are not - reset until the end of the subpattern is reached, an option - setting in one branch does affect subsequent branches, so - the above patterns match "SUNDAY" as well as "Saturday". - - - -REPETITION - Repetition is specified by quantifiers, which can follow any - of the following items: - - a single character, possibly escaped - the . metacharacter - a character class - a back reference (see next section) - a parenthesized subpattern (unless it is an assertion - - see below) - - The general repetition quantifier specifies a minimum and - maximum number of permitted matches, by giving the two - numbers in curly brackets (braces), separated by a comma. - The numbers must be less than 65536, and the first must be - less than or equal to the second. For example: - - z{2,4} - - matches "zz", "zzz", or "zzzz". A closing brace on its own - is not a special character. If the second number is omitted, - but the comma is present, there is no upper limit; if the - second number and the comma are both omitted, the quantifier - specifies an exact number of required matches. Thus - - [aeiou]{3,} - - matches at least 3 successive vowels, but may match many - more, while - - \d{8} - - matches exactly 8 digits. An opening curly bracket that - appears in a position where a quantifier is not allowed, or - one that does not match the syntax of a quantifier, is taken - as a literal character. For example, {,6} is not a quantif- - ier, but a literal string of four characters. - - The quantifier {0} is permitted, causing the expression to - behave as if the previous item and the quantifier were not - present. 
- - For convenience (and historical compatibility) the three - most common quantifiers have single-character abbreviations: - - * is equivalent to {0,} - + is equivalent to {1,} - ? is equivalent to {0,1} - - It is possible to construct infinite loops by following a - subpattern that can match no characters with a quantifier - that has no upper limit, for example: - - (a?)* - - Earlier versions of Perl and PCRE used to give an error at - compile time for such patterns. However, because there are - cases where this can be useful, such patterns are now - accepted, but if any repetition of the subpattern does in - fact match no characters, the loop is forcibly broken. - - By default, the quantifiers are "greedy", that is, they - match as much as possible (up to the maximum number of per- - mitted times), without causing the rest of the pattern to - fail. The classic example of where this gives problems is in - trying to match comments in C programs. These appear between - the sequences /* and */ and within the sequence, individual - * and / characters may appear. An attempt to match C com- - ments by applying the pattern - - /\*.*\*/ - - to the string - - /* first comment */ not comment /* second comment */ - - fails, because it matches the entire string owing to the - greediness of the .* item. - - However, if a quantifier is followed by a question mark, it - ceases to be greedy, and instead matches the minimum number - of times possible, so the pattern - - /\*.*?\*/ - - does the right thing with the C comments. The meaning of the - various quantifiers is not otherwise changed, just the pre- - ferred number of matches. Do not confuse this use of ques- - tion mark with its use as a quantifier in its own right. - Because it has two uses, it can sometimes appear doubled, as - in - - \d??\d - - which matches one digit by preference, but can match two if - that is the only way the rest of the pattern matches.
- - If the PCRE_UNGREEDY option is set (an option which is not - available in Perl), the quantifiers are not greedy by - default, but individual ones can be made greedy by following - them with a question mark. In other words, it inverts the - default behaviour. - - When a parenthesized subpattern is quantified with a minimum - repeat count that is greater than 1 or with a limited max- - imum, more store is required for the compiled pattern, in - proportion to the size of the minimum or maximum. - - If a pattern starts with .* or .{0,} and the PCRE_DOTALL - option (equivalent to Perl's /s) is set, thus allowing the . - to match newlines, the pattern is implicitly anchored, - because whatever follows will be tried against every charac- - ter position in the subject string, so there is no point in - retrying the overall match at any position after the first. - PCRE treats such a pattern as though it were preceded by \A. - In cases where it is known that the subject string contains - no newlines, it is worth setting PCRE_DOTALL when the pat- - tern begins with .* in order to obtain this optimization, or - alternatively using ^ to indicate anchoring explicitly. - - When a capturing subpattern is repeated, the value captured - is the substring that matched the final iteration. For exam- - ple, after - - (tweedle[dume]{3}\s*)+ - - has matched "tweedledum tweedledee" the value of the cap- - tured substring is "tweedledee". However, if there are - nested capturing subpatterns, the corresponding captured - values may have been set in previous iterations. For exam- - ple, after - - /(a|(b))+/ - - matches "aba" the value of the second captured substring is - "b". - - - -BACK REFERENCES - Outside a character class, a backslash followed by a digit - greater than 0 (and possibly further digits) is a back - reference to a capturing subpattern earlier (i.e. to its - left) in the pattern, provided there have been that many - previous capturing left parentheses. 
- - However, if the decimal number following the backslash is - less than 10, it is always taken as a back reference, and - causes an error only if there are not that many capturing - left parentheses in the entire pattern. In other words, the - parentheses that are referenced need not be to the left of - the reference for numbers less than 10. See the section - entitled "Backslash" above for further details of the han- - dling of digits following a backslash. - - A back reference matches whatever actually matched the cap- - turing subpattern in the current subject string, rather than - anything matching the subpattern itself. So the pattern - - (sens|respons)e and \1ibility - - matches "sense and sensibility" and "response and responsi- - bility", but not "sense and responsibility". If caseful - matching is in force at the time of the back reference, the - case of letters is relevant. For example, - - ((?i)rah)\s+\1 - - matches "rah rah" and "RAH RAH", but not "RAH rah", even - though the original capturing subpattern is matched case- - lessly. - - There may be more than one back reference to the same sub- - pattern. If a subpattern has not actually been used in a - particular match, any back references to it always fail. For - example, the pattern - - (a|(bc))\2 - - always fails if it starts to match "a" rather than "bc". - Because there may be up to 99 back references, all digits - following the backslash are taken as part of a potential - back reference number. If the pattern continues with a digit - character, some delimiter must be used to terminate the back - reference. If the PCRE_EXTENDED option is set, this can be - whitespace. Otherwise an empty comment can be used. - - A back reference that occurs inside the parentheses to which - it refers fails when the subpattern is first used, so, for - example, (a\1) never matches. However, such references can - be useful inside repeated subpatterns. 
For example, the pat- - tern - - (a|b\1)+ - - matches any number of "a"s and also "aba", "ababbaa" etc. At - each iteration of the subpattern, the back reference matches - the character string corresponding to the previous - iteration. In order for this to work, the pattern must be - such that the first iteration does not need to match the - back reference. This can be done using alternation, as in - the example above, or by a quantifier with a minimum of - zero. - - - -ASSERTIONS - An assertion is a test on the characters following or - preceding the current matching point that does not actually - consume any characters. The simple assertions coded as \b, - \B, \A, \Z, \z, ^ and $ are described above. More compli- - cated assertions are coded as subpatterns. There are two - kinds: those that look ahead of the current position in the - subject string, and those that look behind it. - - An assertion subpattern is matched in the normal way, except - that it does not cause the current matching position to be - changed. Lookahead assertions start with (?= for positive - assertions and (?! for negative assertions. For example, - - \w+(?=;) - - matches a word followed by a semicolon, but does not include - the semicolon in the match, and - - foo(?!bar) - - matches any occurrence of "foo" that is not followed by - "bar". Note that the apparently similar pattern - - (?!foo)bar - - does not find an occurrence of "bar" that is preceded by - something other than "foo"; it finds any occurrence of "bar" - whatsoever, because the assertion (?!foo) is always true - when the next three characters are "bar". A lookbehind - assertion is needed to achieve this effect. - - Lookbehind assertions start with (?<= for positive asser- - tions and (? as in this example: - - (?>\d+)bar - - This kind of parenthesis "locks up" the part of the pattern - it contains once it has matched, and a failure further into - the pattern is prevented from backtracking into it. 
- Backtracking past it to previous items, however, works as - normal. - - An alternative description is that a subpattern of this type - matches the string of characters that an identical stan- - dalone pattern would match, if anchored at the current point - in the subject string. - - Once-only subpatterns are not capturing subpatterns. Simple - cases such as the above example can be thought of as a max- - imizing repeat that must swallow everything it can. So, - while both \d+ and \d+? are prepared to adjust the number of - digits they match in order to make the rest of the pattern - match, (?>\d+) can only match an entire sequence of digits. - - This construction can of course contain arbitrarily compli- - cated subpatterns, and it can be nested. - - Once-only subpatterns can be used in conjunction with look- - behind assertions to specify efficient matching at the end - of the subject string. Consider a simple pattern such as - - abcd$ - - when applied to a long string which does not match. Because - matching proceeds from left to right, PCRE will look for - each "a" in the subject and then see if what follows matches - the rest of the pattern. If the pattern is specified as - - ^.*abcd$ - - the initial .* matches the entire string at first, but when - this fails (because there is no following "a"), it back- - tracks to match all but the last character, then all but the - last two characters, and so on. Once again the search for - "a" covers the entire string, from right to left, so we are - no better off. However, if the pattern is written as - - ^(?>.*)(?<=abcd) - - there can be no backtracking for the .* item; it can match - only the entire string. The subsequent lookbehind assertion - does a single test on the last four characters. If it fails, - the match fails immediately. For long strings, this approach - makes a significant difference to the processing time. 
- - When a pattern contains an unlimited repeat inside a subpat- - tern that can itself be repeated an unlimited number of - times, the use of a once-only subpattern is the only way to - avoid some failing matches taking a very long time indeed. - The pattern - - (\D+|<\d+>)*[!?] - - matches an unlimited number of substrings that either con- - sist of non-digits, or digits enclosed in <>, followed by - either ! or ?. When it matches, it runs quickly. However, if - it is applied to - - aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa - - it takes a long time before reporting failure. This is - because the string can be divided between the two repeats in - a large number of ways, and all have to be tried. (The exam- - ple used [!?] rather than a single character at the end, - because both PCRE and Perl have an optimization that allows - for fast failure when a single character is used. They - remember the last single character that is required for a - match, and fail early if it is not present in the string.) - If the pattern is changed to - - ((?>\D+)|<\d+>)*[!?] - - sequences of non-digits cannot be broken, and failure hap- - pens quickly. - - - -CONDITIONAL SUBPATTERNS - It is possible to cause the matching process to obey a sub- - pattern conditionally or to choose between two alternative - subpatterns, depending on the result of an assertion, or - whether a previous capturing subpattern matched or not. The - two possible forms of conditional subpattern are - - (?(condition)yes-pattern) - (?(condition)yes-pattern|no-pattern) - - If the condition is satisfied, the yes-pattern is used; oth- - erwise the no-pattern (if present) is used. If there are - more than two alternatives in the subpattern, a compile-time - error occurs. - - There are two kinds of condition. If the text between the - parentheses consists of a sequence of digits, the condition - is satisfied if the capturing subpattern of that number has - previously matched. 
The number must be greater than zero. - Consider the following pattern, which contains non- - significant white space to make it more readable (assume the - PCRE_EXTENDED option) and to divide it into three parts for - ease of discussion: - - ( \( )? [^()]+ (?(1) \) ) - - The first part matches an optional opening parenthesis, and - if that character is present, sets it as the first captured - substring. The second part matches one or more characters - that are not parentheses. The third part is a conditional - subpattern that tests whether the first set of parentheses - matched or not. If they did, that is, if subject started - with an opening parenthesis, the condition is true, and so - the yes-pattern is executed and a closing parenthesis is - required. Otherwise, since no-pattern is not present, the - subpattern matches nothing. In other words, this pattern - matches a sequence of non-parentheses, optionally enclosed - in parentheses. - - If the condition is not a sequence of digits, it must be an - assertion. This may be a positive or negative lookahead or - lookbehind assertion. Consider this pattern, again contain- - ing non-significant white space, and with the two alterna- - tives on the second line: - - (?(?=[^a-z]*[a-z]) - \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) - - The condition is a positive lookahead assertion that matches - an optional sequence of non-letters followed by a letter. In - other words, it tests for the presence of at least one - letter in the subject. If a letter is found, the subject is - matched against the first alternative; otherwise it is - matched against the second. This pattern matches strings in - one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are - letters and dd are digits. - - - -COMMENTS - The sequence (?# marks the start of a comment which contin- - ues up to the next closing parenthesis. Nested parentheses - are not permitted. The characters that make up a comment - play no part in the pattern matching at all. 
- - If the PCRE_EXTENDED option is set, an unescaped # character - outside a character class introduces a comment that contin- - ues up to the next newline character in the pattern. - - - -RECURSIVE PATTERNS - Consider the problem of matching a string in parentheses, - allowing for unlimited nested parentheses. Without the use - of recursion, the best that can be done is to use a pattern - that matches up to some fixed depth of nesting. It is not - possible to handle an arbitrary nesting depth. Perl 5.6 has - provided an experimental facility that allows regular - expressions to recurse (amongst other things). It does this - by interpolating Perl code in the expression at run time, - and the code can refer to the expression itself. A Perl pat- - tern to solve the parentheses problem can be created like - this: - - $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x; - - The (?p{...}) item interpolates Perl code at run time, and - in this case refers recursively to the pattern in which it - appears. Obviously, PCRE cannot support the interpolation of - Perl code. Instead, the special item (?R) is provided for - the specific case of recursion. This PCRE pattern solves the - parentheses problem (assume the PCRE_EXTENDED option is set - so that white space is ignored): - - \( ( (?>[^()]+) | (?R) )* \) - - First it matches an opening parenthesis. Then it matches any - number of substrings which can either be a sequence of non- - parentheses, or a recursive match of the pattern itself - (i.e. a correctly parenthesized substring). Finally there is - a closing parenthesis. - - This particular example pattern contains nested unlimited - repeats, and so the use of a once-only subpattern for match- - ing strings of non-parentheses is important when applying - the pattern to strings that do not match. For example, when - it is applied to - - (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() - - it yields "no match" quickly. 
However, if a once-only sub- - pattern is not used, the match runs for a very long time - indeed because there are so many different ways the + and * - repeats can carve up the subject, and all have to be tested - before failure can be reported. - - The values set for any capturing subpatterns are those from - the outermost level of the recursion at which the subpattern - value is set. If the pattern above is matched against - - (ab(cd)ef) - - the value for the capturing parentheses is "ef", which is - the last value taken on at the top level. If additional - parentheses are added, giving - - \( ( ( (?>[^()]+) | (?R) )* ) \) - ^ ^ - ^ ^ the string they capture is - "ab(cd)ef", the contents of the top level parentheses. If - there are more than 15 capturing parentheses in a pattern, - PCRE has to obtain extra memory to store data during a - recursion, which it does by using pcre_malloc, freeing it - via pcre_free afterwards. If no memory can be obtained, it - saves data for the first 15 capturing parentheses only, as - there is no way to give an out-of-memory error from within a - recursion. - - - -PERFORMANCE - Certain items that may appear in patterns are more efficient - than others. It is more efficient to use a character class - like [aeiou] than a set of alternatives such as (a|e|i|o|u). - In general, the simplest construction that provides the - required behaviour is usually the most efficient. Jeffrey - Friedl's book contains a lot of discussion about optimizing - regular expressions for efficient performance. - - When a pattern begins with .* and the PCRE_DOTALL option is - set, the pattern is implicitly anchored by PCRE, since it - can match only at the start of a subject string. However, if - PCRE_DOTALL is not set, PCRE cannot make this optimization, - because the . 
metacharacter does not then match a newline, - and if the subject string contains newlines, the pattern may - match from the character immediately following one of them - instead of from the very start. For example, the pattern - - (.*) second - - matches the subject "first\nand second" (where \n stands for - a newline character) with the first captured substring being - "and". In order to do this, PCRE has to retry the match - starting after every newline in the subject. - - If you are using such a pattern with subject strings that do - not contain newlines, the best performance is obtained by - setting PCRE_DOTALL, or starting the pattern with ^.* to - indicate explicit anchoring. That saves PCRE from having to - scan along the subject looking for a newline to restart at. - - Beware of patterns that contain nested indefinite repeats. - These can take a long time to run when applied to a string - that does not match. Consider the pattern fragment - - (a+)* - - This can match "aaaa" in 33 different ways, and this number - increases very rapidly as the string gets longer. (The * - repeat can match 0, 1, 2, 3, or 4 times, and for each of - those cases other than 0, the + repeats can match different - numbers of times.) When the remainder of the pattern is such - that the entire match is going to fail, PCRE has in princi- - ple to try every possible variation, and this can take an - extremely long time. - - An optimization catches some of the more simple cases such - as - - (a+)*b - - where a literal character follows. Before embarking on the - standard matching procedure, PCRE checks that there is a "b" - later in the subject string, and if there is not, it fails - the match immediately. However, when there is no following - literal this optimization cannot be used. You can see the - difference by comparing the behaviour of - - (a+)*\d - - with the pattern above. 
The former gives a failure almost - instantly when applied to a whole line of "a" characters, - whereas the latter takes an appreciable time with strings - longer than about 20 characters. - - - -UTF-8 SUPPORT - Starting at release 3.3, PCRE has some support for character - strings encoded in the UTF-8 format. This is incomplete, and - is regarded as experimental. In order to use it, you must - configure PCRE to include UTF-8 support in the code, and, in - addition, you must call pcre_compile() with the PCRE_UTF8 - option flag. When you do this, both the pattern and any sub- - ject strings that are matched against it are treated as - UTF-8 strings instead of just strings of bytes, but only in - the cases that are mentioned below. - - If you compile PCRE with UTF-8 support, but do not use it at - run time, the library will be a bit bigger, but the addi- - tional run time overhead is limited to testing the PCRE_UTF8 - flag in several places, so should not be very large. - - PCRE assumes that the strings it is given contain valid - UTF-8 codes. It does not diagnose invalid UTF-8 strings. If - you pass invalid UTF-8 strings to PCRE, the results are - undefined. - - Running with PCRE_UTF8 set causes these changes in the way - PCRE works: - - 1. In a pattern, the escape sequence \x{...}, where the con- - tents of the braces is a string of hexadecimal digits, is - interpreted as a UTF-8 character whose code number is the - given hexadecimal number, for example: \x{1234}. This - inserts from one to six literal bytes into the pattern, - using the UTF-8 encoding. If a non-hexadecimal digit appears - between the braces, the item is not recognized. - - 2. The original hexadecimal escape sequence, \xhh, generates - a two-byte UTF-8 character if its value is greater than 127. - - 3. Repeat quantifiers are NOT correctly handled if they fol- - low a multibyte character. For example, \x{100}* and \xc3+ - do not work. 
If you want to repeat such characters, you must - enclose them in non-capturing parentheses, for example - (?:\x{100}), at present. - - 4. The dot metacharacter matches one UTF-8 character instead - of a single byte. - - 5. Unlike literal UTF-8 characters, the dot metacharacter - followed by a repeat quantifier does operate correctly on - UTF-8 characters instead of single bytes. - - 4. Although the \x{...} escape is permitted in a character - class, characters whose values are greater than 255 cannot - be included in a class. - - 5. A class is matched against a UTF-8 character instead of - just a single byte, but it can match only characters whose - values are less than 256. Characters with greater values - always fail to match a class. - - 6. Repeated classes work correctly on multiple characters. - - 7. Classes containing just a single character whose value is - greater than 127 (but less than 256), for example, [\x80] or - [^\x{93}], do not work because these are optimized into sin- - gle byte matches. In the first case, of course, the class - brackets are just redundant. - - 8. Lookbehind assertions move backwards in the subject by a - fixed number of characters instead of a fixed number of - bytes. Simple cases have been tested to work correctly, but - there may be hidden gotchas herein. - - 9. The character types such as \d and \w do not work - correctly with UTF-8 characters. They continue to test a - single byte. - - 10. Anything not explicitly mentioned here continues to work - in bytes rather than in characters. - - The following UTF-8 features of Perl 5.6 are not imple- - mented: - 1. The escape sequence \C to match a single byte. - - 2. The use of Unicode tables and properties and escapes \p, - \P, and \X. - - - -AUTHOR - Philip Hazel - University Computing Service, - New Museums Site, - Cambridge CB2 3QG, England. - Phone: +44 1223 334714 - - Last updated: 28 August 2000, - the 250th anniversary of the death of J.S. Bach. 
- Copyright (c) 1997-2000 University of Cambridge. diff --git a/pcre/doc/pcregrep.1 b/pcre/doc/pcregrep.1 deleted file mode 100644 index ec733fa1..00000000 --- a/pcre/doc/pcregrep.1 +++ /dev/null @@ -1,76 +0,0 @@ -.TH PCREGREP 1 -.SH NAME -pcregrep - a grep with Perl-compatible regular expressions. -.SH SYNOPSIS -.B pcregrep [-Vchilnsvx] pattern [file] ... - - -.SH DESCRIPTION -\fBpcregrep\fR searches files for character patterns, in the same way as other -grep commands do, but it uses the PCRE regular expression library to support -patterns that are compatible with the regular expressions of Perl 5. See -\fBpcre(3)\fR for a full description of syntax and semantics. - -If no files are specified, \fBpcregrep\fR reads the standard input. By default, -each line that matches the pattern is copied to the standard output, and if -there is more than one file, the file name is printed before each line of -output. However, there are options that can change how \fBpcregrep\fR behaves. - -Lines are limited to BUFSIZ characters. BUFSIZ is defined in \fB\fR. -The newline character is removed from the end of each line before it is matched -against the pattern. - - -.SH OPTIONS -.TP 10 -\fB-V\fR -Write the version number of the PCRE library being used to the standard error -stream. -.TP -\fB-c\fR -Do not print individual lines; instead just print a count of the number of -lines that would otherwise have been printed. If several files are given, a -count is printed for each of them. -.TP -\fB-h\fR -Suppress printing of filenames when searching multiple files. -.TP -\fB-i\fR -Ignore upper/lower case distinctions during comparisons. -.TP -\fB-l\fR -Instead of printing lines from the files, just print the names of the files -containing lines that would have been printed. Each file name is printed -once, on a separate line. -.TP -\fB-n\fR -Precede each line by its line number in the file. -.TP -\fB-s\fR -Work silently, that is, display nothing except error messages. 
-The exit status indicates whether any matches were found. -.TP -\fB-v\fR -Invert the sense of the match, so that lines which do \fInot\fR match the -pattern are now the ones that are found. -.TP -\fB-x\fR -Force the pattern to be anchored (it must start matching at the beginning of -the line) and in addition, require it to match the entire line. This is -equivalent to having ^ and $ characters at the start and end of each -alternative branch in the regular expression. - - -.SH SEE ALSO -\fBpcre(3)\fR, Perl 5 documentation - - -.SH DIAGNOSTICS -Exit status is 0 if any matches were found, 1 if no matches were found, and 2 -for syntax errors or inaccessible files (even if matches were found). - - -.SH AUTHOR -Philip Hazel -.br -Copyright (c) 1997-2000 University of Cambridge. diff --git a/pcre/doc/pcregrep.html b/pcre/doc/pcregrep.html deleted file mode 100644 index 19f733c4..00000000 --- a/pcre/doc/pcregrep.html +++ /dev/null @@ -1,105 +0,0 @@ - - -pcregrep specification - - -

    pcregrep specification

    -This HTML document has been generated automatically from the original man page. -If there is any nonsense in it, please consult the man page in case the -conversion went wrong. - -
  • NAME -

    -pcregrep - a grep with Perl-compatible regular expressions. -

    -
  • SYNOPSIS -

    -pcregrep [-Vchilnsvx] pattern [file] ... -

    -
  • DESCRIPTION -

    -pcregrep searches files for character patterns, in the same way as other -grep commands do, but it uses the PCRE regular expression library to support -patterns that are compatible with the regular expressions of Perl 5. See -pcre(3) for a full description of syntax and semantics. -

    -

    -If no files are specified, pcregrep reads the standard input. By default, -each line that matches the pattern is copied to the standard output, and if -there is more than one file, the file name is printed before each line of -output. However, there are options that can change how pcregrep behaves. -

    -

    -Lines are limited to BUFSIZ characters. BUFSIZ is defined in <stdio.h>. -The newline character is removed from the end of each line before it is matched -against the pattern. -

    -
  • OPTIONS -

    --V -Write the version number of the PCRE library being used to the standard error -stream. -

    -

    --c -Do not print individual lines; instead just print a count of the number of -lines that would otherwise have been printed. If several files are given, a -count is printed for each of them. -

    -

    --h -Suppress printing of filenames when searching multiple files. -

    -

    --i -Ignore upper/lower case distinctions during comparisons. -

    -

    --l -Instead of printing lines from the files, just print the names of the files -containing lines that would have been printed. Each file name is printed -once, on a separate line. -

    -

    --n -Precede each line by its line number in the file. -

    -

    --s -Work silently, that is, display nothing except error messages. -The exit status indicates whether any matches were found. -

    -

    --v -Invert the sense of the match, so that lines which do not match the -pattern are now the ones that are found. -

    -

    --x -Force the pattern to be anchored (it must start matching at the beginning of -the line) and in addition, require it to match the entire line. This is -equivalent to having ^ and $ characters at the start and end of each -alternative branch in the regular expression. -

    -
  • SEE ALSO -

    -pcre(3), Perl 5 documentation -

    -
  • DIAGNOSTICS -

    -Exit status is 0 if any matches were found, 1 if no matches were found, and 2 -for syntax errors or inaccessible files (even if matches were found). -

    -
  • AUTHOR -

    -Philip Hazel <ph10@cam.ac.uk> -
    -Copyright (c) 1997-2000 University of Cambridge. diff --git a/pcre/doc/pcregrep.txt b/pcre/doc/pcregrep.txt deleted file mode 100644 index 871350ca..00000000 --- a/pcre/doc/pcregrep.txt +++ /dev/null @@ -1,87 +0,0 @@ -NAME - pcregrep - a grep with Perl-compatible regular expressions. - - - -SYNOPSIS - pcregrep [-Vchilnsvx] pattern [file] ... - - - -DESCRIPTION - pcregrep searches files for character patterns, in the same - way as other grep commands do, but it uses the PCRE regular - expression library to support patterns that are compatible - with the regular expressions of Perl 5. See pcre(3) for a - full description of syntax and semantics. - - If no files are specified, pcregrep reads the standard - input. By default, each line that matches the pattern is - copied to the standard output, and if there is more than one - file, the file name is printed before each line of output. - However, there are options that can change how pcregrep - behaves. - - Lines are limited to BUFSIZ characters. BUFSIZ is defined in - . The newline character is removed from the end of - each line before it is matched against the pattern. - - - -OPTIONS - -V Write the version number of the PCRE library being - used to the standard error stream. - - -c Do not print individual lines; instead just print - a count of the number of lines that would other- - wise have been printed. If several files are - given, a count is printed for each of them. - - -h Suppress printing of filenames when searching mul- - tiple files. - - -i Ignore upper/lower case distinctions during com- - parisons. - - -l Instead of printing lines from the files, just - print the names of the files containing lines that - would have been printed. Each file name is printed - once, on a separate line. - - -n Precede each line by its line number in the file. - - -s Work silently, that is, display nothing except - error messages. The exit status indicates whether - any matches were found. 
- - -v Invert the sense of the match, so that lines which - do not match the pattern are now the ones that are - found. - - -x Force the pattern to be anchored (it must start - matching at the beginning of the line) and in - addition, require it to match the entire line. - This is equivalent to having ^ and $ characters at - the start and end of each alternative branch in - the regular expression. - - - -SEE ALSO - pcre(3), Perl 5 documentation - - - - - -DIAGNOSTICS - Exit status is 0 if any matches were found, 1 if no matches - were found, and 2 for syntax errors or inacessible files - (even if matches were found). - - - -AUTHOR - Philip Hazel - Copyright (c) 1997-2000 University of Cambridge. - diff --git a/pcre/doc/pcreposix.3 b/pcre/doc/pcreposix.3 deleted file mode 100644 index 4853a97f..00000000 --- a/pcre/doc/pcreposix.3 +++ /dev/null @@ -1,149 +0,0 @@ -.TH PCRE 3 -.SH NAME -pcreposix - POSIX API for Perl-compatible regular expressions. -.SH SYNOPSIS -.B #include -.PP -.SM -.br -.B int regcomp(regex_t *\fIpreg\fR, const char *\fIpattern\fR, -.ti +5n -.B int \fIcflags\fR); -.PP -.br -.B int regexec(regex_t *\fIpreg\fR, const char *\fIstring\fR, -.ti +5n -.B size_t \fInmatch\fR, regmatch_t \fIpmatch\fR[], int \fIeflags\fR); -.PP -.br -.B size_t regerror(int \fIerrcode\fR, const regex_t *\fIpreg\fR, -.ti +5n -.B char *\fIerrbuf\fR, size_t \fIerrbuf_size\fR); -.PP -.br -.B void regfree(regex_t *\fIpreg\fR); - - -.SH DESCRIPTION -This set of functions provides a POSIX-style API to the PCRE regular expression -package. See the \fBpcre\fR documentation for a description of the native API, -which contains additional functionality. - -The functions described here are just wrapper functions that ultimately call -the native API. Their prototypes are defined in the \fBpcreposix.h\fR header -file, and on Unix systems the library itself is called \fBpcreposix.a\fR, so -can be accessed by adding \fB-lpcreposix\fR to the command for linking an -application which uses them. 
Because the POSIX functions call the native ones, -it is also necessary to add \fR-lpcre\fR. - -I have implemented only those option bits that can be reasonably mapped to PCRE -native options. In addition, the options REG_EXTENDED and REG_NOSUB are defined -with the value zero. They have no effect, but since programs that are written -to the POSIX interface often use them, this makes it easier to slot in PCRE as -a replacement library. Other POSIX options are not even defined. - -When PCRE is called via these functions, it is only the API that is POSIX-like -in style. The syntax and semantics of the regular expressions themselves are -still those of Perl, subject to the setting of various PCRE options, as -described below. - -The header for these functions is supplied as \fBpcreposix.h\fR to avoid any -potential clash with other POSIX libraries. It can, of course, be renamed or -aliased as \fBregex.h\fR, which is the "correct" name. It provides two -structure types, \fIregex_t\fR for compiled internal forms, and -\fIregmatch_t\fR for returning captured substrings. It also defines some -constants whose names start with "REG_"; these are used for setting options and -identifying error codes. - - -.SH COMPILING A PATTERN - -The function \fBregcomp()\fR is called to compile a pattern into an -internal form. The pattern is a C string terminated by a binary zero, and -is passed in the argument \fIpattern\fR. The \fIpreg\fR argument is a pointer -to a regex_t structure which is used as a base for storing information about -the compiled expression. - -The argument \fIcflags\fR is either zero, or contains one or more of the bits -defined by the following macros: - - REG_ICASE - -The PCRE_CASELESS option is set when the expression is passed for compilation -to the native function. - - REG_NEWLINE - -The PCRE_MULTILINE option is set when the expression is passed for compilation -to the native function. 
- -In the absence of these flags, no options are passed to the native function. -This means the the regex is compiled with PCRE default semantics. In -particular, the way it handles newline characters in the subject string is the -Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only -\fIsome\fR of the effects specified for REG_NEWLINE. It does not affect the way -newlines are matched by . (they aren't) or a negative class such as [^a] (they -are). - -The yield of \fBregcomp()\fR is zero on success, and non-zero otherwise. The -\fIpreg\fR structure is filled in on success, and one member of the structure -is publicized: \fIre_nsub\fR contains the number of capturing subpatterns in -the regular expression. Various error codes are defined in the header file. - - -.SH MATCHING A PATTERN -The function \fBregexec()\fR is called to match a pre-compiled pattern -\fIpreg\fR against a given \fIstring\fR, which is terminated by a zero byte, -subject to the options in \fIeflags\fR. These can be: - - REG_NOTBOL - -The PCRE_NOTBOL option is set when calling the underlying PCRE matching -function. - - REG_NOTEOL - -The PCRE_NOTEOL option is set when calling the underlying PCRE matching -function. - -The portion of the string that was matched, and also any captured substrings, -are returned via the \fIpmatch\fR argument, which points to an array of -\fInmatch\fR structures of type \fIregmatch_t\fR, containing the members -\fIrm_so\fR and \fIrm_eo\fR. These contain the offset to the first character of -each substring and the offset to the first character after the end of each -substring, respectively. The 0th element of the vector relates to the entire -portion of \fIstring\fR that was matched; subsequent elements relate to the -capturing subpatterns of the regular expression. Unused entries in the array -have both structure members set to -1. 
- -A successful match yields a zero return; various error codes are defined in the -header file, of which REG_NOMATCH is the "expected" failure code. - - -.SH ERROR MESSAGES -The \fBregerror()\fR function maps a non-zero errorcode from either -\fBregcomp\fR or \fBregexec\fR to a printable message. If \fIpreg\fR is not -NULL, the error should have arisen from the use of that structure. A message -terminated by a binary zero is placed in \fIerrbuf\fR. The length of the -message, including the zero, is limited to \fIerrbuf_size\fR. The yield of the -function is the size of buffer needed to hold the whole message. - - -.SH STORAGE -Compiling a regular expression causes memory to be allocated and associated -with the \fIpreg\fR structure. The function \fBregfree()\fR frees all such -memory, after which \fIpreg\fR may no longer be used as a compiled expression. - - -.SH AUTHOR -Philip Hazel -.br -University Computing Service, -.br -New Museums Site, -.br -Cambridge CB2 3QG, England. -.br -Phone: +44 1223 334714 - -Copyright (c) 1997-2000 University of Cambridge. diff --git a/pcre/doc/pcreposix.html b/pcre/doc/pcreposix.html deleted file mode 100644 index 79ff544b..00000000 --- a/pcre/doc/pcreposix.html +++ /dev/null @@ -1,191 +0,0 @@ - - -pcreposix specification - - -

    pcreposix specification

    -This HTML document has been generated automatically from the original man page. -If there is any nonsense in it, please consult the man page in case the -conversion went wrong. - -
  • NAME -

    -pcreposix - POSIX API for Perl-compatible regular expressions. -

    -
  • SYNOPSIS -

    -#include <pcreposix.h> -

    -

    -int regcomp(regex_t *preg, const char *pattern, -int cflags); -

    -

    -int regexec(regex_t *preg, const char *string, -size_t nmatch, regmatch_t pmatch[], int eflags); -

    -

    -size_t regerror(int errcode, const regex_t *preg, -char *errbuf, size_t errbuf_size); -

    -

    -void regfree(regex_t *preg); -

    -
  • DESCRIPTION -

    -This set of functions provides a POSIX-style API to the PCRE regular expression -package. See the pcre documentation for a description of the native API, -which contains additional functionality. -

    -

    -The functions described here are just wrapper functions that ultimately call -the native API. Their prototypes are defined in the pcreposix.h header -file, and on Unix systems the library itself is called pcreposix.a, so -can be accessed by adding -lpcreposix to the command for linking an -application which uses them. Because the POSIX functions call the native ones, -it is also necessary to add -lpcre. -

    -

    -I have implemented only those option bits that can be reasonably mapped to PCRE -native options. In addition, the options REG_EXTENDED and REG_NOSUB are defined -with the value zero. They have no effect, but since programs that are written -to the POSIX interface often use them, this makes it easier to slot in PCRE as -a replacement library. Other POSIX options are not even defined. -

    -

    -When PCRE is called via these functions, it is only the API that is POSIX-like -in style. The syntax and semantics of the regular expressions themselves are -still those of Perl, subject to the setting of various PCRE options, as -described below. -

    -

    -The header for these functions is supplied as pcreposix.h to avoid any -potential clash with other POSIX libraries. It can, of course, be renamed or -aliased as regex.h, which is the "correct" name. It provides two -structure types, regex_t for compiled internal forms, and -regmatch_t for returning captured substrings. It also defines some -constants whose names start with "REG_"; these are used for setting options and -identifying error codes. -

    -
  • COMPILING A PATTERN -

    -The function regcomp() is called to compile a pattern into an -internal form. The pattern is a C string terminated by a binary zero, and -is passed in the argument pattern. The preg argument is a pointer -to a regex_t structure which is used as a base for storing information about -the compiled expression. -

    -

    -The argument cflags is either zero, or contains one or more of the bits -defined by the following macros: -

    -

    -

    -  REG_ICASE
    -
    -

    -

    -The PCRE_CASELESS option is set when the expression is passed for compilation -to the native function. -

    -

    -

    -  REG_NEWLINE
    -
    -

    -

    -The PCRE_MULTILINE option is set when the expression is passed for compilation -to the native function. -

    -

    -In the absence of these flags, no options are passed to the native function. -This means that the regex is compiled with PCRE default semantics. In -particular, the way it handles newline characters in the subject string is the -Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only -some of the effects specified for REG_NEWLINE. It does not affect the way -newlines are matched by . (they aren't) or a negative class such as [^a] (they -are). -

    -

    -The yield of regcomp() is zero on success, and non-zero otherwise. The -preg structure is filled in on success, and one member of the structure -is publicized: re_nsub contains the number of capturing subpatterns in -the regular expression. Various error codes are defined in the header file. -

    -
  • MATCHING A PATTERN -

    -The function regexec() is called to match a pre-compiled pattern -preg against a given string, which is terminated by a zero byte, -subject to the options in eflags. These can be: -

    -

    -

    -  REG_NOTBOL
    -
    -

    -

    -The PCRE_NOTBOL option is set when calling the underlying PCRE matching -function. -

    -

    -

    -  REG_NOTEOL
    -
    -

    -

    -The PCRE_NOTEOL option is set when calling the underlying PCRE matching -function. -

    -

    -The portion of the string that was matched, and also any captured substrings, -are returned via the pmatch argument, which points to an array of -nmatch structures of type regmatch_t, containing the members -rm_so and rm_eo. These contain the offset to the first character of -each substring and the offset to the first character after the end of each -substring, respectively. The 0th element of the vector relates to the entire -portion of string that was matched; subsequent elements relate to the -capturing subpatterns of the regular expression. Unused entries in the array -have both structure members set to -1. -

    -

    -A successful match yields a zero return; various error codes are defined in the -header file, of which REG_NOMATCH is the "expected" failure code. -

    -
  • ERROR MESSAGES -

    -The regerror() function maps a non-zero errorcode from either -regcomp or regexec to a printable message. If preg is not -NULL, the error should have arisen from the use of that structure. A message -terminated by a binary zero is placed in errbuf. The length of the -message, including the zero, is limited to errbuf_size. The yield of the -function is the size of buffer needed to hold the whole message. -

    -
  • STORAGE -

    -Compiling a regular expression causes memory to be allocated and associated -with the preg structure. The function regfree() frees all such -memory, after which preg may no longer be used as a compiled expression. -

    -
  • AUTHOR -

    -Philip Hazel <ph10@cam.ac.uk> -
    -University Computing Service, -
    -New Museums Site, -
    -Cambridge CB2 3QG, England. -
    -Phone: +44 1223 334714 -

    -

    -Copyright (c) 1997-2000 University of Cambridge. diff --git a/pcre/doc/pcreposix.txt b/pcre/doc/pcreposix.txt deleted file mode 100644 index 2d76f7cd..00000000 --- a/pcre/doc/pcreposix.txt +++ /dev/null @@ -1,159 +0,0 @@ -NAME - pcreposix - POSIX API for Perl-compatible regular expres- - sions. - - - -SYNOPSIS - #include - - int regcomp(regex_t *preg, const char *pattern, - int cflags); - - int regexec(regex_t *preg, const char *string, - size_t nmatch, regmatch_t pmatch[], int eflags); - - size_t regerror(int errcode, const regex_t *preg, - char *errbuf, size_t errbuf_size); - - void regfree(regex_t *preg); - - - -DESCRIPTION - This set of functions provides a POSIX-style API to the PCRE - regular expression package. See the pcre documentation for a - description of the native API, which contains additional - functionality. - - The functions described here are just wrapper functions that - ultimately call the native API. Their prototypes are defined - in the pcreposix.h header file, and on Unix systems the - library itself is called pcreposix.a, so can be accessed by - adding -lpcreposix to the command for linking an application - which uses them. Because the POSIX functions call the native - ones, it is also necessary to add -lpcre. - - I have implemented only those option bits that can be rea- - sonably mapped to PCRE native options. In addition, the - options REG_EXTENDED and REG_NOSUB are defined with the - value zero. They have no effect, but since programs that are - written to the POSIX interface often use them, this makes it - easier to slot in PCRE as a replacement library. Other POSIX - options are not even defined. - - When PCRE is called via these functions, it is only the API - that is POSIX-like in style. The syntax and semantics of the - regular expressions themselves are still those of Perl, sub- - ject to the setting of various PCRE options, as described - below. 
- - The header for these functions is supplied as pcreposix.h to - avoid any potential clash with other POSIX libraries. It - can, of course, be renamed or aliased as regex.h, which is - the "correct" name. It provides two structure types, regex_t - for compiled internal forms, and regmatch_t for returning - captured substrings. It also defines some constants whose - names start with "REG_"; these are used for setting options - and identifying error codes. - - - -COMPILING A PATTERN - The function regcomp() is called to compile a pattern into - an internal form. The pattern is a C string terminated by a - binary zero, and is passed in the argument pattern. The preg - argument is a pointer to a regex_t structure which is used - as a base for storing information about the compiled expres- - sion. - - The argument cflags is either zero, or contains one or more - of the bits defined by the following macros: - - REG_ICASE - - The PCRE_CASELESS option is set when the expression is - passed for compilation to the native function. - - REG_NEWLINE - - The PCRE_MULTILINE option is set when the expression is - passed for compilation to the native function. - - In the absence of these flags, no options are passed to the - native function. This means the the regex is compiled with - PCRE default semantics. In particular, the way it handles - newline characters in the subject string is the Perl way, - not the POSIX way. Note that setting PCRE_MULTILINE has only - some of the effects specified for REG_NEWLINE. It does not - affect the way newlines are matched by . (they aren't) or a - negative class such as [^a] (they are). - - The yield of regcomp() is zero on success, and non-zero oth- - erwise. The preg structure is filled in on success, and one - member of the structure is publicized: re_nsub contains the - number of capturing subpatterns in the regular expression. - Various error codes are defined in the header file. 
- - - -MATCHING A PATTERN - The function regexec() is called to match a pre-compiled - pattern preg against a given string, which is terminated by - a zero byte, subject to the options in eflags. These can be: - - REG_NOTBOL - - The PCRE_NOTBOL option is set when calling the underlying - PCRE matching function. - - REG_NOTEOL - - The PCRE_NOTEOL option is set when calling the underlying - PCRE matching function. - - The portion of the string that was matched, and also any - captured substrings, are returned via the pmatch argument, - which points to an array of nmatch structures of type - regmatch_t, containing the members rm_so and rm_eo. These - contain the offset to the first character of each substring - and the offset to the first character after the end of each - substring, respectively. The 0th element of the vector - relates to the entire portion of string that was matched; - subsequent elements relate to the capturing subpatterns of - the regular expression. Unused entries in the array have - both structure members set to -1. - - A successful match yields a zero return; various error codes - are defined in the header file, of which REG_NOMATCH is the - "expected" failure code. - - - -ERROR MESSAGES - The regerror() function maps a non-zero errorcode from - either regcomp or regexec to a printable message. If preg is - not NULL, the error should have arisen from the use of that - structure. A message terminated by a binary zero is placed - in errbuf. The length of the message, including the zero, is - limited to errbuf_size. The yield of the function is the - size of buffer needed to hold the whole message. - - - -STORAGE - Compiling a regular expression causes memory to be allocated - and associated with the preg structure. The function reg- - free() frees all such memory, after which preg may no longer - be used as a compiled expression. - - - -AUTHOR - Philip Hazel - University Computing Service, - New Museums Site, - Cambridge CB2 3QG, England. 
- Phone: +44 1223 334714 - - Copyright (c) 1997-2000 University of Cambridge. diff --git a/pcre/doc/pcretest.txt b/pcre/doc/pcretest.txt deleted file mode 100644 index 722e6b86..00000000 --- a/pcre/doc/pcretest.txt +++ /dev/null @@ -1,246 +0,0 @@ -The pcretest program --------------------- - -This program is intended for testing PCRE, but it can also be used for -experimenting with regular expressions. - -If it is given two filename arguments, it reads from the first and writes to -the second. If it is given only one filename argument, it reads from that file -and writes to stdout. Otherwise, it reads from stdin and writes to stdout, and -prompts for each line of input, using "re>" to prompt for regular expressions, -and "data>" to prompt for data lines. - -The program handles any number of sets of input on a single input file. Each -set starts with a regular expression, and continues with any number of data -lines to be matched against the pattern. An empty line signals the end of the -data lines, at which point a new regular expression is read. The regular -expressions are given enclosed in any non-alphameric delimiters other than -backslash, for example - - /(a|bc)x+yz/ - -White space before the initial delimiter is ignored. A regular expression may -be continued over several input lines, in which case the newline characters are -included within it. See the test input files in the testdata directory for many -examples. It is possible to include the delimiter within the pattern by -escaping it, for example - - /abc\/def/ - -If you do so, the escape and the delimiter form part of the pattern, but since -delimiters are always non-alphameric, this does not affect its interpretation. -If the terminating delimiter is immediately followed by a backslash, for -example, - - /abc/\ - -then a backslash is added to the end of the pattern. 
This is done to provide a -way of testing the error condition that arises if a pattern finishes with a -backslash, because - - /abc\/ - -is interpreted as the first line of a pattern that starts with "abc/", causing -pcretest to read the next line as a continuation of the regular expression. - - -PATTERN MODIFIERS ------------------ - -The pattern may be followed by i, m, s, or x to set the PCRE_CASELESS, -PCRE_MULTILINE, PCRE_DOTALL, or PCRE_EXTENDED options, respectively. For -example: - - /caseless/i - -These modifier letters have the same effect as they do in Perl. There are -others which set PCRE options that do not correspond to anything in Perl: /A, -/E, and /X set PCRE_ANCHORED, PCRE_DOLLAR_ENDONLY, and PCRE_EXTRA respectively. - -Searching for all possible matches within each subject string can be requested -by the /g or /G modifier. After finding a match, PCRE is called again to search -the remainder of the subject string. The difference between /g and /G is that -the former uses the startoffset argument to pcre_exec() to start searching at -a new point within the entire string (which is in effect what Perl does), -whereas the latter passes over a shortened substring. This makes a difference -to the matching process if the pattern begins with a lookbehind assertion -(including \b or \B). - -If any call to pcre_exec() in a /g or /G sequence matches an empty string, the -next call is done with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set in order -to search for another, non-empty, match at the same point. If this second match -fails, the start offset is advanced by one, and the normal match is retried. -This imitates the way Perl handles such cases when using the /g modifier or the -split() function. - -There are a number of other modifiers for controlling the way pcretest -operates. - -The /+ modifier requests that as well as outputting the substring that matched -the entire pattern, pcretest should in addition output the remainder of the -subject string. 
This is useful for tests where the subject contains multiple -copies of the same substring. - -The /L modifier must be followed directly by the name of a locale, for example, - - /pattern/Lfr - -For this reason, it must be the last modifier letter. The given locale is set, -pcre_maketables() is called to build a set of character tables for the locale, -and this is then passed to pcre_compile() when compiling the regular -expression. Without an /L modifier, NULL is passed as the tables pointer; that -is, /L applies only to the expression on which it appears. - -The /I modifier requests that pcretest output information about the compiled -expression (whether it is anchored, has a fixed first character, and so on). It -does this by calling pcre_fullinfo() after compiling an expression, and -outputting the information it gets back. If the pattern is studied, the results -of that are also output. - -The /D modifier is a PCRE debugging feature, which also assumes /I. It causes -the internal form of compiled regular expressions to be output after -compilation. - -The /S modifier causes pcre_study() to be called after the expression has been -compiled, and the results used when the expression is matched. - -The /M modifier causes the size of memory block used to hold the compiled -pattern to be output. - -The /P modifier causes pcretest to call PCRE via the POSIX wrapper API rather -than its native API. When this is done, all other modifiers except /i, /m, and -/+ are ignored. REG_ICASE is set if /i is present, and REG_NEWLINE is set if /m -is present. The wrapper functions force PCRE_DOLLAR_ENDONLY always, and -PCRE_DOTALL unless REG_NEWLINE is set. - -The /8 modifier causes pcretest to call PCRE with the PCRE_UTF8 option set. -This turns on the (currently incomplete) support for UTF-8 character handling -in PCRE, provided that it was compiled with this support enabled. 
This modifier -also causes any non-printing characters in output strings to be printed using -the \x{hh...} notation if they are valid UTF-8 sequences. - - -DATA LINES ----------- - -Before each data line is passed to pcre_exec(), leading and trailing whitespace -is removed, and it is then scanned for \ escapes. The following are recognized: - - \a alarm (= BEL) - \b backspace - \e escape - \f formfeed - \n newline - \r carriage return - \t tab - \v vertical tab - \nnn octal character (up to 3 octal digits) - \xhh hexadecimal character (up to 2 hex digits) - \x{hh...} hexadecimal UTF-8 character - - \A pass the PCRE_ANCHORED option to pcre_exec() - \B pass the PCRE_NOTBOL option to pcre_exec() - \Cdd call pcre_copy_substring() for substring dd after a successful - match (any decimal number less than 32) - \Gdd call pcre_get_substring() for substring dd after a successful - match (any decimal number less than 32) - \L call pcre_get_substringlist() after a successful match - \N pass the PCRE_NOTEMPTY option to pcre_exec() - \Odd set the size of the output vector passed to pcre_exec() to dd - (any number of decimal digits) - \Z pass the PCRE_NOTEOL option to pcre_exec() - -A backslash followed by anything else just escapes the anything else. If the -very last character is a backslash, it is ignored. This gives a way of passing -an empty line as data, since a real empty line terminates the data input. - -If /P was present on the regex, causing the POSIX wrapper API to be used, only -\B, and \Z have any effect, causing REG_NOTBOL and REG_NOTEOL to be passed to -regexec() respectively. - -The use of \x{hh...} to represent UTF-8 characters is not dependent on the use -of the /8 modifier on the pattern. It is recognized always. There may be any -number of hexadecimal digits inside the braces. The result is from one to six -bytes, encoded according to the UTF-8 rules. 
- - -OUTPUT FROM PCRETEST --------------------- - -When a match succeeds, pcretest outputs the list of captured substrings that -pcre_exec() returns, starting with number 0 for the string that matched the -whole pattern. Here is an example of an interactive pcretest run. - - $ pcretest - PCRE version 2.06 08-Jun-1999 - - re> /^abc(\d+)/ - data> abc123 - 0: abc123 - 1: 123 - data> xyz - No match - -If the strings contain any non-printing characters, they are output as \0x -escapes, or as \x{...} escapes if the /8 modifier was present on the pattern. -If the pattern has the /+ modifier, then the output for substring 0 is followed -by the the rest of the subject string, identified by "0+" like this: - - re> /cat/+ - data> cataract - 0: cat - 0+ aract - -If the pattern has the /g or /G modifier, the results of successive matching -attempts are output in sequence, like this: - - re> /\Bi(\w\w)/g - data> Mississippi - 0: iss - 1: ss - 0: iss - 1: ss - 0: ipp - 1: pp - -"No match" is output only if the first match attempt fails. - -If any of \C, \G, or \L are present in a data line that is successfully -matched, the substrings extracted by the convenience functions are output with -C, G, or L after the string number instead of a colon. This is in addition to -the normal full list. The string length (that is, the return from the -extraction function) is given in parentheses after each string for \C and \G. - -Note that while patterns can be continued over several lines (a plain ">" -prompt is used for continuations), data lines may not. However newlines can be -included in data by means of the \n escape. - - -COMMAND LINE OPTIONS --------------------- - -If the -p option is given to pcretest, it is equivalent to adding /P to each -regular expression: the POSIX wrapper API is used to call PCRE. None of the -following flags has any effect in this case. 
- -If the option -d is given to pcretest, it is equivalent to adding /D to each -regular expression: the internal form is output after compilation. - -If the option -i is given to pcretest, it is equivalent to adding /I to each -regular expression: information about the compiled pattern is given after -compilation. - -If the option -m is given to pcretest, it outputs the size of each compiled -pattern after it has been compiled. It is equivalent to adding /M to each -regular expression. For compatibility with earlier versions of pcretest, -s is -a synonym for -m. - -If the -t option is given, each compile, study, and match is run 20000 times -while being timed, and the resulting time per compile or match is output in -milliseconds. Do not set -t with -m, because you will then get the size output -20000 times and the timing will be distorted. If you want to change the number -of repetitions used for timing, edit the definition of LOOPREPEAT at the top of -pcretest.c - -Philip Hazel -August 2000 diff --git a/pcre/doc/perltest.txt b/pcre/doc/perltest.txt deleted file mode 100644 index 33155c1a..00000000 --- a/pcre/doc/perltest.txt +++ /dev/null @@ -1,29 +0,0 @@ -The perltest program --------------------- - -The perltest program tests Perl's regular expressions; it has the same -specification as pcretest, and so can be given identical input, except that -input patterns can be followed only by Perl's lower case modifiers and /+ (as -used by pcretest), which is recognized and handled by the program. - -The data lines are processed as Perl double-quoted strings, so if they contain -" \ $ or @ characters, these have to be escaped. For this reason, all such -characters in testinput1 and testinput3 are escaped so that they can be used -for perltest as well as for pcretest, and the special upper case modifiers such -as /A that pcretest recognizes are not used in these files. The output should -be identical, apart from the initial identifying banner. 
- -For testing UTF-8 features, an alternative form of perltest, called perltest8, -is supplied. This requires Perl 5.6 or higher. It recognizes the special -modifier /8 that pcretest uses to invoke UTF-8 functionality. The testinput5 -file can be fed to perltest8. - -The testinput2 and testinput4 files are not suitable for feeding to perltest, -since they do make use of the special upper case modifiers and escapes that -pcretest uses to test some features of PCRE. The first of these files also -contains malformed regular expressions, in order to check that PCRE diagnoses -them correctly. Similarly, testinput6 tests UTF-8 features that do not relate -to Perl. - -Philip Hazel -August 2000 diff --git a/pcre/doc/readme b/pcre/doc/readme deleted file mode 100644 index d124ee01..00000000 --- a/pcre/doc/readme +++ /dev/null @@ -1,270 +0,0 @@ -README file for PCRE (Perl-compatible regular expression library) ------------------------------------------------------------------ - -The latest release of PCRE is always available from - - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz - -Please read the NEWS file if you are upgrading from a previous release. - -PCRE has its own native API, but a set of "wrapper" functions that are based on -the POSIX API are also supplied in the library libpcreposix. Note that this -just provides a POSIX calling interface to PCRE: the regular expressions -themselves still follow Perl syntax and semantics. The header file -for the POSIX-style functions is called pcreposix.h. The official POSIX name is -regex.h, but I didn't want to risk possible problems with existing files of -that name by distributing it that way. To use it with an existing program that -uses the POSIX API, it will have to be renamed or pointed at by a link. - - -Building PCRE on a Unix system ------------------------------- - -To build PCRE on a Unix system, run the "configure" command in the PCRE -distribution directory. 
This is a standard GNU "autoconf" configuration script, -for which generic instructions are supplied in INSTALL. On many systems just -running "./configure" is sufficient, but the usual methods of changing standard -defaults are available. For example, - -CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local - -specifies that the C compiler should be run with the flags '-O2 -Wall' instead -of the default, and that "make install" should install PCRE under /opt/local -instead of the default /usr/local. - -If you want to make use of the experimential, incomplete support for UTF-8 -character strings in PCRE, you must add --enable-utf8 to the "configure" -command. Without it, the code for handling UTF-8 is not included in the -library. (Even when included, it still has to be enabled by an option at run -time.) - -The "configure" script builds four files: - -. Makefile is built by copying Makefile.in and making substitutions. -. config.h is built by copying config.in and making substitutions. -. pcre-config is built by copying pcre-config.in and making substitutions. -. RunTest is a script for running tests - -Once "configure" has run, you can run "make". It builds two libraries called -libpcre and libpcreposix, a test program called pcretest, and the pcregrep -command. You can use "make install" to copy these, and the public header file -pcre.h, to appropriate live directories on your system, in the normal way. - -Running "make install" also installs the command pcre-config, which can be used -to recall information about the PCRE configuration and installation. For -example, - - pcre-config --version - -prints the version number, and - - pcre-config --libs - -outputs information about where the library is installed. This command can be -included in makefiles for programs that use PCRE, saving the programmer from -having to remember too many details. 
- - -Shared libraries on Unix systems --------------------------------- - -The default distribution builds PCRE as two shared libraries. This support is -new and experimental and may not work on all systems. It relies on the -"libtool" scripts - these are distributed with PCRE. It should build a -"libtool" script and use this to compile and link shared libraries, which are -placed in a subdirectory called .libs. The programs pcretest and pcregrep are -built to use these uninstalled libraries by means of wrapper scripts. When you -use "make install" to install shared libraries, pcregrep and pcretest are -automatically re-built to use the newly installed libraries. However, only -pcregrep is installed, as pcretest is really just a test program. - -To build PCRE using static libraries you must use --disable-shared when -configuring it. For example - -./configure --prefix=/usr/gnu --disable-shared - -Then run "make" in the usual way. - - -Building on non-Unix systems ----------------------------- - -For a non-Unix system, read the comments in the file NON-UNIX-USE. PCRE has -been compiled on Windows systems and on Macintoshes, but I don't know the -details because I don't use those systems. It should be straightforward to -build PCRE on any system that has a Standard C compiler, because it uses only -Standard C functions. - - -Testing PCRE ------------- - -To test PCRE on a Unix system, run the RunTest script in the pcre directory. -(This can also be run by "make runtest", "make check", or "make test".) For -other systems, see the instruction in NON-UNIX-USE. - -The script runs the pcretest test program (which is documented in -doc/pcretest.txt) on each of the testinput files (in the testdata directory) in -turn, and compares the output with the contents of the corresponding testoutput -file. A file called testtry is used to hold the output from pcretest. 
To run -pcretest on just one of the test files, give its number as an argument to -RunTest, for example: - - RunTest 3 - -The first and third test files can also be fed directly into the perltest -script to check that Perl gives the same results. The third file requires the -additional features of release 5.005, which is why it is kept separate from the -main test input, which needs only Perl 5.004. In the long run, when 5.005 (or -higher) is widespread, these two test files may get amalgamated. - -The second set of tests check pcre_fullinfo(), pcre_info(), pcre_study(), -pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error -detection, and run-time flags that are specific to PCRE, as well as the POSIX -wrapper API. It also uses the debugging flag to check some of the internals of -pcre_compile(). - -If you build PCRE with a locale setting that is not the standard C locale, the -character tables may be different (see next paragraph). In some cases, this may -cause failures in the second set of tests. For example, in a locale where the -isprint() function yields TRUE for characters in the range 128-255, the use of -[:isascii:] inside a character class defines a different set of characters, and -this shows up in this test as a difference in the compiled code, which is being -listed for checking. Where the comparison test output contains [\x00-\x7f] the -test will contain [\x00-\xff], and similarly in some other cases. This is not a -bug in PCRE. - -The fourth set of tests checks pcre_maketables(), the facility for building a -set of character tables for a specific locale and using them instead of the -default tables. The tests make use of the "fr" (French) locale. Before running -the test, the script checks for the presence of this locale by running the -"locale" command. If that command fails, or if it doesn't include "fr" in the -list of available locales, the fourth test cannot be run, and a comment is -output to say why. 
If running this test produces instances of the error - - ** Failed to set locale "fr" - -in the comparison output, it means that locale is not available on your system, -despite being listed by "locale". This does not mean that PCRE is broken. - -The fifth test checks the experimental, incomplete UTF-8 support. It is not run -automatically unless PCRE is built with UTF-8 support. This file can be fed -directly to the perltest8 script, which requires Perl 5.6 or higher. The sixth -file tests internal UTF-8 features of PCRE that are not relevant to Perl. - - -Character tables ----------------- - -PCRE uses four tables for manipulating and identifying characters. The final -argument of the pcre_compile() function is a pointer to a block of memory -containing the concatenated tables. A call to pcre_maketables() can be used to -generate a set of tables in the current locale. If the final argument for -pcre_compile() is passed as NULL, a set of default tables that is built into -the binary is used. - -The source file called chartables.c contains the default set of tables. This is -not supplied in the distribution, but is built by the program dftables -(compiled from dftables.c), which uses the ANSI C character handling functions -such as isalnum(), isalpha(), isupper(), islower(), etc. to build the table -sources. This means that the default C locale which is set for your system will -control the contents of these default tables. You can change the default tables -by editing chartables.c and then re-building PCRE. If you do this, you should -probably also edit Makefile to ensure that the file doesn't ever get -re-generated. - -The first two 256-byte tables provide lower casing and case flipping functions, -respectively. The next table consists of three 32-byte bit maps which identify -digits, "word" characters, and white space, respectively. These are used when -building 32-byte bit maps that represent character classes. 
- -The final 256-byte table has bits indicating various character types, as -follows: - - 1 white space character - 2 letter - 4 decimal digit - 8 hexadecimal digit - 16 alphanumeric or '_' - 128 regular expression metacharacter or binary zero - -You should not alter the set of characters that contain the 128 bit, as that -will cause PCRE to malfunction. - - -Manifest --------- - -The distribution should contain the following files: - -(A) The actual source files of the PCRE library functions and their - headers: - - dftables.c auxiliary program for building chartables.c - get.c ) - maketables.c ) - study.c ) source of - pcre.c ) the functions - pcreposix.c ) - pcre.in "source" for the header for the external API; pcre.h - is built from this by "configure" - pcreposix.h header for the external POSIX wrapper API - internal.h header for internal use - config.in template for config.h, which is built by configure - -(B) Auxiliary files: - - AUTHORS information about the author of PCRE - ChangeLog log of changes to the code - INSTALL generic installation instructions - LICENCE conditions for the use of PCRE - COPYING the same, using GNU's standard name - Makefile.in template for Unix Makefile, which is built by configure - NEWS important changes in this release - NON-UNIX-USE notes on building PCRE on non-Unix systems - README this file - RunTest.in template for a Unix shell script for running tests - config.guess ) files used by libtool, - config.sub ) used only when building a shared library - configure a configuring shell script (built by autoconf) - configure.in the autoconf input used to build configure - doc/Tech.Notes notes on the encoding - doc/pcre.3 man page source for the PCRE functions - doc/pcre.html HTML version - doc/pcre.txt plain text version - doc/pcreposix.3 man page source for the POSIX wrapper API - doc/pcreposix.html HTML version - doc/pcreposix.txt plain text version - doc/pcretest.txt documentation of test program - doc/perltest.txt documentation 
of Perl test program - doc/pcregrep.1 man page source for the pcregrep utility - doc/pcregrep.html HTML version - doc/pcregrep.txt plain text version - install-sh a shell script for installing files - ltconfig ) files used to build "libtool", - ltmain.sh ) used only when building a shared library - pcretest.c test program - perltest Perl test program - perltest8 Perl test program for UTF-8 tests - pcregrep.c source of a grep utility that uses PCRE - pcre-config.in source of script which retains PCRE information - testdata/testinput1 test data, compatible with Perl 5.004 and 5.005 - testdata/testinput2 test data for error messages and non-Perl things - testdata/testinput3 test data, compatible with Perl 5.005 - testdata/testinput4 test data for locale-specific tests - testdata/testinput5 test data for UTF-8 tests compatible with Perl 5.6 - testdata/testinput6 test data for other UTF-8 tests - testdata/testoutput1 test results corresponding to testinput1 - testdata/testoutput2 test results corresponding to testinput2 - testdata/testoutput3 test results corresponding to testinput3 - testdata/testoutput4 test results corresponding to testinput4 - testdata/testoutput5 test results corresponding to testinput5 - testdata/testoutput6 test results corresponding to testinput6 - -(C) Auxiliary files for Win32 DLL - - dll.mk - pcre.def - -Philip Hazel -August 2000 diff --git a/pcre/get.c b/pcre/get.c deleted file mode 100644 index 42e9bd49..00000000 --- a/pcre/get.c +++ /dev/null @@ -1,227 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. 
- -Written by: Philip Hazel - - Copyright (c) 1997-2000 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ - -/* This module contains some convenience functions for extracting substrings -from the subject string after a regex match has succeeded. The original idea -for these functions came from Scott Wimer . */ - - -/* Include the internals header, which itself includes Standard C headers plus -the external pcre header. */ - -#include "internal.h" - - - -/************************************************* -* Copy captured string to given buffer * -*************************************************/ - -/* This function copies a single captured substring into a given buffer. -Note that we use memcpy() rather than strncpy() in case there are binary zeros -in the string. - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. 
the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringnumber the number of the required substring - buffer where to put the substring - size the size of the buffer - -Returns: if successful: - the length of the copied string, not including the zero - that is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) buffer too small - PCRE_ERROR_NOSUBSTRING (-7) no such captured substring -*/ - -int -pcre_copy_substring(const char *subject, int *ovector, int stringcount, - int stringnumber, char *buffer, int size) -{ -int yield; -if (stringnumber < 0 || stringnumber >= stringcount) - return PCRE_ERROR_NOSUBSTRING; -stringnumber *= 2; -yield = ovector[stringnumber+1] - ovector[stringnumber]; -if (size < yield + 1) return PCRE_ERROR_NOMEMORY; -memcpy(buffer, subject + ovector[stringnumber], yield); -buffer[yield] = 0; -return yield; -} - - - -/************************************************* -* Copy all captured strings to new store * -*************************************************/ - -/* This function gets one chunk of store and builds a list of pointers and all -of the captured substrings in it. A NULL pointer is put on the end of the list. - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. 
the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - listptr set to point to the list of pointers - -Returns: if successful: 0 - if not successful: - PCRE_ERROR_NOMEMORY (-6) failed to get store -*/ - -int -pcre_get_substring_list(const char *subject, int *ovector, int stringcount, - const char ***listptr) -{ -int i; -int size = sizeof(char *); -int double_count = stringcount * 2; -char **stringlist; -char *p; - -for (i = 0; i < double_count; i += 2) - size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; - -stringlist = (char **)(pcre_malloc)(size); -if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; - -*listptr = (const char **)stringlist; -p = (char *)(stringlist + stringcount + 1); - -for (i = 0; i < double_count; i += 2) - { - int len = ovector[i+1] - ovector[i]; - memcpy(p, subject + ovector[i], len); - *stringlist++ = p; - p += len; - *p++ = 0; - } - -*stringlist = NULL; -return 0; -} - - - -/************************************************* -* Free store obtained by get_substring_list * -*************************************************/ - -/* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (pcre_free)() directly. - -Argument: the result of a previous pcre_get_substring_list() -Returns: nothing -*/ - -void -pcre_free_substring_list(const char **pointer) -{ -(pcre_free)((void *)pointer); -} - - - -/************************************************* -* Copy captured string to new store * -*************************************************/ - -/* This function copies a single captured substring into a piece of new -store - -Arguments: - subject the subject string that was matched - ovector pointer to the offsets table - stringcount the number of substrings that were captured - (i.e. 
the yield of the pcre_exec call, unless - that was zero, in which case it should be 1/3 - of the offset table size) - stringnumber the number of the required substring - stringptr where to put a pointer to the substring - -Returns: if successful: - the length of the string, not including the zero that - is put on the end; can be zero - if not successful: - PCRE_ERROR_NOMEMORY (-6) failed to get store - PCRE_ERROR_NOSUBSTRING (-7) substring not present -*/ - -int -pcre_get_substring(const char *subject, int *ovector, int stringcount, - int stringnumber, const char **stringptr) -{ -int yield; -char *substring; -if (stringnumber < 0 || stringnumber >= stringcount) - return PCRE_ERROR_NOSUBSTRING; -stringnumber *= 2; -yield = ovector[stringnumber+1] - ovector[stringnumber]; -substring = (char *)(pcre_malloc)(yield + 1); -if (substring == NULL) return PCRE_ERROR_NOMEMORY; -memcpy(substring, subject + ovector[stringnumber], yield); -substring[yield] = 0; -*stringptr = substring; -return yield; -} - - - -/************************************************* -* Free store obtained by get_substring * -*************************************************/ - -/* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (pcre_free)() directly. - -Argument: the result of a previous pcre_get_substring() -Returns: nothing -*/ - -void -pcre_free_substring(const char *pointer) -{ -(pcre_free)((void *)pointer); -} - -/* End of get.c */ diff --git a/pcre/install b/pcre/install deleted file mode 100644 index 08802812..00000000 --- a/pcre/install +++ /dev/null @@ -1,185 +0,0 @@ -Basic Installation -================== - - These are generic installation instructions that apply to systems that -can run the `configure' shell script - Unix systems and any that imitate -it. They are not specific to PCRE. There are PCRE-specific instructions -for non-Unix systems in the file NON-UNIX-USE. 
- - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, a file -`config.cache' that saves the results of its tests to speed up -reconfiguring, and a file `config.log' containing compiler output -(useful mainly for debugging `configure'). - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If at some point `config.cache' -contains results you don't want to keep, you may remove or edit it. - - The file `configure.in' is used to create `configure' by a program -called `autoconf'. You only need `configure.in' if you want to change -it or regenerate `configure' using a newer version of `autoconf'. - -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. If you're - using `csh' on an old version of System V, you might need to type - `sh ./configure' instead to prevent `csh' from trying to execute - `configure' itself. - - Running `configure' takes awhile. While running, it prints some - messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. 
To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - -Compilers and Options -===================== - - Some systems require unusual options for compilation or linking that -the `configure' script does not know about. You can give `configure' -initial values for variables by setting them in the environment. Using -a Bourne-compatible shell, you can do that on the command line like -this: - CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure - -Or on systems that have the `env' program, you can do it like this: - env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure - -Compiling For Multiple Architectures -==================================== - - You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you must use a version of `make' that -supports the `VPATH' variable, such as GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - If you have to use a `make' that does not support the `VPATH' -variable, you have to compile the package for one architecture at a time -in the source code directory. After you have installed the package for -one architecture, use `make distclean' before reconfiguring for another -architecture. - -Installation Names -================== - - By default, `make install' will install the package's files in -`/usr/local/bin', `/usr/local/man', etc.
You can specify an -installation prefix other than `/usr/local' by giving `configure' the -option `--prefix=PATH'. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -give `configure' the option `--exec-prefix=PATH', the package will use -PATH as the prefix for installing programs and libraries. -Documentation and other data files will still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=PATH' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. - -Optional Features -================= - - Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - -Specifying the System Type -========================== - - There may be some features `configure' can not figure out -automatically, but needs to determine by the type of host the package -will run on. Usually `configure' can figure that out, but if it prints -a message saying it can not guess the host type, give it the -`--host=TYPE' option. 
TYPE can either be a short name for the system -type, such as `sun4', or a canonical name with three fields: - CPU-COMPANY-SYSTEM - -See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the host type. - - If you are building compiler tools for cross-compiling, you can also -use the `--target=TYPE' option to select the type of system they will -produce code for and the `--build=TYPE' option to select the type of -system on which you are compiling the package. - -Sharing Defaults -================ - - If you want to set default values for `configure' scripts to share, -you can create a site shell script called `config.site' that gives -default values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - -Operation Controls -================== - - `configure' recognizes the following options to control how it -operates. - -`--cache-file=FILE' - Use and save the results of the tests in FILE instead of - `./config.cache'. Set FILE to `/dev/null' to disable caching, for - debugging `configure'. - -`--help' - Print a summary of the options to `configure', and exit. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`--version' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`configure' also accepts some other, not widely useful, options. 
diff --git a/pcre/install-sh b/pcre/install-sh deleted file mode 100644 index e9de2384..00000000 --- a/pcre/install-sh +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). -# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. 
- -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - chmodcmd="" - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. 
- - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. 
- - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/pcre/internal.h b/pcre/internal.h deleted file mode 100644 index 25bb7f8f..00000000 --- a/pcre/internal.h +++ /dev/null @@ -1,381 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - - -/* This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. - -Written by: Philip Hazel - - Copyright (c) 1997-2000 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. 
Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ - -/* This header contains definitions that are shared between the different -modules, but which are not relevant to the outside. */ - -/* Get the definitions provided by running "configure" */ - -#include "config.h" - -/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), -define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY -is set. Otherwise, include an emulating function for those systems that have -neither (there are some non-Unix environments where this is the case). This assumes -that all calls to memmove are moving strings upwards in store, which is the -case in PCRE. */ - -#if ! HAVE_MEMMOVE -#undef memmove /* some systems may have a macro */ -#if HAVE_BCOPY -#define memmove(a, b, c) bcopy(b, a, c) -#else -void * -pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n) -{ -int i; -dest += n; -src += n; -for (i = 0; i < n; ++i) *(--dest) = *(--src); -} -#define memmove(a, b, c) pcre_memmove(a, b, c) -#endif -#endif - -/* Standard C headers plus the external interface definition */ - -#include <ctype.h> -#include <limits.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "pcre.h" - -/* In case there is no definition of offsetof() provided - though any proper -Standard C system should have one. */ - -#ifndef offsetof -#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field)) -#endif - -/* These are the public options that can change during matching.
*/ - -#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL) - -/* Private options flags start at the most significant end of the four bytes, -but skip the top bit so we can use ints for convenience without getting tangled -with negative values. The public options defined in pcre.h start at the least -significant end. Make sure they don't overlap, though now that we have expanded -to four bytes there is plenty of space. */ - -#define PCRE_FIRSTSET 0x40000000 /* first_char is set */ -#define PCRE_REQCHSET 0x20000000 /* req_char is set */ -#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */ -#define PCRE_INGROUP 0x08000000 /* compiling inside a group */ -#define PCRE_ICHANGED 0x04000000 /* i option changes within regex */ - -/* Options for the "extra" block produced by pcre_study(). */ - -#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */ - -/* Masks for identifying the public options which are permitted at compile -time, run time or study time, respectively. */ - -#define PUBLIC_OPTIONS \ - (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ - PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8) - -#define PUBLIC_EXEC_OPTIONS \ - (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY) - -#define PUBLIC_STUDY_OPTIONS 0 /* None defined */ - -/* Magic number to provide a small check against being handed junk. */ - -#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ - -/* Miscellaneous definitions */ - -typedef int BOOL; - -#define FALSE 0 -#define TRUE 1 - -/* These are escaped items that aren't just an encoding of a particular data -value such as \n. They must have non-zero values, as check_escape() returns -their negation. Also, they must appear in the same order as in the opcode -definitions below, up to ESC_z. The final one must be ESC_REF as subsequent -values are used for \1, \2, \3, etc. 
There is a test in the code for an escape -greater than ESC_b and less than ESC_X to detect the types that may be -repeated. If any new escapes are put in-between that don't consume a character, -that code will have to change. */ - -enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, - ESC_Z, ESC_z, ESC_REF }; - -/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets -that extract substrings. Starting from 1 (i.e. after OP_END), the values up to -OP_EOD must correspond in order to the list of escapes immediately above. */ - -enum { - OP_END, /* End of pattern */ - - /* Values corresponding to backslashed metacharacters */ - - OP_SOD, /* Start of data: \A */ - OP_NOT_WORD_BOUNDARY, /* \B */ - OP_WORD_BOUNDARY, /* \b */ - OP_NOT_DIGIT, /* \D */ - OP_DIGIT, /* \d */ - OP_NOT_WHITESPACE, /* \S */ - OP_WHITESPACE, /* \s */ - OP_NOT_WORDCHAR, /* \W */ - OP_WORDCHAR, /* \w */ - OP_EODN, /* End of data or \n at end of data: \Z. */ - OP_EOD, /* End of data: \z */ - - OP_OPT, /* Set runtime options */ - OP_CIRC, /* Start of line - varies with multiline switch */ - OP_DOLL, /* End of line - varies with multiline switch */ - OP_ANY, /* Match any character */ - OP_CHARS, /* Match string of characters */ - OP_NOT, /* Match anything but the following char */ - - OP_STAR, /* The maximizing and minimizing versions of */ - OP_MINSTAR, /* all these opcodes must come in pairs, with */ - OP_PLUS, /* the minimizing one second. */ - OP_MINPLUS, /* This first set applies to single characters */ - OP_QUERY, - OP_MINQUERY, - OP_UPTO, /* From 0 to n matches */ - OP_MINUPTO, - OP_EXACT, /* Exactly n matches */ - - OP_NOTSTAR, /* The maximizing and minimizing versions of */ - OP_NOTMINSTAR, /* all these opcodes must come in pairs, with */ - OP_NOTPLUS, /* the minimizing one second. 
*/ - OP_NOTMINPLUS, /* This first set applies to "not" single characters */ - OP_NOTQUERY, - OP_NOTMINQUERY, - OP_NOTUPTO, /* From 0 to n matches */ - OP_NOTMINUPTO, - OP_NOTEXACT, /* Exactly n matches */ - - OP_TYPESTAR, /* The maximizing and minimizing versions of */ - OP_TYPEMINSTAR, /* all these opcodes must come in pairs, with */ - OP_TYPEPLUS, /* the minimizing one second. These codes must */ - OP_TYPEMINPLUS, /* be in exactly the same order as those above. */ - OP_TYPEQUERY, /* This set applies to character types such as \d */ - OP_TYPEMINQUERY, - OP_TYPEUPTO, /* From 0 to n matches */ - OP_TYPEMINUPTO, - OP_TYPEEXACT, /* Exactly n matches */ - - OP_CRSTAR, /* The maximizing and minimizing versions of */ - OP_CRMINSTAR, /* all these opcodes must come in pairs, with */ - OP_CRPLUS, /* the minimizing one second. These codes must */ - OP_CRMINPLUS, /* be in exactly the same order as those above. */ - OP_CRQUERY, /* These are for character classes and back refs */ - OP_CRMINQUERY, - OP_CRRANGE, /* These are different to the three seta above. */ - OP_CRMINRANGE, - - OP_CLASS, /* Match a character class */ - OP_REF, /* Match a back reference */ - OP_RECURSE, /* Match this pattern recursively */ - - OP_ALT, /* Start of alternation */ - OP_KET, /* End of group that doesn't have an unbounded repeat */ - OP_KETRMAX, /* These two must remain together and in this */ - OP_KETRMIN, /* order. They are for groups the repeat for ever. */ - - /* The assertions must come before ONCE and COND */ - - OP_ASSERT, /* Positive lookahead */ - OP_ASSERT_NOT, /* Negative lookahead */ - OP_ASSERTBACK, /* Positive lookbehind */ - OP_ASSERTBACK_NOT, /* Negative lookbehind */ - OP_REVERSE, /* Move pointer back - used in lookbehind assertions */ - - /* ONCE and COND must come after the assertions, with ONCE first, as there's - a test for >= ONCE for a subpattern that isn't an assertion. 
*/ - - OP_ONCE, /* Once matched, don't back up into the subpattern */ - OP_COND, /* Conditional group */ - OP_CREF, /* Used to hold an extraction string number */ - - OP_BRAZERO, /* These two must remain together and in this */ - OP_BRAMINZERO, /* order. */ - - OP_BRA /* This and greater values are used for brackets that - extract substrings. */ -}; - -/* The highest extraction number. This is limited by the number of opcodes -left after OP_BRA, i.e. 255 - OP_BRA. We actually set it somewhat lower. */ - -#define EXTRACT_MAX 99 - -/* The texts of compile-time error messages are defined as macros here so that -they can be accessed by the POSIX wrapper and converted into error codes. Yes, -I could have used error codes in the first place, but didn't feel like changing -just to accommodate the POSIX wrapper. */ - -#define ERR1 "\\ at end of pattern" -#define ERR2 "\\c at end of pattern" -#define ERR3 "unrecognized character follows \\" -#define ERR4 "numbers out of order in {} quantifier" -#define ERR5 "number too big in {} quantifier" -#define ERR6 "missing terminating ] for character class" -#define ERR7 "invalid escape sequence in character class" -#define ERR8 "range out of order in character class" -#define ERR9 "nothing to repeat" -#define ERR10 "operand of unlimited repeat could match the empty string" -#define ERR11 "internal error: unexpected repeat" -#define ERR12 "unrecognized character after (?" 
-#define ERR13 "too many capturing parenthesized sub-patterns" -#define ERR14 "missing )" -#define ERR15 "back reference to non-existent subpattern" -#define ERR16 "erroffset passed as NULL" -#define ERR17 "unknown option bit(s) set" -#define ERR18 "missing ) after comment" -#define ERR19 "too many sets of parentheses" -#define ERR20 "regular expression too large" -#define ERR21 "failed to get memory" -#define ERR22 "unmatched parentheses" -#define ERR23 "internal error: code overflow" -#define ERR24 "unrecognized character after (?<" -#define ERR25 "lookbehind assertion is not fixed length" -#define ERR26 "malformed number after (?(" -#define ERR27 "conditional group contains more than two branches" -#define ERR28 "assertion expected after (?(" -#define ERR29 "(?p must be followed by )" -#define ERR30 "unknown POSIX class name" -#define ERR31 "POSIX collating elements are not supported" -#define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support" -#define ERR33 "characters with values > 255 are not yet supported in classes" -#define ERR34 "character value in \\x{...} sequence is too large" -#define ERR35 "invalid condition (?(0)" - -/* All character handling must be done as unsigned characters. Otherwise there -are problems with top-bit-set characters and functions such as isspace(). -However, we leave the interface to the outside world as char *, because that -should make things easier for callers. We define a short type for unsigned char -to save lots of typing. I tried "uchar", but it causes problems on Digital -Unix, where it is defined in sys/types, so use "uschar" instead. */ - -typedef unsigned char uschar; - -/* The real format of the start of the pcre block; the actual code vector -runs on as long as necessary after the end. 
*/ - -typedef struct real_pcre { - unsigned long int magic_number; - size_t size; - const unsigned char *tables; - unsigned long int options; - uschar top_bracket; - uschar top_backref; - uschar first_char; - uschar req_char; - uschar code[1]; -} real_pcre; - -/* The real format of the extra block returned by pcre_study(). */ - -typedef struct real_pcre_extra { - uschar options; - uschar start_bits[32]; -} real_pcre_extra; - - -/* Structure for passing "static" information around between the functions -doing the compiling, so that they are thread-safe. */ - -typedef struct compile_data { - const uschar *lcc; /* Points to lower casing table */ - const uschar *fcc; /* Points to case-flipping table */ - const uschar *cbits; /* Points to character type table */ - const uschar *ctypes; /* Points to table of type maps */ -} compile_data; - -/* Structure for passing "static" information around between the functions -doing the matching, so that they are thread-safe. */ - -typedef struct match_data { - int errorcode; /* As it says */ - int *offset_vector; /* Offset vector */ - int offset_end; /* One past the end */ - int offset_max; /* The maximum usable for return data */ - const uschar *lcc; /* Points to lower casing table */ - const uschar *ctypes; /* Points to table of type maps */ - BOOL offset_overflow; /* Set if too many extractions */ - BOOL notbol; /* NOTBOL flag */ - BOOL noteol; /* NOTEOL flag */ - BOOL utf8; /* UTF8 flag */ - BOOL endonly; /* Dollar not before final \n */ - BOOL notempty; /* Empty string match not wanted */ - const uschar *start_pattern; /* For use when recursing */ - const uschar *start_subject; /* Start of the subject string */ - const uschar *end_subject; /* End of the subject string */ - const uschar *start_match; /* Start of this match attempt */ - const uschar *end_match_ptr; /* Subject position at end match */ - int end_offset_top; /* Highwater mark at end of match */ -} match_data; - -/* Bit definitions for entries in the pcre_ctypes 
table. */ - -#define ctype_space 0x01 -#define ctype_letter 0x02 -#define ctype_digit 0x04 -#define ctype_xdigit 0x08 -#define ctype_word 0x10 /* alphameric or '_' */ -#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */ - -/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set -of bits for a class map. Some classes are built by combining these tables. */ - -#define cbit_space 0 /* [:space:] or \s */ -#define cbit_xdigit 32 /* [:xdigit:] */ -#define cbit_digit 64 /* [:digit:] or \d */ -#define cbit_upper 96 /* [:upper:] */ -#define cbit_lower 128 /* [:lower:] */ -#define cbit_word 160 /* [:word:] or \w */ -#define cbit_graph 192 /* [:graph:] */ -#define cbit_print 224 /* [:print:] */ -#define cbit_punct 256 /* [:punct:] */ -#define cbit_cntrl 288 /* [:cntrl:] */ -#define cbit_length 320 /* Length of the cbits table */ - -/* Offsets of the various tables from the base tables pointer, and -total length. */ - -#define lcc_offset 0 -#define fcc_offset 256 -#define cbits_offset 512 -#define ctypes_offset (cbits_offset + cbit_length) -#define tables_length (ctypes_offset + 256) - -/* End of internal.h */ diff --git a/pcre/licence b/pcre/licence deleted file mode 100644 index 34d20db9..00000000 --- a/pcre/licence +++ /dev/null @@ -1,46 +0,0 @@ -PCRE LICENCE ------------- - -PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - -Written by: Philip Hazel - -University of Cambridge Computing Service, -Cambridge, England. Phone: +44 1223 334714. - -Copyright (c) 1997-2000 University of Cambridge - -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. In practice, this means that if you use - PCRE in software which you distribute to others, commercially or - otherwise, you must put a sentence like this - - Regular expression support is provided by the PCRE library package, - which is open source software, written by Philip Hazel, and copyright - by the University of Cambridge, England. - - somewhere reasonably visible in your documentation and in any relevant - files or online help data or similar. A reference to the ftp site for - the source, that is, to - - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/ - - should also be given in the documentation. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. - -End diff --git a/pcre/ltconfig b/pcre/ltconfig deleted file mode 100644 index a01334f9..00000000 --- a/pcre/ltconfig +++ /dev/null @@ -1,3078 +0,0 @@ -#! /bin/sh - -# ltconfig - Create a system-specific libtool. -# Copyright (C) 1996-1999 Free Software Foundation, Inc. -# Originally by Gordon Matzigkeit , 1996 -# -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# A lot of this script is taken from autoconf-2.10. - -# Check that we are running under the correct shell. -SHELL=${CONFIG_SHELL-/bin/sh} -echo=echo -if test "X$1" = X--no-reexec; then - # Discard the --no-reexec flag, and continue. - shift -elif test "X$1" = X--fallback-echo; then - # Avoid inline document here, it may be left over - : -elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then - # Yippee, $echo works! - : -else - # Restart under the correct shell. - exec "$SHELL" "$0" --no-reexec ${1+"$@"} -fi - -if test "X$1" = X--fallback-echo; then - # used as fallback echo - shift - cat </dev/null`} - case X$UNAME in - *-DOS) PATH_SEPARATOR=';' ;; - *) PATH_SEPARATOR=':' ;; - esac -fi - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -if test "X${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi - -if test "X${echo_test_string+set}" != Xset; then - # find a string as large as possible, as long as the shell can cope with it - for cmd in 'sed 50q "$0"' 'sed 20q "$0"' 'sed 10q "$0"' 'sed 2q "$0"' 'echo test'; do - # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ... 
- if (echo_test_string="`eval $cmd`") 2>/dev/null && - echo_test_string="`eval $cmd`" && - (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null; then - break - fi - done -fi - -if test "X`($echo '\t') 2>/dev/null`" != 'X\t' || - test "X`($echo "$echo_test_string") 2>/dev/null`" != X"$echo_test_string"; then - # The Solaris, AIX, and Digital Unix default echo programs unquote - # backslashes. This makes it impossible to quote backslashes using - # echo "$something" | sed 's/\\/\\\\/g' - # - # So, first we look for a working echo in the user's PATH. - - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" - for dir in $PATH /usr/ucb; do - if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && - test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && - test "X`($dir/echo "$echo_test_string") 2>/dev/null`" = X"$echo_test_string"; then - echo="$dir/echo" - break - fi - done - IFS="$save_ifs" - - if test "X$echo" = Xecho; then - # We didn't find a better echo, so look for alternatives. - if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' && - test "X`(print -r "$echo_test_string") 2>/dev/null`" = X"$echo_test_string"; then - # This shell has a builtin print -r that does the trick. - echo='print -r' - elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) && - test "X$CONFIG_SHELL" != X/bin/ksh; then - # If we have ksh, try running ltconfig again with it. - ORIGINAL_CONFIG_SHELL="${CONFIG_SHELL-/bin/sh}" - export ORIGINAL_CONFIG_SHELL - CONFIG_SHELL=/bin/ksh - export CONFIG_SHELL - exec "$CONFIG_SHELL" "$0" --no-reexec ${1+"$@"} - else - # Try using printf. 
- echo='printf "%s\n"' - if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && - test "X`($echo "$echo_test_string") 2>/dev/null`" = X"$echo_test_string"; then - # Cool, printf works - : - elif test "X`("$ORIGINAL_CONFIG_SHELL" "$0" --fallback-echo '\t') 2>/dev/null`" = 'X\t' && - test "X`("$ORIGINAL_CONFIG_SHELL" "$0" --fallback-echo "$echo_test_string") 2>/dev/null`" = X"$echo_test_string"; then - CONFIG_SHELL="$ORIGINAL_CONFIG_SHELL" - export CONFIG_SHELL - SHELL="$CONFIG_SHELL" - export SHELL - echo="$CONFIG_SHELL $0 --fallback-echo" - elif test "X`("$CONFIG_SHELL" "$0" --fallback-echo '\t') 2>/dev/null`" = 'X\t' && - test "X`("$CONFIG_SHELL" "$0" --fallback-echo "$echo_test_string") 2>/dev/null`" = X"$echo_test_string"; then - echo="$CONFIG_SHELL $0 --fallback-echo" - else - # maybe with a smaller string... - prev=: - - for cmd in 'echo test' 'sed 2q "$0"' 'sed 10q "$0"' 'sed 20q "$0"' 'sed 50q "$0"'; do - if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null; then - break - fi - prev="$cmd" - done - - if test "$prev" != 'sed 50q "$0"'; then - echo_test_string=`eval $prev` - export echo_test_string - exec "${ORIGINAL_CONFIG_SHELL}" "$0" ${1+"$@"} - else - # Oops. We lost completely, so just stick with echo. - echo=echo - fi - fi - fi - fi -fi - -# Sed substitution that helps us do robust quoting. It backslashifies -# metacharacters that are still active within double-quoted strings. -Xsed='sed -e s/^X//' -sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g' - -# Same as above, but do not quote variable references. -double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g' - -# Sed substitution to delay expansion of an escaped shell variable in a -# double_quote_subst'ed string. -delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' - -# The name of this program. 
-progname=`$echo "X$0" | $Xsed -e 's%^.*/%%'` - -# Constants: -PROGRAM=ltconfig -PACKAGE=libtool -VERSION=1.3.4 -TIMESTAMP=" (1.385.2.196 1999/12/07 21:47:57)" -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -rm="rm -f" - -help="Try \`$progname --help' for more information." - -# Global variables: -default_ofile=libtool -can_build_shared=yes -enable_shared=yes -# All known linkers require a `.a' archive for static linking (except M$VC, -# which needs '.lib'). -enable_static=yes -enable_fast_install=yes -enable_dlopen=unknown -enable_win32_dll=no -ltmain= -silent= -srcdir= -ac_config_guess= -ac_config_sub= -host= -nonopt= -ofile="$default_ofile" -verify_host=yes -with_gcc=no -with_gnu_ld=no -need_locks=yes -ac_ext=c -objext=o -libext=a -exeext= -cache_file= - -old_AR="$AR" -old_CC="$CC" -old_CFLAGS="$CFLAGS" -old_CPPFLAGS="$CPPFLAGS" -old_LDFLAGS="$LDFLAGS" -old_LD="$LD" -old_LN_S="$LN_S" -old_LIBS="$LIBS" -old_NM="$NM" -old_RANLIB="$RANLIB" -old_DLLTOOL="$DLLTOOL" -old_OBJDUMP="$OBJDUMP" -old_AS="$AS" - -# Parse the command line options. -args= -prev= -for option -do - case "$option" in - -*=*) optarg=`echo "$option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) optarg= ;; - esac - - # If the previous option needs an argument, assign it. 
- if test -n "$prev"; then - eval "$prev=\$option" - prev= - continue - fi - - case "$option" in - --help) cat <&2 - echo "$help" 1>&2 - exit 1 - ;; - - *) - if test -z "$ltmain"; then - ltmain="$option" - elif test -z "$host"; then -# This generates an unnecessary warning for sparc-sun-solaris4.1.3_U1 -# if test -n "`echo $option| sed 's/[-a-z0-9.]//g'`"; then -# echo "$progname: warning \`$option' is not a valid host type" 1>&2 -# fi - host="$option" - else - echo "$progname: too many arguments" 1>&2 - echo "$help" 1>&2 - exit 1 - fi ;; - esac -done - -if test -z "$ltmain"; then - echo "$progname: you must specify a LTMAIN file" 1>&2 - echo "$help" 1>&2 - exit 1 -fi - -if test ! -f "$ltmain"; then - echo "$progname: \`$ltmain' does not exist" 1>&2 - echo "$help" 1>&2 - exit 1 -fi - -# Quote any args containing shell metacharacters. -ltconfig_args= -for arg -do - case "$arg" in - *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) - ltconfig_args="$ltconfig_args '$arg'" ;; - *) ltconfig_args="$ltconfig_args $arg" ;; - esac -done - -# A relevant subset of AC_INIT. - -# File descriptor usage: -# 0 standard input -# 1 file creation -# 2 errors and warnings -# 3 some systems may open it to /dev/tty -# 4 used on the Kubota Titan -# 5 compiler messages saved in config.log -# 6 checking for... messages and results -if test "$silent" = yes; then - exec 6>/dev/null -else - exec 6>&1 -fi -exec 5>>./config.log - -# NLS nuisances. -# Only set LANG and LC_ALL to C if already set. -# These must not be set unconditionally because not all systems understand -# e.g. LANG=C (notably SCO). -if test "X${LC_ALL+set}" = Xset; then LC_ALL=C; export LC_ALL; fi -if test "X${LANG+set}" = Xset; then LANG=C; export LANG; fi - -if test -n "$cache_file" && test -r "$cache_file"; then - echo "loading cache $cache_file within ltconfig" - . $cache_file -fi - -if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then - # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. 
- if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then - ac_n= ac_c=' -' ac_t=' ' - else - ac_n=-n ac_c= ac_t= - fi -else - ac_n= ac_c='\c' ac_t= -fi - -if test -z "$srcdir"; then - # Assume the source directory is the same one as the path to LTMAIN. - srcdir=`$echo "X$ltmain" | $Xsed -e 's%/[^/]*$%%'` - test "$srcdir" = "$ltmain" && srcdir=. -fi - -trap "$rm conftest*; exit 1" 1 2 15 -if test "$verify_host" = yes; then - # Check for config.guess and config.sub. - ac_aux_dir= - for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do - if test -f $ac_dir/config.guess; then - ac_aux_dir=$ac_dir - break - fi - done - if test -z "$ac_aux_dir"; then - echo "$progname: cannot find config.guess in $srcdir $srcdir/.. $srcdir/../.." 1>&2 - echo "$help" 1>&2 - exit 1 - fi - ac_config_guess=$ac_aux_dir/config.guess - ac_config_sub=$ac_aux_dir/config.sub - - # Make sure we can run config.sub. - if $SHELL $ac_config_sub sun4 >/dev/null 2>&1; then : - else - echo "$progname: cannot run $ac_config_sub" 1>&2 - echo "$help" 1>&2 - exit 1 - fi - - echo $ac_n "checking host system type""... $ac_c" 1>&6 - - host_alias=$host - case "$host_alias" in - "") - if host_alias=`$SHELL $ac_config_guess`; then : - else - echo "$progname: cannot guess host type; you must specify one" 1>&2 - echo "$help" 1>&2 - exit 1 - fi ;; - esac - host=`$SHELL $ac_config_sub $host_alias` - echo "$ac_t$host" 1>&6 - - # Make sure the host verified. - test -z "$host" && exit 1 - -elif test -z "$host"; then - echo "$progname: you must specify a host type if you use \`--no-verify'" 1>&2 - echo "$help" 1>&2 - exit 1 -else - host_alias=$host -fi - -# Transform linux* to *-*-linux-gnu*, to support old configure scripts. 
-case "$host_os" in -linux-gnu*) ;; -linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'` -esac - -host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` -host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` -host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` - -case "$host_os" in -aix3*) - # AIX sometimes has problems with the GCC collect2 program. For some - # reason, if we set the COLLECT_NAMES environment variable, the problems - # vanish in a puff of smoke. - if test "X${COLLECT_NAMES+set}" != Xset; then - COLLECT_NAMES= - export COLLECT_NAMES - fi - ;; -esac - -# Determine commands to create old-style static archives. -old_archive_cmds='$AR cru $oldlib$oldobjs' -old_postinstall_cmds='chmod 644 $oldlib' -old_postuninstall_cmds= - -# Set a sane default for `AR'. -test -z "$AR" && AR=ar - -# Set a sane default for `OBJDUMP'. -test -z "$OBJDUMP" && OBJDUMP=objdump - -# If RANLIB is not set, then run the test. -if test "${RANLIB+set}" != "set"; then - result=no - - echo $ac_n "checking for ranlib... $ac_c" 1>&6 - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" - for dir in $PATH; do - test -z "$dir" && dir=. - if test -f $dir/ranlib || test -f $dir/ranlib$ac_exeext; then - RANLIB="ranlib" - result="ranlib" - break - fi - done - IFS="$save_ifs" - - echo "$ac_t$result" 1>&6 -fi - -if test -n "$RANLIB"; then - old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" - old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds" -fi - -# Set sane defaults for `DLLTOOL', `OBJDUMP', and `AS', used on cygwin. -test -z "$DLLTOOL" && DLLTOOL=dlltool -test -z "$OBJDUMP" && OBJDUMP=objdump -test -z "$AS" && AS=as - -# Check to see if we are using GCC. -if test "$with_gcc" != yes || test -z "$CC"; then - # If CC is not set, then try to find GCC or a usable CC. - if test -z "$CC"; then - echo $ac_n "checking for gcc... 
$ac_c" 1>&6 - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" - for dir in $PATH; do - test -z "$dir" && dir=. - if test -f $dir/gcc || test -f $dir/gcc$ac_exeext; then - CC="gcc" - break - fi - done - IFS="$save_ifs" - - if test -n "$CC"; then - echo "$ac_t$CC" 1>&6 - else - echo "$ac_t"no 1>&6 - fi - fi - - # Not "gcc", so try "cc", rejecting "/usr/ucb/cc". - if test -z "$CC"; then - echo $ac_n "checking for cc... $ac_c" 1>&6 - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" - cc_rejected=no - for dir in $PATH; do - test -z "$dir" && dir=. - if test -f $dir/cc || test -f $dir/cc$ac_exeext; then - if test "$dir/cc" = "/usr/ucb/cc"; then - cc_rejected=yes - continue - fi - CC="cc" - break - fi - done - IFS="$save_ifs" - if test $cc_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $CC - shift - if test $# -gt 0; then - # We chose a different compiler from the bogus one. - # However, it has the same name, so the bogon will be chosen - # first if we set CC to just the name; use the full file name. - shift - set dummy "$dir/cc" "$@" - shift - CC="$@" - fi - fi - - if test -n "$CC"; then - echo "$ac_t$CC" 1>&6 - else - echo "$ac_t"no 1>&6 - fi - - if test -z "$CC"; then - echo "$progname: error: no acceptable cc found in \$PATH" 1>&2 - exit 1 - fi - fi - - # Now see if the compiler is really GCC. - with_gcc=no - echo $ac_n "checking whether we are using GNU C... $ac_c" 1>&6 - echo "$progname:581: checking whether we are using GNU C" >&5 - - $rm conftest.c - cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then - with_gcc=yes - fi - $rm conftest.c - echo "$ac_t$with_gcc" 1>&6 -fi - -# Allow CC to be a program name with arguments. -set dummy $CC -compiler="$2" - -echo $ac_n "checking for object suffix... 
$ac_c" 1>&6 -$rm conftest* -echo 'int i = 1;' > conftest.c -echo "$progname:603: checking for object suffix" >& 5 -if { (eval echo $progname:604: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; }; then - # Append any warnings to the config.log. - cat conftest.err 1>&5 - - for ac_file in conftest.*; do - case $ac_file in - *.c) ;; - *) objext=`echo $ac_file | sed -e s/conftest.//` ;; - esac - done -else - cat conftest.err 1>&5 - echo "$progname: failed program was:" >&5 - cat conftest.c >&5 -fi -$rm conftest* -echo "$ac_t$objext" 1>&6 - -echo $ac_n "checking for executable suffix... $ac_c" 1>&6 -if eval "test \"`echo '$''{'ac_cv_exeext'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_cv_exeext="no" - $rm conftest* - echo 'main () { return 0; }' > conftest.c - echo "$progname:629: checking for executable suffix" >& 5 - if { (eval echo $progname:630: \"$ac_link\") 1>&5; (eval $ac_link) 2>conftest.err; }; then - # Append any warnings to the config.log. - cat conftest.err 1>&5 - - for ac_file in conftest.*; do - case $ac_file in - *.c | *.err | *.$objext ) ;; - *) ac_cv_exeext=.`echo $ac_file | sed -e s/conftest.//` ;; - esac - done - else - cat conftest.err 1>&5 - echo "$progname: failed program was:" >&5 - cat conftest.c >&5 - fi - $rm conftest* -fi -if test "X$ac_cv_exeext" = Xno; then - exeext="" -else - exeext="$ac_cv_exeext" -fi -echo "$ac_t$ac_cv_exeext" 1>&6 - -echo $ac_n "checking for $compiler option to produce PIC... $ac_c" 1>&6 -pic_flag= -special_shlib_compile_flags= -wl= -link_static_flag= -no_builtin_flag= - -if test "$with_gcc" = yes; then - wl='-Wl,' - link_static_flag='-static' - - case "$host_os" in - beos* | irix5* | irix6* | osf3* | osf4* | osf5*) - # PIC is the default for these OSes. 
- ;; - aix*) - # Below there is a dirty hack to force normal static linking with -ldl - # The problem is because libdl dynamically linked with both libc and - # libC (AIX C++ library), which obviously doesn't included in libraries - # list by gcc. This cause undefined symbols with -static flags. - # This hack allows C programs to be linked with "-static -ldl", but - # we not sure about C++ programs. - link_static_flag="$link_static_flag ${wl}-lC" - ;; - cygwin* | mingw* | os2*) - # We can build DLLs from non-PIC. - ;; - amigaos*) - # FIXME: we need at least 68020 code to build shared libraries, but - # adding the `-m68020' flag to GCC prevents building anything better, - # like `-m68040'. - pic_flag='-m68020 -resident32 -malways-restore-a4' - ;; - sysv4*MP*) - if test -d /usr/nec; then - pic_flag=-Kconform_pic - fi - ;; - *) - pic_flag='-fPIC' - ;; - esac -else - # PORTME Check for PIC flags for the system compiler. - case "$host_os" in - aix3* | aix4*) - # All AIX code is PIC. - link_static_flag='-bnso -bI:/lib/syscalls.exp' - ;; - - hpux9* | hpux10* | hpux11*) - # Is there a better link_static_flag that works with the bundled CC? - wl='-Wl,' - link_static_flag="${wl}-a ${wl}archive" - pic_flag='+Z' - ;; - - irix5* | irix6*) - wl='-Wl,' - link_static_flag='-non_shared' - # PIC (with -KPIC) is the default. - ;; - - cygwin* | mingw* | os2*) - # We can build DLLs from non-PIC. - ;; - - osf3* | osf4* | osf5*) - # All OSF/1 code is PIC. 
- wl='-Wl,' - link_static_flag='-non_shared' - ;; - - sco3.2v5*) - pic_flag='-Kpic' - link_static_flag='-dn' - special_shlib_compile_flags='-belf' - ;; - - solaris*) - pic_flag='-KPIC' - link_static_flag='-Bstatic' - wl='-Wl,' - ;; - - sunos4*) - pic_flag='-PIC' - link_static_flag='-Bstatic' - wl='-Qoption ld ' - ;; - - sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) - pic_flag='-KPIC' - link_static_flag='-Bstatic' - wl='-Wl,' - ;; - - uts4*) - pic_flag='-pic' - link_static_flag='-Bstatic' - ;; - sysv4*MP*) - if test -d /usr/nec ;then - pic_flag='-Kconform_pic' - link_static_flag='-Bstatic' - fi - ;; - *) - can_build_shared=no - ;; - esac -fi - -if test -n "$pic_flag"; then - echo "$ac_t$pic_flag" 1>&6 - - # Check to make sure the pic_flag actually works. - echo $ac_n "checking if $compiler PIC flag $pic_flag works... $ac_c" 1>&6 - $rm conftest* - echo "int some_variable = 0;" > conftest.c - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $pic_flag -DPIC" - echo "$progname:776: checking if $compiler PIC flag $pic_flag works" >&5 - if { (eval echo $progname:777: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.$objext; then - # Append any warnings to the config.log. - cat conftest.err 1>&5 - - case "$host_os" in - hpux9* | hpux10* | hpux11*) - # On HP-UX, both CC and GCC only warn that PIC is supported... then they - # create non-PIC objects. So, if there were any warnings, we assume that - # PIC is not supported. - if test -s conftest.err; then - echo "$ac_t"no 1>&6 - can_build_shared=no - pic_flag= - else - echo "$ac_t"yes 1>&6 - pic_flag=" $pic_flag" - fi - ;; - *) - echo "$ac_t"yes 1>&6 - pic_flag=" $pic_flag" - ;; - esac - else - # Append any errors to the config.log. 
- cat conftest.err 1>&5 - can_build_shared=no - pic_flag= - echo "$ac_t"no 1>&6 - fi - CFLAGS="$save_CFLAGS" - $rm conftest* -else - echo "$ac_t"none 1>&6 -fi - -# Check to see if options -o and -c are simultaneously supported by compiler -echo $ac_n "checking if $compiler supports -c -o file.o... $ac_c" 1>&6 -$rm -r conftest 2>/dev/null -mkdir conftest -cd conftest -$rm conftest* -echo "int some_variable = 0;" > conftest.c -mkdir out -# According to Tom Tromey, Ian Lance Taylor reported there are C compilers -# that will create temporary files in the current directory regardless of -# the output directory. Thus, making CWD read-only will cause this test -# to fail, enabling locking or at least warning the user not to do parallel -# builds. -chmod -w . -save_CFLAGS="$CFLAGS" -CFLAGS="$CFLAGS -o out/conftest2.o" -echo "$progname:829: checking if $compiler supports -c -o file.o" >&5 -if { (eval echo $progname:830: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.o; then - - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings - if test -s out/conftest.err; then - echo "$ac_t"no 1>&6 - compiler_c_o=no - else - echo "$ac_t"yes 1>&6 - compiler_c_o=yes - fi -else - # Append any errors to the config.log. - cat out/conftest.err 1>&5 - compiler_c_o=no - echo "$ac_t"no 1>&6 -fi -CFLAGS="$save_CFLAGS" -chmod u+w . -$rm conftest* out/* -rmdir out -cd .. -rmdir conftest -$rm -r conftest 2>/dev/null - -if test x"$compiler_c_o" = x"yes"; then - # Check to see if we can write to a .lo - echo $ac_n "checking if $compiler supports -c -o file.lo... 
$ac_c" 1>&6 - $rm conftest* - echo "int some_variable = 0;" > conftest.c - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -c -o conftest.lo" - echo "$progname:862: checking if $compiler supports -c -o file.lo" >&5 -if { (eval echo $progname:863: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.lo; then - - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings - if test -s conftest.err; then - echo "$ac_t"no 1>&6 - compiler_o_lo=no - else - echo "$ac_t"yes 1>&6 - compiler_o_lo=yes - fi - else - # Append any errors to the config.log. - cat conftest.err 1>&5 - compiler_o_lo=no - echo "$ac_t"no 1>&6 - fi - CFLAGS="$save_CFLAGS" - $rm conftest* -else - compiler_o_lo=no -fi - -# Check to see if we can do hard links to lock some files if needed -hard_links="nottested" -if test "$compiler_c_o" = no && test "$need_locks" != no; then - # do not overwrite the value of need_locks provided by the user - echo $ac_n "checking if we can lock with hard links... $ac_c" 1>&6 - hard_links=yes - $rm conftest* - ln conftest.a conftest.b 2>/dev/null && hard_links=no - touch conftest.a - ln conftest.a conftest.b 2>&5 || hard_links=no - ln conftest.a conftest.b 2>/dev/null && hard_links=no - echo "$ac_t$hard_links" 1>&6 - $rm conftest* - if test "$hard_links" = no; then - echo "*** WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2 - need_locks=warn - fi -else - need_locks=no -fi - -if test "$with_gcc" = yes; then - # Check to see if options -fno-rtti -fno-exceptions are supported by compiler - echo $ac_n "checking if $compiler supports -fno-rtti -fno-exceptions ... 
$ac_c" 1>&6 - $rm conftest* - echo "int some_variable = 0;" > conftest.c - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.c" - echo "$progname:914: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 - if { (eval echo $progname:915: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.o; then - - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings - if test -s conftest.err; then - echo "$ac_t"no 1>&6 - compiler_rtti_exceptions=no - else - echo "$ac_t"yes 1>&6 - compiler_rtti_exceptions=yes - fi - else - # Append any errors to the config.log. - cat conftest.err 1>&5 - compiler_rtti_exceptions=no - echo "$ac_t"no 1>&6 - fi - CFLAGS="$save_CFLAGS" - $rm conftest* - - if test "$compiler_rtti_exceptions" = "yes"; then - no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions' - else - no_builtin_flag=' -fno-builtin' - fi - -fi - -# Check for any special shared library compilation flags. -if test -n "$special_shlib_compile_flags"; then - echo "$progname: warning: \`$CC' requires \`$special_shlib_compile_flags' to build shared libraries" 1>&2 - if echo "$old_CC $old_CFLAGS " | egrep -e "[ ]$special_shlib_compile_flags[ ]" >/dev/null; then : - else - echo "$progname: add \`$special_shlib_compile_flags' to the CC or CFLAGS env variable and reconfigure" 1>&2 - can_build_shared=no - fi -fi - -echo $ac_n "checking if $compiler static flag $link_static_flag works... 
$ac_c" 1>&6 -$rm conftest* -echo 'main(){return(0);}' > conftest.c -save_LDFLAGS="$LDFLAGS" -LDFLAGS="$LDFLAGS $link_static_flag" -echo "$progname:958: checking if $compiler static flag $link_static_flag works" >&5 -if { (eval echo $progname:959: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then - echo "$ac_t$link_static_flag" 1>&6 -else - echo "$ac_t"none 1>&6 - link_static_flag= -fi -LDFLAGS="$save_LDFLAGS" -$rm conftest* - -if test -z "$LN_S"; then - # Check to see if we can use ln -s, or we need hard links. - echo $ac_n "checking whether ln -s works... $ac_c" 1>&6 - $rm conftest.dat - if ln -s X conftest.dat 2>/dev/null; then - $rm conftest.dat - LN_S="ln -s" - else - LN_S=ln - fi - if test "$LN_S" = "ln -s"; then - echo "$ac_t"yes 1>&6 - else - echo "$ac_t"no 1>&6 - fi -fi - -# Make sure LD is an absolute path. -if test -z "$LD"; then - ac_prog=ld - if test "$with_gcc" = yes; then - # Check if gcc -print-prog-name=ld gives a path. - echo $ac_n "checking for ld used by GCC... $ac_c" 1>&6 - echo "$progname:991: checking for ld used by GCC" >&5 - ac_prog=`($CC -print-prog-name=ld) 2>&5` - case "$ac_prog" in - # Accept absolute paths. - [\\/]* | [A-Za-z]:[\\/]*) - re_direlt='/[^/][^/]*/\.\./' - # Canonicalize the path of ld - ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'` - while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do - ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"` - done - test -z "$LD" && LD="$ac_prog" - ;; - "") - # If it fails, then pretend we are not using GCC. - ac_prog=ld - ;; - *) - # If it is relative, then search for the first ld in PATH. - with_gnu_ld=unknown - ;; - esac - elif test "$with_gnu_ld" = yes; then - echo $ac_n "checking for GNU ld... $ac_c" 1>&6 - echo "$progname:1015: checking for GNU ld" >&5 - else - echo $ac_n "checking for non-GNU ld""... 
$ac_c" 1>&6 - echo "$progname:1018: checking for non-GNU ld" >&5 - fi - - if test -z "$LD"; then - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. - if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then - LD="$ac_dir/$ac_prog" - # Check to see if the program is GNU ld. I'd rather use --version, - # but apparently some GNU ld's only accept -v. - # Break only if it was the GNU/non-GNU ld that we prefer. - if "$LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then - test "$with_gnu_ld" != no && break - else - test "$with_gnu_ld" != yes && break - fi - fi - done - IFS="$ac_save_ifs" - fi - - if test -n "$LD"; then - echo "$ac_t$LD" 1>&6 - else - echo "$ac_t"no 1>&6 - fi - - if test -z "$LD"; then - echo "$progname: error: no acceptable ld found in \$PATH" 1>&2 - exit 1 - fi -fi - -# Check to see if it really is or is not GNU ld. -echo $ac_n "checking if the linker ($LD) is GNU ld... $ac_c" 1>&6 -# I'd rather use --version here, but apparently some GNU ld's only accept -v. -if $LD -v 2>&1 &5; then - with_gnu_ld=yes -else - with_gnu_ld=no -fi -echo "$ac_t$with_gnu_ld" 1>&6 - -# See if the linker supports building shared libraries. -echo $ac_n "checking whether the linker ($LD) supports shared libraries... 
$ac_c" 1>&6 - -allow_undefined_flag= -no_undefined_flag= -need_lib_prefix=unknown -need_version=unknown -# when you set need_version to no, make sure it does not cause -set_version -# flags to be left without arguments -archive_cmds= -archive_expsym_cmds= -old_archive_from_new_cmds= -export_dynamic_flag_spec= -whole_archive_flag_spec= -thread_safe_flag_spec= -hardcode_libdir_flag_spec= -hardcode_libdir_separator= -hardcode_direct=no -hardcode_minus_L=no -hardcode_shlibpath_var=unsupported -runpath_var= -always_export_symbols=no -export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols' -# include_expsyms should be a list of space-separated symbols to be *always* -# included in the symbol list -include_expsyms= -# exclude_expsyms can be an egrep regular expression of symbols to exclude -# it will be wrapped by ` (' and `)$', so one must not match beginning or -# end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', -# as well as any symbol that contains `d'. -exclude_expsyms="_GLOBAL_OFFSET_TABLE_" -# Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out -# platforms (ab)use it in PIC code, but their linkers get confused if -# the symbol is explicitly referenced. Since portable code cannot -# rely on this symbol name, it's probably fine to never include it in -# preloaded symbol tables. - -case "$host_os" in -cygwin* | mingw*) - # FIXME: the MSVC++ port hasn't been tested in a loooong time - # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. - if test "$with_gcc" != yes; then - with_gnu_ld=no - fi - ;; - -esac - -ld_shlibs=yes -if test "$with_gnu_ld" = yes; then - # If archive_cmds runs LD, not CC, wlarc should be empty - wlarc='${wl}' - - # See if GNU ld supports shared libraries. 
- case "$host_os" in - aix3* | aix4*) - # On AIX, the GNU linker is very broken - ld_shlibs=no - cat <&2 - -*** Warning: the GNU linker, at least up to release 2.9.1, is reported -*** to be unable to reliably create shared libraries on AIX. -*** Therefore, libtool is disabling shared libraries support. If you -*** really care for shared libraries, you may want to modify your PATH -*** so that a non-GNU linker is found, and then restart. - -EOF - ;; - - amigaos*) - archive_cmds='$rm $objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $objdir/a2ixlibrary.data~$AR cru $lib $libobjs~$RANLIB $lib~(cd $objdir && a2ixlibrary -32)' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_minus_L=yes - - # Samuel A. Falvo II reports - # that the semantics of dynamic libraries on AmigaOS, at least up - # to version 4, is to share data among multiple programs linked - # with the same dynamic library. Since this doesn't match the - # behavior of shared libraries on other platforms, we can use - # them. - ld_shlibs=no - ;; - - beos*) - if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then - allow_undefined_flag=unsupported - # Joseph Beckenbach says some releases of gcc - # support --undefined. This deserves some investigation. FIXME - archive_cmds='$CC -nostart $libobjs $deplibs $linkopts ${wl}-soname $wl$soname -o $lib' - else - ld_shlibs=no - fi - ;; - - cygwin* | mingw*) - # hardcode_libdir_flag_spec is actually meaningless, as there is - # no search path for DLLs. - hardcode_libdir_flag_spec='-L$libdir' - allow_undefined_flag=unsupported - always_export_symbols=yes - - # Extract the symbol export list from an `--export-all' def file, - # then regenerate the def file from the symbol export list, so that - # the compiled dll only exports the symbol export list. 
- export_symbols_cmds='test -f $objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $0 > $objdir/$soname-ltdll.c~ - test -f $objdir/$soname-ltdll.$objext || (cd $objdir && $CC -c $soname-ltdll.c)~ - $DLLTOOL --export-all --exclude-symbols DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12 --output-def $objdir/$soname-def $objdir/$soname-ltdll.$objext $libobjs $convenience~ - sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]* ; *//" < $objdir/$soname-def > $export_symbols' - - archive_expsym_cmds='echo EXPORTS > $objdir/$soname-def~ - _lt_hint=1; - for symbol in `cat $export_symbols`; do - echo " \$symbol @ \$_lt_hint ; " >> $objdir/$soname-def; - _lt_hint=`expr 1 + \$_lt_hint`; - done~ - test -f $objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $0 > $objdir/$soname-ltdll.c~ - test -f $objdir/$soname-ltdll.$objext || (cd $objdir && $CC -c $soname-ltdll.c)~ - $CC -Wl,--base-file,$objdir/$soname-base -Wl,--dll -nostartfiles -Wl,-e,__cygwin_dll_entry@12 -o $lib $objdir/$soname-ltdll.$objext $libobjs $deplibs $linkopts~ - $DLLTOOL --as=$AS --dllname $soname --exclude-symbols DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12 --def $objdir/$soname-def --base-file $objdir/$soname-base --output-exp $objdir/$soname-exp~ - $CC -Wl,--base-file,$objdir/$soname-base $objdir/$soname-exp -Wl,--dll -nostartfiles -Wl,-e,__cygwin_dll_entry@12 -o $lib $objdir/$soname-ltdll.$objext $libobjs $deplibs $linkopts~ - $DLLTOOL --as=$AS --dllname $soname --exclude-symbols DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12 --def $objdir/$soname-def --base-file $objdir/$soname-base --output-exp $objdir/$soname-exp~ - $CC $objdir/$soname-exp -Wl,--dll -nostartfiles -Wl,-e,__cygwin_dll_entry@12 -o $lib $objdir/$soname-ltdll.$objext $libobjs $deplibs $linkopts' - - old_archive_from_new_cmds='$DLLTOOL --as=$AS --dllname 
$soname --def $objdir/$soname-def --output-lib $objdir/$libname.a' - ;; - - netbsd*) - if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then - archive_cmds='$CC -shared $libobjs $deplibs $linkopts ${wl}-soname $wl$soname -o $lib' - archive_expsym_cmds='$CC -shared $libobjs $deplibs $linkopts ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - else - archive_cmds='$LD -Bshareable $libobjs $deplibs $linkopts -o $lib' - # can we support soname and/or expsyms with a.out? -oliva - fi - ;; - - solaris* | sysv5*) - if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then - ld_shlibs=no - cat <&2 - -*** Warning: The releases 2.8.* of the GNU linker cannot reliably -*** create shared libraries on Solaris systems. Therefore, libtool -*** is disabling shared libraries support. We urge you to upgrade GNU -*** binutils to release 2.9.1 or newer. Another option is to modify -*** your PATH or compiler configuration so that the native linker is -*** used, and then restart. - -EOF - elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then - archive_cmds='$CC -shared $libobjs $deplibs $linkopts ${wl}-soname $wl$soname -o $lib' - archive_expsym_cmds='$CC -shared $libobjs $deplibs $linkopts ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - else - ld_shlibs=no - fi - ;; - - sunos4*) - archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linkopts' - wlarc= - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - *) - if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then - archive_cmds='$CC -shared $libobjs $deplibs $linkopts ${wl}-soname $wl$soname -o $lib' - archive_expsym_cmds='$CC -shared $libobjs $deplibs $linkopts ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - else - ld_shlibs=no - fi - ;; - esac - - if test "$ld_shlibs" = yes; then - runpath_var=LD_RUN_PATH - hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir' - 
export_dynamic_flag_spec='${wl}--export-dynamic' - case $host_os in - cygwin* | mingw*) - # dlltool doesn't understand --whole-archive et. al. - whole_archive_flag_spec= - ;; - *) - # ancient GNU ld didn't support --whole-archive et. al. - if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then - whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' - else - whole_archive_flag_spec= - fi - ;; - esac - fi -else - # PORTME fill in a description of your system's linker (not GNU ld) - case "$host_os" in - aix3*) - allow_undefined_flag=unsupported - always_export_symbols=yes - archive_expsym_cmds='$LD -o $objdir/$soname $libobjs $deplibs $linkopts -bE:$export_symbols -T512 -H512 -bM:SRE~$AR cru $lib $objdir/$soname' - # Note: this linker hardcodes the directories in LIBPATH if there - # are no directories specified by -L. - hardcode_minus_L=yes - if test "$with_gcc" = yes && test -z "$link_static_flag"; then - # Neither direct hardcoding nor static linking is supported with a - # broken collect2. - hardcode_direct=unsupported - fi - ;; - - aix4*) - hardcode_libdir_flag_spec='${wl}-b ${wl}nolibpath ${wl}-b ${wl}libpath:$libdir:/usr/lib:/lib' - hardcode_libdir_separator=':' - if test "$with_gcc" = yes; then - collect2name=`${CC} -print-prog-name=collect2` - if test -f "$collect2name" && \ - strings "$collect2name" | grep resolve_lib_name >/dev/null - then - # We have reworked collect2 - hardcode_direct=yes - else - # We have old collect2 - hardcode_direct=unsupported - # It fails to find uninstalled libraries when the uninstalled - # path is not listed in the libpath. 
Setting hardcode_minus_L - # to unsupported forces relinking - hardcode_minus_L=yes - hardcode_libdir_flag_spec='-L$libdir' - hardcode_libdir_separator= - fi - shared_flag='-shared' - else - shared_flag='${wl}-bM:SRE' - hardcode_direct=yes - fi - allow_undefined_flag=' ${wl}-berok' - archive_cmds="\$CC $shared_flag"' -o $objdir/$soname $libobjs $deplibs $linkopts ${wl}-bexpall ${wl}-bnoentry${allow_undefined_flag}' - archive_expsym_cmds="\$CC $shared_flag"' -o $objdir/$soname $libobjs $deplibs $linkopts ${wl}-bE:$export_symbols ${wl}-bnoentry${allow_undefined_flag}' - case "$host_os" in aix4.[01]|aix4.[01].*) - # According to Greg Wooledge, -bexpall is only supported from AIX 4.2 on - always_export_symbols=yes ;; - esac - ;; - - amigaos*) - archive_cmds='$rm $objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $objdir/a2ixlibrary.data~$AR cru $lib $libobjs~$RANLIB $lib~(cd $objdir && a2ixlibrary -32)' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_minus_L=yes - # see comment about different semantics on the GNU ld section - ld_shlibs=no - ;; - - cygwin* | mingw*) - # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. - # hardcode_libdir_flag_spec is actually meaningless, as there is - # no search path for DLLs. - hardcode_libdir_flag_spec=' ' - allow_undefined_flag=unsupported - # Tell ltmain to make .lib files, not .a files. - libext=lib - # FIXME: Setting linknames here is a bad hack. - archive_cmds='$CC -o $lib $libobjs $linkopts `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames=' - # The linker will automatically build a .lib file if we build a DLL. - old_archive_from_new_cmds='true' - # FIXME: Should let the user specify the lib program. 
- old_archive_cmds='lib /OUT:$oldlib$oldobjs' - fix_srcfile_path='`cygpath -w $srcfile`' - ;; - - freebsd1*) - ld_shlibs=no - ;; - - # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor - # support. Future versions do this automatically, but an explicit c++rt0.o - # does not break anything, and helps significantly (at the cost of a little - # extra space). - freebsd2.2*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linkopts /usr/lib/c++rt0.o' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. - freebsd2*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linkopts' - hardcode_direct=yes - hardcode_minus_L=yes - hardcode_shlibpath_var=no - ;; - - # FreeBSD 3 and greater uses gcc -shared to do shared libraries. - freebsd*) - archive_cmds='$CC -shared -o $lib $libobjs $deplibs $linkopts' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - hpux9* | hpux10* | hpux11*) - case "$host_os" in - hpux9*) archive_cmds='$rm $objdir/$soname~$LD -b +b $install_libdir -o $objdir/$soname $libobjs $deplibs $linkopts~test $objdir/$soname = $lib || mv $objdir/$soname $lib' ;; - *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linkopts' ;; - esac - hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' - hardcode_libdir_separator=: - hardcode_direct=yes - hardcode_minus_L=yes # Not in the search PATH, but as the default - # location of the library. 
- export_dynamic_flag_spec='${wl}-E' - ;; - - irix5* | irix6*) - if test "$with_gcc" = yes; then - archive_cmds='$CC -shared $libobjs $deplibs $linkopts ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${objdir}/so_locations -o $lib' - else - archive_cmds='$LD -shared $libobjs $deplibs $linkopts -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${objdir}/so_locations -o $lib' - fi - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - hardcode_libdir_separator=: - ;; - - netbsd*) - if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linkopts' # a.out - else - archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linkopts' # ELF - fi - hardcode_libdir_flag_spec='${wl}-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - openbsd*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linkopts' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - os2*) - hardcode_libdir_flag_spec='-L$libdir' - hardcode_minus_L=yes - allow_undefined_flag=unsupported - archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $objdir/$libname.def~$echo DATA >> $objdir/$libname.def~$echo " SINGLE NONSHARED" >> $objdir/$libname.def~$echo EXPORTS >> $objdir/$libname.def~emxexp $libobjs >> $objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $linkopts $objdir/$libname.def' - old_archive_from_new_cmds='emximp -o $objdir/$libname.a $objdir/$libname.def' - ;; - - osf3*) - if test "$with_gcc" = yes; then - allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' - archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $linkopts ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${objdir}/so_locations -o $lib' - else 
- allow_undefined_flag=' -expect_unresolved \*' - archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linkopts -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${objdir}/so_locations -o $lib' - fi - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - hardcode_libdir_separator=: - ;; - - osf4* | osf5*) # As osf3* with the addition of the -msym flag - if test "$with_gcc" = yes; then - allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' - archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $linkopts ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${objdir}/so_locations -o $lib' - else - allow_undefined_flag=' -expect_unresolved \*' - archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linkopts -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${objdir}/so_locations -o $lib' - fi - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - hardcode_libdir_separator=: - ;; - - sco3.2v5*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' - hardcode_shlibpath_var=no - runpath_var=LD_RUN_PATH - hardcode_runpath_var=yes - ;; - - solaris*) - no_undefined_flag=' -z text' - # $CC -shared without GNU ld will not create a library from C++ - # object files and a static libstdc++, better avoid it by now - archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linkopts' - archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ - $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linkopts~$rm $lib.exp' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_shlibpath_var=no - case "$host_os" in - solaris2.[0-5] | solaris2.[0-5].*) ;; - *) # Supported since Solaris 2.6 (maybe 2.5.1?) 
- whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;; - esac - ;; - - sunos4*) - archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linkopts' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_direct=yes - hardcode_minus_L=yes - hardcode_shlibpath_var=no - ;; - - sysv4) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' - runpath_var='LD_RUN_PATH' - hardcode_shlibpath_var=no - hardcode_direct=no #Motorola manual says yes, but my tests say they lie - ;; - - sysv4.3*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' - hardcode_shlibpath_var=no - export_dynamic_flag_spec='-Bexport' - ;; - - sysv5*) - no_undefined_flag=' -z text' - # $CC -shared without GNU ld will not create a library from C++ - # object files and a static libstdc++, better avoid it by now - archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linkopts' - archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ - $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linkopts~$rm $lib.exp' - hardcode_libdir_flag_spec= - hardcode_shlibpath_var=no - runpath_var='LD_RUN_PATH' - ;; - - uts4*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_shlibpath_var=no - ;; - - dgux*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_shlibpath_var=no - ;; - - sysv4*MP*) - if test -d /usr/nec; then - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' - hardcode_shlibpath_var=no - runpath_var=LD_RUN_PATH - hardcode_runpath_var=yes - ld_shlibs=yes - fi - ;; - - sysv4.2uw2*) - archive_cmds='$LD -G -o $lib $libobjs $deplibs $linkopts' - hardcode_direct=yes - hardcode_minus_L=no - hardcode_shlibpath_var=no - hardcode_runpath_var=yes - 
runpath_var=LD_RUN_PATH - ;; - - unixware7*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' - runpath_var='LD_RUN_PATH' - hardcode_shlibpath_var=no - ;; - - *) - ld_shlibs=no - ;; - esac -fi -echo "$ac_t$ld_shlibs" 1>&6 -test "$ld_shlibs" = no && can_build_shared=no - -if test -z "$NM"; then - echo $ac_n "checking for BSD-compatible nm... $ac_c" 1>&6 - case "$NM" in - [\\/]* | [A-Za-z]:[\\/]*) ;; # Let the user override the test with a path. - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" - for ac_dir in $PATH /usr/ucb /usr/ccs/bin /bin; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/nm || test -f $ac_dir/nm$ac_exeext; then - # Check to see if the nm accepts a BSD-compat flag. - # Adding the `sed 1q' prevents false positives on HP-UX, which says: - # nm: unknown option "B" ignored - if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then - NM="$ac_dir/nm -B" - break - elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then - NM="$ac_dir/nm -p" - break - else - NM=${NM="$ac_dir/nm"} # keep the first match, but - continue # so that we can try to find one that supports BSD flags - fi - fi - done - IFS="$ac_save_ifs" - test -z "$NM" && NM=nm - ;; - esac - echo "$ac_t$NM" 1>&6 -fi - -# Check for command to grab the raw symbol name followed by C symbol from nm. -echo $ac_n "checking command to parse $NM output... $ac_c" 1>&6 - -# These are sane defaults that work on at least a few old systems. -# [They come from Ultrix. What could be older than Ultrix?!! ;)] - -# Character class describing NM global symbol codes. -symcode='[BCDEGRST]' - -# Regexp to match symbols that can be accessed directly from C. -sympat='\([_A-Za-z][_A-Za-z0-9]*\)' - -# Transform the above into a raw symbol and a C symbol. -symxfrm='\1 \2\3 \3' - -# Transform an extracted symbol line into a proper C declaration -global_symbol_to_cdecl="sed -n -e 's/^. 
.* \(.*\)$/extern char \1;/p'" - -# Define system-specific variables. -case "$host_os" in -aix*) - symcode='[BCDT]' - ;; -cygwin* | mingw*) - symcode='[ABCDGISTW]' - ;; -hpux*) # Its linker distinguishes data from code symbols - global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^. .* \(.*\)$/extern char \1;/p'" - ;; -irix*) - symcode='[BCDEGRST]' - ;; -solaris*) - symcode='[BDT]' - ;; -sysv4) - symcode='[DFNSTU]' - ;; -esac - -# If we're using GNU nm, then use its standard symbol codes. -if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then - symcode='[ABCDGISTW]' -fi - -# Try without a prefix undercore, then with it. -for ac_symprfx in "" "_"; do - - # Write the raw and C identifiers. - global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode\)[ ][ ]*\($ac_symprfx\)$sympat$/$symxfrm/p'" - - # Check to see that the pipe works correctly. - pipe_works=no - $rm conftest* - cat > conftest.c <&5 - if { (eval echo $progname:1636: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; } && test -s conftest.$objext; then - # Now try to grab the symbols. - nlist=conftest.nm - if { echo "$progname:1639: eval \"$NM conftest.$objext | $global_symbol_pipe > $nlist\"" >&5; eval "$NM conftest.$objext | $global_symbol_pipe > $nlist 2>&5"; } && test -s "$nlist"; then - - # Try sorting and uniquifying the output. - if sort "$nlist" | uniq > "$nlist"T; then - mv -f "$nlist"T "$nlist" - else - rm -f "$nlist"T - fi - - # Make sure that we snagged all the symbols we need. - if egrep ' nm_test_var$' "$nlist" >/dev/null; then - if egrep ' nm_test_func$' "$nlist" >/dev/null; then - cat < conftest.c -#ifdef __cplusplus -extern "C" { -#endif - -EOF - # Now generate the symbol file. - eval "$global_symbol_to_cdecl"' < "$nlist" >> conftest.c' - - cat <> conftest.c -#if defined (__STDC__) && __STDC__ -# define lt_ptr_t void * -#else -# define lt_ptr_t char * -# define const -#endif - -/* The mapping between symbol names and symbols. 
*/ -const struct { - const char *name; - lt_ptr_t address; -} -lt_preloaded_symbols[] = -{ -EOF - sed 's/^. \(.*\) \(.*\)$/ {"\2", (lt_ptr_t) \&\2},/' < "$nlist" >> conftest.c - cat <<\EOF >> conftest.c - {0, (lt_ptr_t) 0} -}; - -#ifdef __cplusplus -} -#endif -EOF - # Now try linking the two files. - mv conftest.$objext conftstm.$objext - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS="conftstm.$objext" - CFLAGS="$CFLAGS$no_builtin_flag" - if { (eval echo $progname:1691: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then - pipe_works=yes - else - echo "$progname: failed program was:" >&5 - cat conftest.c >&5 - fi - LIBS="$save_LIBS" - else - echo "cannot find nm_test_func in $nlist" >&5 - fi - else - echo "cannot find nm_test_var in $nlist" >&5 - fi - else - echo "cannot run $global_symbol_pipe" >&5 - fi - else - echo "$progname: failed program was:" >&5 - cat conftest.c >&5 - fi - $rm conftest* conftst* - - # Do not use the global_symbol_pipe unless it works. - if test "$pipe_works" = yes; then - break - else - global_symbol_pipe= - fi -done -if test "$pipe_works" = yes; then - echo "${ac_t}ok" 1>&6 -else - echo "${ac_t}failed" 1>&6 -fi - -if test -z "$global_symbol_pipe"; then - global_symbol_to_cdecl= -fi - -# Check hardcoding attributes. -echo $ac_n "checking how to hardcode library paths into programs... $ac_c" 1>&6 -hardcode_action= -if test -n "$hardcode_libdir_flag_spec" || \ - test -n "$runpath_var"; then - - # We can hardcode non-existant directories. - if test "$hardcode_direct" != no && - # If the only mechanism to avoid hardcoding is shlibpath_var, we - # have to relink, otherwise we might link with an installed library - # when we should be linking with a yet-to-be-installed one - ## test "$hardcode_shlibpath_var" != no && - test "$hardcode_minus_L" != no; then - # Linking always hardcodes the temporary library directory. - hardcode_action=relink - else - # We can link without hardcoding, and we can hardcode nonexisting dirs. 
- hardcode_action=immediate - fi -else - # We cannot hardcode anything, or else we can only hardcode existing - # directories. - hardcode_action=unsupported -fi -echo "$ac_t$hardcode_action" 1>&6 - - -reload_flag= -reload_cmds='$LD$reload_flag -o $output$reload_objs' -echo $ac_n "checking for $LD option to reload object files... $ac_c" 1>&6 -# PORTME Some linkers may need a different reload flag. -reload_flag='-r' -echo "$ac_t$reload_flag" 1>&6 -test -n "$reload_flag" && reload_flag=" $reload_flag" - -# PORTME Fill in your ld.so characteristics -library_names_spec= -libname_spec='lib$name' -soname_spec= -postinstall_cmds= -postuninstall_cmds= -finish_cmds= -finish_eval= -shlibpath_var= -shlibpath_overrides_runpath=unknown -version_type=none -dynamic_linker="$host_os ld.so" -sys_lib_dlsearch_path_spec="/lib /usr/lib" -sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" -file_magic_cmd= -file_magic_test_file= -deplibs_check_method='unknown' -# Need to set the preceding variable on all platforms that support -# interlibrary dependencies. -# 'none' -- dependencies not supported. -# `unknown' -- same as none, but documents that we really don't know. -# 'pass_all' -- all dependencies passed with no checks. -# 'test_compile' -- check by making test program. -# 'file_magic [regex]' -- check by looking for files in library path -# which responds to the $file_magic_cmd with a given egrep regex. -# If you have `file' or equivalent on your system and you're not sure -# whether `pass_all' will *always* work, you probably want this one. -echo $ac_n "checking dynamic linker characteristics... $ac_c" 1>&6 -case "$host_os" in -aix3*) - version_type=linux - library_names_spec='${libname}${release}.so$versuffix $libname.a' - shlibpath_var=LIBPATH - - # AIX has no versioning support, so we append a major version to the name. 
- soname_spec='${libname}${release}.so$major' - ;; - -aix4*) - version_type=linux - # AIX has no versioning support, so currently we can not hardcode correct - # soname into executable. Probably we can add versioning support to - # collect2, so additional links can be useful in future. - # We preserve .a as extension for shared libraries though AIX4.2 - # and later linker supports .so - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.a' - shlibpath_var=LIBPATH - deplibs_check_method=pass_all - ;; - -amigaos*) - library_names_spec='$libname.ixlibrary $libname.a' - # Create ${libname}_ixlibrary.a entries in /sys/libs. - finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done' - ;; - -beos*) - library_names_spec='${libname}.so' - dynamic_linker="$host_os ld.so" - shlibpath_var=LIBRARY_PATH - deplibs_check_method=pass_all - lt_cv_dlopen="load_add_on" - lt_cv_dlopen_libs= - lt_cv_dlopen_self=yes - ;; - -bsdi4*) - version_type=linux - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' - shlibpath_var=LD_LIBRARY_PATH - deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' - file_magic_cmd=/usr/bin/file - file_magic_test_file=/shlib/libc.so - sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" - sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" - export_dynamic_flag_spec=-rdynamic - # the default ld.so.conf also contains /usr/contrib/lib and - # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow - # libtool to 
hard-code these into programs - ;; - -cygwin* | mingw*) - version_type=windows - need_version=no - need_lib_prefix=no - if test "$with_gcc" = yes; then - library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll $libname.a' - else - library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll $libname.lib' - fi - dynamic_linker='Win32 ld.exe' - deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' - file_magic_cmd='${OBJDUMP} -f' - # FIXME: first we should search . and the directory the executable is in - shlibpath_var=PATH - lt_cv_dlopen="LoadLibrary" - lt_cv_dlopen_libs= - ;; - -freebsd1*) - dynamic_linker=no - ;; - -freebsd*) - objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout` - version_type=freebsd-$objformat - case "$version_type" in - freebsd-elf*) - deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB shared object' - file_magic_cmd=/usr/bin/file - file_magic_test_file=`echo /usr/lib/libc.so*` - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' - need_version=no - need_lib_prefix=no - ;; - freebsd-*) - deplibs_check_method=unknown - library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix' - need_version=yes - ;; - esac - shlibpath_var=LD_LIBRARY_PATH - case "$host_os" in - freebsd2* | freebsd3.[01]* | freebsdelf3.[01]*) - shlibpath_overrides_runpath=yes - ;; - *) # from 3.2 on - shlibpath_overrides_runpath=no - ;; - esac - ;; - -gnu*) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -hpux9* | hpux10* | hpux11*) - # Give a soname corresponding to the major version so that dld.sl refuses to - # link against other versions. 
- dynamic_linker="$host_os dld.sl" - version_type=sunos - need_lib_prefix=no - need_version=no - shlibpath_var=SHLIB_PATH - shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH - library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl' - soname_spec='${libname}${release}.sl$major' - # HP-UX runs *really* slowly unless shared libraries are mode 555. - postinstall_cmds='chmod 555 $lib' - ;; - -irix5* | irix6*) - version_type=irix - need_lib_prefix=no - need_version=no - soname_spec='${libname}${release}.so.$major' - library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major ${libname}${release}.so $libname.so' - case "$host_os" in - irix5*) - libsuff= shlibsuff= - # this will be overridden with pass_all, but let us keep it just in case - deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1" - ;; - *) - case "$LD" in # libtool.m4 will add one of these switches to LD - *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;; - *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; - *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;; - *) libsuff= shlibsuff= libmagic=never-match;; - esac - ;; - esac - shlibpath_var=LD_LIBRARY${shlibsuff}_PATH - shlibpath_overrides_runpath=no - sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" - sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" - file_magic_cmd=/usr/bin/file - file_magic_test_file=`echo /lib${libsuff}/libc.so*` - deplibs_check_method='pass_all' - ;; - -# No shared lib support for Linux oldld, aout, or coff. -linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*) - dynamic_linker=no - ;; - -# This must be Linux ELF. 
-linux-gnu*) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' - file_magic_cmd=/usr/bin/file - file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so` - - if test -f /lib/ld.so.1; then - dynamic_linker='GNU ld.so' - else - # Only the GNU ld.so supports shared libraries on MkLinux. - case "$host_cpu" in - powerpc*) dynamic_linker=no ;; - *) dynamic_linker='Linux ld.so' ;; - esac - fi - ;; - -netbsd*) - version_type=sunos - if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then - library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' - dynamic_linker='NetBSD (a.out) ld.so' - else - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so' - soname_spec='${libname}${release}.so$major' - dynamic_linker='NetBSD ld.elf_so' - fi - shlibpath_var=LD_LIBRARY_PATH - ;; - -openbsd*) - version_type=sunos - if test "$with_gnu_ld" = yes; then - need_lib_prefix=no - need_version=no - fi - library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' - shlibpath_var=LD_LIBRARY_PATH - ;; - -os2*) - libname_spec='$name' - need_lib_prefix=no - library_names_spec='$libname.dll $libname.a' - dynamic_linker='OS/2 ld.exe' - shlibpath_var=LIBPATH - ;; - -osf3* | osf4* | osf5*) - version_type=osf - need_version=no - soname_spec='${libname}${release}.so' - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' - shlibpath_var=LD_LIBRARY_PATH - # this will be overridden with 
pass_all, but let us keep it just in case - deplibs_check_method='file_magic COFF format alpha shared library' - file_magic_cmd=/usr/bin/file - file_magic_test_file=/shlib/libc.so - deplibs_check_method='pass_all' - sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" - sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" - ;; - -sco3.2v5*) - version_type=osf - soname_spec='${libname}${release}.so$major' - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - shlibpath_var=LD_LIBRARY_PATH - ;; - -solaris*) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - # ldd complains unless libraries are executable - postinstall_cmds='chmod +x $lib' - deplibs_check_method="file_magic ELF [0-9][0-9]-bit [LM]SB dynamic lib" - file_magic_cmd=/usr/bin/file - file_magic_test_file=/lib/libc.so - ;; - -sunos4*) - version_type=sunos - library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' - finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - if test "$with_gnu_ld" = yes; then - need_lib_prefix=no - fi - need_version=yes - ;; - -sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) - version_type=linux - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - case "$host_vendor" in - ncr) - deplibs_check_method='pass_all' - ;; - motorola) - need_lib_prefix=no - need_version=no - shlibpath_overrides_runpath=no - sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' - deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version 
[0-9]' - file_magic_cmd=/usr/bin/file - file_magic_test_file=`echo /usr/lib/libc.so*` - ;; - esac - ;; - -uts4*) - version_type=linux - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -dgux*) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -sysv4*MP*) - if test -d /usr/nec ;then - version_type=linux - library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so' - soname_spec='$libname.so.$major' - shlibpath_var=LD_LIBRARY_PATH - fi - ;; - -*) - dynamic_linker=no - ;; -esac -echo "$ac_t$dynamic_linker" 1>&6 -test "$dynamic_linker" = no && can_build_shared=no - -# Report the final consequences. -echo "checking if libtool supports shared libraries... $can_build_shared" 1>&6 - -# Only try to build win32 dlls if AC_LIBTOOL_WIN32_DLL was used in -# configure.in, otherwise build static only libraries. -case "$host_os" in -cygwin* | mingw* | os2*) - if test x$can_build_shared = xyes; then - test x$enable_win32_dll = xno && can_build_shared=no - echo "checking if package supports dlls... $can_build_shared" 1>&6 - fi -;; -esac - -if test -n "$file_magic_test_file" && test -n "$file_magic_cmd"; then - case "$deplibs_check_method" in - "file_magic "*) - file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`" - if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | - egrep "$file_magic_regex" > /dev/null; then - : - else - cat <&2 - -*** Warning: the command libtool uses to detect shared libraries, -*** $file_magic_cmd, produces output that libtool cannot recognize. -*** The result is that libtool may fail to recognize shared libraries -*** as such. 
This will affect the creation of libtool libraries that -*** depend on shared libraries, but programs linked with such libtool -*** libraries will work regardless of this problem. Nevertheless, you -*** may want to report the problem to your system manager and/or to -*** bug-libtool@gnu.org - -EOF - fi ;; - esac -fi - -echo $ac_n "checking whether to build shared libraries... $ac_c" 1>&6 -test "$can_build_shared" = "no" && enable_shared=no - -# On AIX, shared libraries and static libraries use the same namespace, and -# are all built from PIC. -case "$host_os" in -aix3*) - test "$enable_shared" = yes && enable_static=no - if test -n "$RANLIB"; then - archive_cmds="$archive_cmds~\$RANLIB \$lib" - postinstall_cmds='$RANLIB $lib' - fi - ;; - -aix4*) - test "$enable_shared" = yes && enable_static=no - ;; -esac - -echo "$ac_t$enable_shared" 1>&6 - -# Make sure either enable_shared or enable_static is yes. -test "$enable_shared" = yes || enable_static=yes - -echo "checking whether to build static libraries... $enable_static" 1>&6 - -if test "$hardcode_action" = relink; then - # Fast installation is not supported - enable_fast_install=no -elif test "$shlibpath_overrides_runpath" = yes || - test "$enable_shared" = no; then - # Fast installation is not necessary - enable_fast_install=needless -fi - -echo $ac_n "checking for objdir... $ac_c" 1>&6 -rm -f .libs 2>/dev/null -mkdir .libs 2>/dev/null -if test -d .libs; then - objdir=.libs -else - # MS-DOS does not allow filenames that begin with a dot. - objdir=_libs -fi -rmdir .libs 2>/dev/null -echo "$ac_t$objdir" 1>&6 - -if test "x$enable_dlopen" != xyes; then - enable_dlopen=unknown - enable_dlopen_self=unknown - enable_dlopen_self_static=unknown -else -if eval "test \"`echo '$''{'lt_cv_dlopen'+set}'`\" != set"; then - lt_cv_dlopen=no lt_cv_dlopen_libs= -echo $ac_n "checking for dlopen in -ldl""... 
$ac_c" 1>&6 -echo "$progname:2212: checking for dlopen in -ldl" >&5 -ac_lib_var=`echo dl'_'dlopen | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-ldl $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "$progname: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" -else - echo "$ac_t""no" 1>&6 -echo $ac_n "checking for dlopen""... $ac_c" 1>&6 -echo "$progname:2252: checking for dlopen" >&5 -if eval "test \"`echo '$''{'ac_cv_func_dlopen'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char dlopen(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_dlopen) || defined (__stub___dlopen) -choke me -#else -dlopen(); -#endif - -; return 0; } -EOF -if { (eval echo $progname:2282: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_dlopen=yes" -else - echo "$progname: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_dlopen=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_func_'dlopen`\" = yes"; then - echo "$ac_t""yes" 1>&6 - lt_cv_dlopen="dlopen" -else - echo "$ac_t""no" 1>&6 -echo $ac_n "checking for dld_link in -ldld""... $ac_c" 1>&6 -echo "$progname:2299: checking for dld_link in -ldld" >&5 -ac_lib_var=`echo dld'_'dld_link | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-ldld $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "$progname: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld" -else - echo "$ac_t""no" 1>&6 -echo $ac_n "checking for shl_load""... $ac_c" 1>&6 -echo "$progname:2339: checking for shl_load" >&5 -if eval "test \"`echo '$''{'ac_cv_func_shl_load'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char shl_load(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_shl_load) || defined (__stub___shl_load) -choke me -#else -shl_load(); -#endif - -; return 0; } -EOF -if { (eval echo $progname:2369: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_shl_load=yes" -else - echo "$progname: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_shl_load=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'shl_load`\" = yes"; then - echo "$ac_t""yes" 1>&6 - lt_cv_dlopen="shl_load" -else - echo "$ac_t""no" 1>&6 -echo $ac_n "checking for shl_load in -ldld""... $ac_c" 1>&6 -echo "$progname:2387: checking for shl_load in -ldld" >&5 -ac_lib_var=`echo dld'_'shl_load | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-ldld $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "$progname: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld" -else - echo "$ac_t""no" 1>&6 -fi - - -fi - - -fi - - -fi - - -fi - -fi - - if test "x$lt_cv_dlopen" != xno; then - enable_dlopen=yes - fi - - case "$lt_cv_dlopen" in - dlopen) -for ac_hdr in dlfcn.h; do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 -echo "$progname:2452: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -int fnord = 0; -EOF -ac_try="$ac_compile >/dev/null 2>conftest.out" -{ (eval echo $progname:2462: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "$progname: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi -done - - if test "x$ac_cv_header_dlfcn_h" = xyes; then - CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" - fi - eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" - LIBS="$lt_cv_dlopen_libs $LIBS" - - echo $ac_n "checking whether a program can dlopen itself""... $ac_c" 1>&6 -echo "$progname:2490: checking whether a program can dlopen itself" >&5 -if test "${lt_cv_dlopen_self+set}" = set; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - lt_cv_dlopen_self=cross - else - cat > conftest.c < -#endif - -#include - -#ifdef RTLD_GLOBAL -# define LTDL_GLOBAL RTLD_GLOBAL -#else -# ifdef DL_GLOBAL -# define LTDL_GLOBAL DL_GLOBAL -# else -# define LTDL_GLOBAL 0 -# endif -#endif - -/* We may have to define LTDL_LAZY_OR_NOW in the command line if we - find out it does not work in some platform. 
*/ -#ifndef LTDL_LAZY_OR_NOW -# ifdef RTLD_LAZY -# define LTDL_LAZY_OR_NOW RTLD_LAZY -# else -# ifdef DL_LAZY -# define LTDL_LAZY_OR_NOW DL_LAZY -# else -# ifdef RTLD_NOW -# define LTDL_LAZY_OR_NOW RTLD_NOW -# else -# ifdef DL_NOW -# define LTDL_LAZY_OR_NOW DL_NOW -# else -# define LTDL_LAZY_OR_NOW 0 -# endif -# endif -# endif -# endif -#endif - -fnord() { int i=42;} -main() { void *self, *ptr1, *ptr2; self=dlopen(0,LTDL_GLOBAL|LTDL_LAZY_OR_NOW); - if(self) { ptr1=dlsym(self,"fnord"); ptr2=dlsym(self,"_fnord"); - if(ptr1 || ptr2) { dlclose(self); exit(0); } } exit(1); } - -EOF -if { (eval echo $progname:2544: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null -then - lt_cv_dlopen_self=yes -else - echo "$progname: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - lt_cv_dlopen_self=no -fi -rm -fr conftest* -fi - -fi - -echo "$ac_t""$lt_cv_dlopen_self" 1>&6 - - if test "$lt_cv_dlopen_self" = yes; then - LDFLAGS="$LDFLAGS $link_static_flag" - echo $ac_n "checking whether a statically linked program can dlopen itself""... $ac_c" 1>&6 -echo "$progname:2563: checking whether a statically linked program can dlopen itself" >&5 -if test "${lt_cv_dlopen_self_static+set}" = set; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - lt_cv_dlopen_self_static=cross - else - cat > conftest.c < -#endif - -#include - -#ifdef RTLD_GLOBAL -# define LTDL_GLOBAL RTLD_GLOBAL -#else -# ifdef DL_GLOBAL -# define LTDL_GLOBAL DL_GLOBAL -# else -# define LTDL_GLOBAL 0 -# endif -#endif - -/* We may have to define LTDL_LAZY_OR_NOW in the command line if we - find out it does not work in some platform. 
*/ -#ifndef LTDL_LAZY_OR_NOW -# ifdef RTLD_LAZY -# define LTDL_LAZY_OR_NOW RTLD_LAZY -# else -# ifdef DL_LAZY -# define LTDL_LAZY_OR_NOW DL_LAZY -# else -# ifdef RTLD_NOW -# define LTDL_LAZY_OR_NOW RTLD_NOW -# else -# ifdef DL_NOW -# define LTDL_LAZY_OR_NOW DL_NOW -# else -# define LTDL_LAZY_OR_NOW 0 -# endif -# endif -# endif -# endif -#endif - -fnord() { int i=42;} -main() { void *self, *ptr1, *ptr2; self=dlopen(0,LTDL_GLOBAL|LTDL_LAZY_OR_NOW); - if(self) { ptr1=dlsym(self,"fnord"); ptr2=dlsym(self,"_fnord"); - if(ptr1 || ptr2) { dlclose(self); exit(0); } } exit(1); } - -EOF -if { (eval echo $progname:2617: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null -then - lt_cv_dlopen_self_static=yes -else - echo "$progname: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - lt_cv_dlopen_self_static=no -fi -rm -fr conftest* -fi - -fi - -echo "$ac_t""$lt_cv_dlopen_self_static" 1>&6 -fi - ;; - esac - - case "$lt_cv_dlopen_self" in - yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; - *) enable_dlopen_self=unknown ;; - esac - - case "$lt_cv_dlopen_self_static" in - yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; - *) enable_dlopen_self_static=unknown ;; - esac -fi - -# Copy echo and quote the copy, instead of the original, because it is -# used later. -ltecho="$echo" -if test "X$ltecho" = "X$CONFIG_SHELL $0 --fallback-echo"; then - ltecho="$CONFIG_SHELL \$0 --fallback-echo" -fi -LTSHELL="$SHELL" - -LTCONFIG_VERSION="$VERSION" - -# Only quote variables if we're using ltmain.sh. -case "$ltmain" in -*.sh) - # Now quote all the things that may contain metacharacters. 
- for var in ltecho old_CC old_CFLAGS old_CPPFLAGS \ - old_LD old_LDFLAGS old_LIBS \ - old_NM old_RANLIB old_LN_S old_DLLTOOL old_OBJDUMP old_AS \ - AR CC LD LN_S NM LTSHELL LTCONFIG_VERSION \ - reload_flag reload_cmds wl \ - pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \ - thread_safe_flag_spec whole_archive_flag_spec libname_spec \ - library_names_spec soname_spec \ - RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \ - old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds postuninstall_cmds \ - file_magic_cmd export_symbols_cmds deplibs_check_method allow_undefined_flag no_undefined_flag \ - finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \ - hardcode_libdir_flag_spec hardcode_libdir_separator \ - sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ - compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do - - case "$var" in - reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \ - old_postinstall_cmds | old_postuninstall_cmds | \ - export_symbols_cmds | archive_cmds | archive_expsym_cmds | \ - postinstall_cmds | postuninstall_cmds | \ - finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) - # Double-quote double-evaled strings. - eval "$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" - ;; - *) - eval "$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" - ;; - esac - done - - case "$ltecho" in - *'\$0 --fallback-echo"') - ltecho=`$echo "X$ltecho" | $Xsed -e 's/\\\\\\\$0 --fallback-echo"$/$0 --fallback-echo"/'` - ;; - esac - - trap "$rm \"$ofile\"; exit 1" 1 2 15 - echo "creating $ofile" - $rm "$ofile" - cat < "$ofile" -#! $SHELL - -# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. 
-# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) -# NOTE: Changes made to this file will be lost: look at ltconfig or ltmain.sh. -# -# Copyright (C) 1996-1999 Free Software Foundation, Inc. -# Originally by Gordon Matzigkeit , 1996 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Sed that helps us avoid accidentally triggering echo(1) options like -n. -Xsed="sed -e s/^X//" - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi - -### BEGIN LIBTOOL CONFIG -EOF - cfgfile="$ofile" - ;; - -*) - # Double-quote the variables that need it (for aesthetics). - for var in old_CC old_CFLAGS old_CPPFLAGS \ - old_LD old_LDFLAGS old_LIBS \ - old_NM old_RANLIB old_LN_S old_DLLTOOL old_OBJDUMP old_AS; do - eval "$var=\\\"\$var\\\"" - done - - # Just create a config file. 
- cfgfile="$ofile.cfg" - trap "$rm \"$cfgfile\"; exit 1" 1 2 15 - echo "creating $cfgfile" - $rm "$cfgfile" - cat < "$cfgfile" -# `$echo "$cfgfile" | sed 's%^.*/%%'` - Libtool configuration file. -# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) -EOF - ;; -esac - -cat <> "$cfgfile" -# Libtool was configured as follows, on host `(hostname || uname -n) 2>/dev/null | sed 1q`: -# -# CC=$old_CC CFLAGS=$old_CFLAGS CPPFLAGS=$old_CPPFLAGS \\ -# LD=$old_LD LDFLAGS=$old_LDFLAGS LIBS=$old_LIBS \\ -# NM=$old_NM RANLIB=$old_RANLIB LN_S=$old_LN_S \\ -# DLLTOOL=$old_DLLTOOL OBJDUMP=$old_OBJDUMP AS=$old_AS \\ -# $0$ltconfig_args -# -# Compiler and other test output produced by $progname, useful for -# debugging $progname, is in ./config.log if it exists. - -# The version of $progname that generated this script. -LTCONFIG_VERSION=$LTCONFIG_VERSION - -# Shell to use when invoking shell scripts. -SHELL=$LTSHELL - -# Whether or not to build shared libraries. -build_libtool_libs=$enable_shared - -# Whether or not to build static libraries. -build_old_libs=$enable_static - -# Whether or not to optimize for fast installation. -fast_install=$enable_fast_install - -# The host system. -host_alias=$host_alias -host=$host - -# An echo program that does not interpret backslashes. -echo=$ltecho - -# The archiver. -AR=$AR - -# The default C compiler. -CC=$CC - -# The linker used to build libraries. -LD=$LD - -# Whether we need hard or soft links. -LN_S=$LN_S - -# A BSD-compatible nm program. -NM=$NM - -# Used on cygwin: DLL creation program. -DLLTOOL="$DLLTOOL" - -# Used on cygwin: object dumper. -OBJDUMP="$OBJDUMP" - -# Used on cygwin: assembler. -AS="$AS" - -# The name of the directory that contains temporary libtool files. -objdir=$objdir - -# How to create reloadable object files. -reload_flag=$reload_flag -reload_cmds=$reload_cmds - -# How to pass a linker flag through the compiler. -wl=$wl - -# Object file suffix (normally "o"). 
-objext="$objext" - -# Old archive suffix (normally "a"). -libext="$libext" - -# Executable file suffix (normally ""). -exeext="$exeext" - -# Additional compiler flags for building library objects. -pic_flag=$pic_flag - -# Does compiler simultaneously support -c and -o options? -compiler_c_o=$compiler_c_o - -# Can we write directly to a .lo ? -compiler_o_lo=$compiler_o_lo - -# Must we lock files when doing compilation ? -need_locks=$need_locks - -# Do we need the lib prefix for modules? -need_lib_prefix=$need_lib_prefix - -# Do we need a version for libraries? -need_version=$need_version - -# Whether dlopen is supported. -dlopen=$enable_dlopen - -# Whether dlopen of programs is supported. -dlopen_self=$enable_dlopen_self - -# Whether dlopen of statically linked programs is supported. -dlopen_self_static=$enable_dlopen_self_static - -# Compiler flag to prevent dynamic linking. -link_static_flag=$link_static_flag - -# Compiler flag to turn off builtin functions. -no_builtin_flag=$no_builtin_flag - -# Compiler flag to allow reflexive dlopens. -export_dynamic_flag_spec=$export_dynamic_flag_spec - -# Compiler flag to generate shared objects directly from archives. -whole_archive_flag_spec=$whole_archive_flag_spec - -# Compiler flag to generate thread-safe objects. -thread_safe_flag_spec=$thread_safe_flag_spec - -# Library versioning type. -version_type=$version_type - -# Format of library name prefix. -libname_spec=$libname_spec - -# List of archive names. First name is the real one, the rest are links. -# The last name is the one that the linker finds with -lNAME. -library_names_spec=$library_names_spec - -# The coded name of the library, if different from the real name. -soname_spec=$soname_spec - -# Commands used to build and install an old-style archive. -RANLIB=$RANLIB -old_archive_cmds=$old_archive_cmds -old_postinstall_cmds=$old_postinstall_cmds -old_postuninstall_cmds=$old_postuninstall_cmds - -# Create an old-style archive from a shared archive. 
-old_archive_from_new_cmds=$old_archive_from_new_cmds - -# Commands used to build and install a shared archive. -archive_cmds=$archive_cmds -archive_expsym_cmds=$archive_expsym_cmds -postinstall_cmds=$postinstall_cmds -postuninstall_cmds=$postuninstall_cmds - -# Method to check whether dependent libraries are shared objects. -deplibs_check_method=$deplibs_check_method - -# Command to use when deplibs_check_method == file_magic. -file_magic_cmd=$file_magic_cmd - -# Flag that allows shared libraries with undefined symbols to be built. -allow_undefined_flag=$allow_undefined_flag - -# Flag that forces no undefined symbols. -no_undefined_flag=$no_undefined_flag - -# Commands used to finish a libtool library installation in a directory. -finish_cmds=$finish_cmds - -# Same as above, but a single script fragment to be evaled but not shown. -finish_eval=$finish_eval - -# Take the output of nm and produce a listing of raw symbols and C names. -global_symbol_pipe=$global_symbol_pipe - -# Transform the output of nm in a proper C declaration -global_symbol_to_cdecl=$global_symbol_to_cdecl - -# This is the shared library runtime path variable. -runpath_var=$runpath_var - -# This is the shared library path variable. -shlibpath_var=$shlibpath_var - -# Is shlibpath searched before the hard-coded library search path? -shlibpath_overrides_runpath=$shlibpath_overrides_runpath - -# How to hardcode a shared library path into an executable. -hardcode_action=$hardcode_action - -# Flag to hardcode \$libdir into a binary during linking. -# This must work even if \$libdir does not exist. -hardcode_libdir_flag_spec=$hardcode_libdir_flag_spec - -# Whether we need a single -rpath flag with a separated argument. -hardcode_libdir_separator=$hardcode_libdir_separator - -# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the -# resulting binary. -hardcode_direct=$hardcode_direct - -# Set to yes if using the -LDIR flag during linking hardcodes DIR into the -# resulting binary. 
-hardcode_minus_L=$hardcode_minus_L - -# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into -# the resulting binary. -hardcode_shlibpath_var=$hardcode_shlibpath_var - -# Compile-time system search path for libraries -sys_lib_search_path_spec=$sys_lib_search_path_spec - -# Run-time system search path for libraries -sys_lib_dlsearch_path_spec=$sys_lib_dlsearch_path_spec - -# Fix the shell variable \$srcfile for the compiler. -fix_srcfile_path="$fix_srcfile_path" - -# Set to yes if exported symbols are required. -always_export_symbols=$always_export_symbols - -# The commands to list exported symbols. -export_symbols_cmds=$export_symbols_cmds - -# Symbols that should not be listed in the preloaded symbols. -exclude_expsyms=$exclude_expsyms - -# Symbols that must always be exported. -include_expsyms=$include_expsyms - -EOF - -case "$ltmain" in -*.sh) - echo '### END LIBTOOL CONFIG' >> "$ofile" - echo >> "$ofile" - case "$host_os" in - aix3*) - cat <<\EOF >> "$ofile" - -# AIX sometimes has problems with the GCC collect2 program. For some -# reason, if we set the COLLECT_NAMES environment variable, the problems -# vanish in a puff of smoke. -if test "X${COLLECT_NAMES+set}" != Xset; then - COLLECT_NAMES= - export COLLECT_NAMES -fi -EOF - ;; - esac - - # Append the ltmain.sh script. - sed '$q' "$ltmain" >> "$ofile" || (rm -f "$ofile"; exit 1) - # We use sed instead of cat because bash on DJGPP gets confused if - # if finds mixed CR/LF and LF-only lines. Since sed operates in - # text mode, it properly converts lines to CR/LF. This bash problem - # is reportedly fixed, but why not run on old versions too? - - chmod +x "$ofile" - ;; - -*) - # Compile the libtool program. 
- echo "FIXME: would compile $ltmain" - ;; -esac - -test -n "$cache_file" || exit 0 - -# AC_CACHE_SAVE -trap '' 1 2 15 -cat > confcache <<\EOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs. It is not useful on other systems. -# If it contains results you don't want to keep, you may remove or edit it. -# -# By default, configure uses ./config.cache as the cache file, -# creating it if it does not exist already. You can give configure -# the --cache-file=FILE option to use a different cache file; that is -# what configure does when it calls configure scripts in -# subdirectories, so they share the cache. -# Giving --cache-file=/dev/null disables caching, for debugging configure. -# config.status only pays attention to the cache file if you give it the -# --recheck option to rerun configure. -# -EOF -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, don't put newlines in cache variables' values. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. -(set) 2>&1 | - case `(ac_space=' '; set | grep ac_space) 2>&1` in - *ac_space=\ *) - # `set' does not quote correctly, so add quotes (double-quote substitution - # turns \\\\ into \\, and sed turns \\ into \). - sed -n \ - -e "s/'/'\\\\''/g" \ - -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" - ;; - *) - # `set' quotes correctly as required by POSIX, so do not add quotes. 
- sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' - ;; - esac >> confcache -if cmp -s $cache_file confcache; then - : -else - if test -w $cache_file; then - echo "updating cache $cache_file" - cat confcache > $cache_file - else - echo "not updating unwritable cache $cache_file" - fi -fi -rm -f confcache - -exit 0 - -# Local Variables: -# mode:shell-script -# sh-indentation:2 -# End: diff --git a/pcre/ltmain.sh b/pcre/ltmain.sh deleted file mode 100644 index ab65054f..00000000 --- a/pcre/ltmain.sh +++ /dev/null @@ -1,4012 +0,0 @@ -# ltmain.sh - Provide generalized library-building support services. -# NOTE: Changing this file will not affect anything until you rerun ltconfig. -# -# Copyright (C) 1996-1999 Free Software Foundation, Inc. -# Originally by Gordon Matzigkeit , 1996 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Check that we have a working $echo. -if test "X$1" = X--no-reexec; then - # Discard the --no-reexec flag, and continue. 
- shift -elif test "X$1" = X--fallback-echo; then - # Avoid inline document here, it may be left over - : -elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then - # Yippee, $echo works! - : -else - # Restart under the correct shell, and then maybe $echo will work. - exec $SHELL "$0" --no-reexec ${1+"$@"} -fi - -if test "X$1" = X--fallback-echo; then - # used as fallback echo - shift - cat <&2 - echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 - exit 1 -fi - -if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then - echo "$modename: not configured to build any kind of library" 1>&2 - echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 - exit 1 -fi - -# Global variables. -mode=$default_mode -nonopt= -prev= -prevopt= -run= -show="$echo" -show_help= -execute_dlfiles= -lo2o="s/\\.lo\$/.${objext}/" -o2lo="s/\\.${objext}\$/.lo/" - -# Parse our command line options once, thoroughly. -while test $# -gt 0 -do - arg="$1" - shift - - case "$arg" in - -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;; - *) optarg= ;; - esac - - # If the previous option needs an argument, assign it. - if test -n "$prev"; then - case "$prev" in - execute_dlfiles) - eval "$prev=\"\$$prev \$arg\"" - ;; - *) - eval "$prev=\$arg" - ;; - esac - - prev= - prevopt= - continue - fi - - # Have we seen a non-optional argument yet? 
- case "$arg" in - --help) - show_help=yes - ;; - - --version) - echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP" - exit 0 - ;; - - --config) - sed -e '1,/^### BEGIN LIBTOOL CONFIG/d' -e '/^### END LIBTOOL CONFIG/,$d' $0 - exit 0 - ;; - - --debug) - echo "$progname: enabling shell trace mode" - set -x - ;; - - --dry-run | -n) - run=: - ;; - - --features) - echo "host: $host" - if test "$build_libtool_libs" = yes; then - echo "enable shared libraries" - else - echo "disable shared libraries" - fi - if test "$build_old_libs" = yes; then - echo "enable static libraries" - else - echo "disable static libraries" - fi - exit 0 - ;; - - --finish) mode="finish" ;; - - --mode) prevopt="--mode" prev=mode ;; - --mode=*) mode="$optarg" ;; - - --quiet | --silent) - show=: - ;; - - -dlopen) - prevopt="-dlopen" - prev=execute_dlfiles - ;; - - -*) - $echo "$modename: unrecognized option \`$arg'" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - - *) - nonopt="$arg" - break - ;; - esac -done - -if test -n "$prevopt"; then - $echo "$modename: option \`$prevopt' requires an argument" 1>&2 - $echo "$help" 1>&2 - exit 1 -fi - -if test -z "$show_help"; then - - # Infer the operation mode. - if test -z "$mode"; then - case "$nonopt" in - *cc | *++ | gcc* | *-gcc*) - mode=link - for arg - do - case "$arg" in - -c) - mode=compile - break - ;; - esac - done - ;; - *db | *dbx | *strace | *truss) - mode=execute - ;; - *install*|cp|mv) - mode=install - ;; - *rm) - mode=uninstall - ;; - *) - # If we have no mode, but dlfiles were specified, then do execute mode. - test -n "$execute_dlfiles" && mode=execute - - # Just use the default operation mode. - if test -z "$mode"; then - if test -n "$nonopt"; then - $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2 - else - $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2 - fi - fi - ;; - esac - fi - - # Only execute mode is allowed to have -dlopen flags. 
- if test -n "$execute_dlfiles" && test "$mode" != execute; then - $echo "$modename: unrecognized option \`-dlopen'" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - # Change the help message to a mode-specific one. - generic_help="$help" - help="Try \`$modename --help --mode=$mode' for more information." - - # These modes are in order of execution frequency so that they run quickly. - case "$mode" in - # libtool compile mode - compile) - modename="$modename: compile" - # Get the compilation command and the source file. - base_compile= - lastarg= - srcfile="$nonopt" - suppress_output= - - user_target=no - for arg - do - # Accept any command-line options. - case "$arg" in - -o) - if test "$user_target" != "no"; then - $echo "$modename: you cannot specify \`-o' more than once" 1>&2 - exit 1 - fi - user_target=next - ;; - - -static) - build_old_libs=yes - continue - ;; - esac - - case "$user_target" in - next) - # The next one is the -o target name - user_target=yes - continue - ;; - yes) - # We got the output file - user_target=set - libobj="$arg" - continue - ;; - esac - - # Accept the current argument as the source file. - lastarg="$srcfile" - srcfile="$arg" - - # Aesthetically quote the previous argument. - - # Backslashify any backslashes, double quotes, and dollar signs. - # These are the only characters that are still specially - # interpreted inside of double-quoted scrings. - lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"` - - # Double-quote args containing other shell metacharacters. - # Many Bourne shells cannot handle close brackets correctly in scan - # sets, so we specify it separately. - case "$lastarg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - lastarg="\"$lastarg\"" - ;; - esac - - # Add the previous argument to base_compile. - if test -z "$base_compile"; then - base_compile="$lastarg" - else - base_compile="$base_compile $lastarg" - fi - done - - case "$user_target" in - set) - ;; - no) - # Get the name of the library object. 
- libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'` - ;; - *) - $echo "$modename: you must specify a target with \`-o'" 1>&2 - exit 1 - ;; - esac - - # Recognize several different file suffixes. - # If the user specifies -o file.o, it is replaced with file.lo - xform='[cCFSfmso]' - case "$libobj" in - *.ada) xform=ada ;; - *.adb) xform=adb ;; - *.ads) xform=ads ;; - *.asm) xform=asm ;; - *.c++) xform=c++ ;; - *.cc) xform=cc ;; - *.cpp) xform=cpp ;; - *.cxx) xform=cxx ;; - *.f90) xform=f90 ;; - *.for) xform=for ;; - esac - - libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"` - - case "$libobj" in - *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;; - *) - $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2 - exit 1 - ;; - esac - - if test -z "$base_compile"; then - $echo "$modename: you must specify a compilation command" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - # Delete any leftover library objects. - if test "$build_old_libs" = yes; then - removelist="$obj $libobj" - else - removelist="$libobj" - fi - - $run $rm $removelist - trap "$run $rm $removelist; exit 1" 1 2 15 - - # Calculate the filename of the output object if compiler does - # not support -o with -c - if test "$compiler_c_o" = no; then - output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\..*$%%'`.${objext} - lockfile="$output_obj.lock" - removelist="$removelist $output_obj $lockfile" - trap "$run $rm $removelist; exit 1" 1 2 15 - else - need_locks=no - lockfile= - fi - - # Lock this critical section if it is needed - # We use this script file to make the link, it avoids creating a new file - if test "$need_locks" = yes; then - until ln "$0" "$lockfile" 2>/dev/null; do - $show "Waiting for $lockfile to be removed" - sleep 2 - done - elif test "$need_locks" = warn; then - if test -f "$lockfile"; then - echo "\ -*** ERROR, $lockfile exists and contains: -`cat $lockfile 2>/dev/null` - -This indicates that another process is trying to use the same -temporary object 
file, and libtool could not work around it because -your compiler does not support \`-c' and \`-o' together. If you -repeat this compilation, it may succeed, by chance, but you had better -avoid parallel builds (make -j) in this platform, or get a better -compiler." - - $run $rm $removelist - exit 1 - fi - echo $srcfile > "$lockfile" - fi - - if test -n "$fix_srcfile_path"; then - eval srcfile=\"$fix_srcfile_path\" - fi - - # Only build a PIC object if we are building libtool libraries. - if test "$build_libtool_libs" = yes; then - # Without this assignment, base_compile gets emptied. - fbsd_hideous_sh_bug=$base_compile - - # All platforms use -DPIC, to notify preprocessed assembler code. - command="$base_compile $srcfile $pic_flag -DPIC" - if test "$build_old_libs" = yes; then - lo_libobj="$libobj" - dir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$dir" = "X$libobj"; then - dir="$objdir" - else - dir="$dir/$objdir" - fi - libobj="$dir/"`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` - - if test -d "$dir"; then - $show "$rm $libobj" - $run $rm $libobj - else - $show "$mkdir $dir" - $run $mkdir $dir - status=$? - if test $status -ne 0 && test ! -d $dir; then - exit $status - fi - fi - fi - if test "$compiler_o_lo" = yes; then - output_obj="$libobj" - command="$command -o $output_obj" - elif test "$compiler_c_o" = yes; then - output_obj="$obj" - command="$command -o $output_obj" - fi - - $run $rm "$output_obj" - $show "$command" - if $run eval "$command"; then : - else - test -n "$output_obj" && $run $rm $removelist - exit 1 - fi - - if test "$need_locks" = warn && - test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then - echo "\ -*** ERROR, $lockfile contains: -`cat $lockfile 2>/dev/null` - -but it should contain: -$srcfile - -This indicates that another process is trying to use the same -temporary object file, and libtool could not work around it because -your compiler does not support \`-c' and \`-o' together. 
If you -repeat this compilation, it may succeed, by chance, but you had better -avoid parallel builds (make -j) in this platform, or get a better -compiler." - - $run $rm $removelist - exit 1 - fi - - # Just move the object if needed, then go on to compile the next one - if test x"$output_obj" != x"$libobj"; then - $show "$mv $output_obj $libobj" - if $run $mv $output_obj $libobj; then : - else - error=$? - $run $rm $removelist - exit $error - fi - fi - - # If we have no pic_flag, then copy the object into place and finish. - if test -z "$pic_flag" && test "$build_old_libs" = yes; then - # Rename the .lo from within objdir to obj - if test -f $obj; then - $show $rm $obj - $run $rm $obj - fi - - $show "$mv $libobj $obj" - if $run $mv $libobj $obj; then : - else - error=$? - $run $rm $removelist - exit $error - fi - - xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$xdir" = "X$obj"; then - xdir="." - else - xdir="$xdir" - fi - baseobj=`$echo "X$obj" | $Xsed -e "s%.*/%%"` - libobj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` - # Now arrange that obj and lo_libobj become the same file - $show "(cd $xdir && $LN_S $baseobj $libobj)" - if $run eval '(cd $xdir && $LN_S $baseobj $libobj)'; then - exit 0 - else - error=$? - $run $rm $removelist - exit $error - fi - fi - - # Allow error messages only from the first compilation. - suppress_output=' >/dev/null 2>&1' - fi - - # Only build a position-dependent object if we build old libraries. - if test "$build_old_libs" = yes; then - command="$base_compile $srcfile" - if test "$compiler_c_o" = yes; then - command="$command -o $obj" - output_obj="$obj" - fi - - # Suppress compiler output if we already did a PIC compilation. 
- command="$command$suppress_output" - $run $rm "$output_obj" - $show "$command" - if $run eval "$command"; then : - else - $run $rm $removelist - exit 1 - fi - - if test "$need_locks" = warn && - test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then - echo "\ -*** ERROR, $lockfile contains: -`cat $lockfile 2>/dev/null` - -but it should contain: -$srcfile - -This indicates that another process is trying to use the same -temporary object file, and libtool could not work around it because -your compiler does not support \`-c' and \`-o' together. If you -repeat this compilation, it may succeed, by chance, but you had better -avoid parallel builds (make -j) in this platform, or get a better -compiler." - - $run $rm $removelist - exit 1 - fi - - # Just move the object if needed - if test x"$output_obj" != x"$obj"; then - $show "$mv $output_obj $obj" - if $run $mv $output_obj $obj; then : - else - error=$? - $run $rm $removelist - exit $error - fi - fi - - # Create an invalid libtool object if no PIC, so that we do not - # accidentally link it into a program. - if test "$build_libtool_libs" != yes; then - $show "echo timestamp > $libobj" - $run eval "echo timestamp > \$libobj" || exit $? - else - # Move the .lo from within objdir - $show "$mv $libobj $lo_libobj" - if $run $mv $libobj $lo_libobj; then : - else - error=$? - $run $rm $removelist - exit $error - fi - fi - fi - - # Unlock the critical section if it was locked - if test "$need_locks" != no; then - $rm "$lockfile" - fi - - exit 0 - ;; - - # libtool link mode - link) - modename="$modename: link" - case "$host" in - *-*-cygwin* | *-*-mingw* | *-*-os2*) - # It is impossible to link a dll without this setting, and - # we shouldn't force the makefile maintainer to figure out - # which system we are compiling for in order to pass an extra - # flag for every libtool invokation. 
- # allow_undefined=no - - # FIXME: Unfortunately, there are problems with the above when trying - # to make a dll which has undefined symbols, in which case not - # even a static library is built. For now, we need to specify - # -no-undefined on the libtool link line when we can be certain - # that all symbols are satisfied, otherwise we get a static library. - allow_undefined=yes - - # This is a source program that is used to create dlls on Windows - # Don't remove nor modify the starting and closing comments -# /* ltdll.c starts here */ -# #define WIN32_LEAN_AND_MEAN -# #include -# #undef WIN32_LEAN_AND_MEAN -# #include -# -# #ifndef __CYGWIN__ -# # ifdef __CYGWIN32__ -# # define __CYGWIN__ __CYGWIN32__ -# # endif -# #endif -# -# #ifdef __cplusplus -# extern "C" { -# #endif -# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved); -# #ifdef __cplusplus -# } -# #endif -# -# #ifdef __CYGWIN__ -# #include -# DECLARE_CYGWIN_DLL( DllMain ); -# #endif -# HINSTANCE __hDllInstance_base; -# -# BOOL APIENTRY -# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved) -# { -# __hDllInstance_base = hInst; -# return TRUE; -# } -# /* ltdll.c ends here */ - # This is a source program that is used to create import libraries - # on Windows for dlls which lack them. Don't remove nor modify the - # starting and closing comments -# /* impgen.c starts here */ -# /* Copyright (C) 1999 Free Software Foundation, Inc. -# -# This file is part of GNU libtool. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# */ -# -# #include /* for printf() */ -# #include /* for open(), lseek(), read() */ -# #include /* for O_RDONLY, O_BINARY */ -# #include /* for strdup() */ -# -# static unsigned int -# pe_get16 (fd, offset) -# int fd; -# int offset; -# { -# unsigned char b[2]; -# lseek (fd, offset, SEEK_SET); -# read (fd, b, 2); -# return b[0] + (b[1]<<8); -# } -# -# static unsigned int -# pe_get32 (fd, offset) -# int fd; -# int offset; -# { -# unsigned char b[4]; -# lseek (fd, offset, SEEK_SET); -# read (fd, b, 4); -# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); -# } -# -# static unsigned int -# pe_as32 (ptr) -# void *ptr; -# { -# unsigned char *b = ptr; -# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); -# } -# -# int -# main (argc, argv) -# int argc; -# char *argv[]; -# { -# int dll; -# unsigned long pe_header_offset, opthdr_ofs, num_entries, i; -# unsigned long export_rva, export_size, nsections, secptr, expptr; -# unsigned long name_rvas, nexp; -# unsigned char *expdata, *erva; -# char *filename, *dll_name; -# -# filename = argv[1]; -# -# dll = open(filename, O_RDONLY|O_BINARY); -# if (!dll) -# return 1; -# -# dll_name = filename; -# -# for (i=0; filename[i]; i++) -# if (filename[i] == '/' || filename[i] == '\\' || filename[i] == ':') -# dll_name = filename + i +1; -# -# pe_header_offset = pe_get32 (dll, 0x3c); -# opthdr_ofs = pe_header_offset + 4 + 20; -# num_entries = pe_get32 (dll, opthdr_ofs + 92); -# -# if (num_entries < 1) /* no exports */ -# return 1; -# -# export_rva = pe_get32 (dll, opthdr_ofs + 96); -# export_size = pe_get32 (dll, opthdr_ofs + 100); -# nsections = pe_get16 (dll, pe_header_offset + 4 +2); -# secptr = (pe_header_offset + 4 + 20 + -# pe_get16 (dll, pe_header_offset + 4 + 16)); -# -# expptr = 0; -# for (i = 0; i < nsections; i++) -# { 
-# char sname[8]; -# unsigned long secptr1 = secptr + 40 * i; -# unsigned long vaddr = pe_get32 (dll, secptr1 + 12); -# unsigned long vsize = pe_get32 (dll, secptr1 + 16); -# unsigned long fptr = pe_get32 (dll, secptr1 + 20); -# lseek(dll, secptr1, SEEK_SET); -# read(dll, sname, 8); -# if (vaddr <= export_rva && vaddr+vsize > export_rva) -# { -# expptr = fptr + (export_rva - vaddr); -# if (export_rva + export_size > vaddr + vsize) -# export_size = vsize - (export_rva - vaddr); -# break; -# } -# } -# -# expdata = (unsigned char*)malloc(export_size); -# lseek (dll, expptr, SEEK_SET); -# read (dll, expdata, export_size); -# erva = expdata - export_rva; -# -# nexp = pe_as32 (expdata+24); -# name_rvas = pe_as32 (expdata+32); -# -# printf ("EXPORTS\n"); -# for (i = 0; i&2 - fi - if test -n "$link_static_flag"; then - dlopen_self=$dlopen_self_static - fi - else - if test -z "$pic_flag" && test -n "$link_static_flag"; then - dlopen_self=$dlopen_self_static - fi - fi - build_libtool_libs=no - build_old_libs=yes - prefer_static_libs=yes - break - ;; - esac - done - - # See if our shared archives depend on static archives. - test -n "$old_archive_from_new_cmds" && build_old_libs=yes - - # Go through the arguments, transforming them on the way. - while test $# -gt 0; do - arg="$1" - shift - - # If the previous option needs an argument, assign it. - if test -n "$prev"; then - case "$prev" in - output) - compile_command="$compile_command @OUTPUT@" - finalize_command="$finalize_command @OUTPUT@" - ;; - esac - - case "$prev" in - dlfiles|dlprefiles) - if test "$preload" = no; then - # Add the symbol object into the linking commands. - compile_command="$compile_command @SYMFILE@" - finalize_command="$finalize_command @SYMFILE@" - preload=yes - fi - case "$arg" in - *.la | *.lo) ;; # We handle these cases below. 
- force) - if test "$dlself" = no; then - dlself=needless - export_dynamic=yes - fi - prev= - continue - ;; - self) - if test "$prev" = dlprefiles; then - dlself=yes - elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then - dlself=yes - else - dlself=needless - export_dynamic=yes - fi - prev= - continue - ;; - *) - if test "$prev" = dlfiles; then - dlfiles="$dlfiles $arg" - else - dlprefiles="$dlprefiles $arg" - fi - prev= - ;; - esac - ;; - expsyms) - export_symbols="$arg" - if test ! -f "$arg"; then - $echo "$modename: symbol file \`$arg' does not exist" - exit 1 - fi - prev= - continue - ;; - expsyms_regex) - export_symbols_regex="$arg" - prev= - continue - ;; - release) - release="-$arg" - prev= - continue - ;; - rpath | xrpath) - # We need an absolute path. - case "$arg" in - [\\/]* | [A-Za-z]:[\\/]*) ;; - *) - $echo "$modename: only absolute run-paths are allowed" 1>&2 - exit 1 - ;; - esac - if test "$prev" = rpath; then - case "$rpath " in - *" $arg "*) ;; - *) rpath="$rpath $arg" ;; - esac - else - case "$xrpath " in - *" $arg "*) ;; - *) xrpath="$xrpath $arg" ;; - esac - fi - prev= - continue - ;; - *) - eval "$prev=\"\$arg\"" - prev= - continue - ;; - esac - fi - - prevarg="$arg" - - case "$arg" in - -all-static) - if test -n "$link_static_flag"; then - compile_command="$compile_command $link_static_flag" - finalize_command="$finalize_command $link_static_flag" - fi - continue - ;; - - -allow-undefined) - # FIXME: remove this flag sometime in the future. 
- $echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2 - continue - ;; - - -avoid-version) - avoid_version=yes - continue - ;; - - -dlopen) - prev=dlfiles - continue - ;; - - -dlpreopen) - prev=dlprefiles - continue - ;; - - -export-dynamic) - export_dynamic=yes - continue - ;; - - -export-symbols | -export-symbols-regex) - if test -n "$export_symbols" || test -n "$export_symbols_regex"; then - $echo "$modename: not more than one -exported-symbols argument allowed" - exit 1 - fi - if test "X$arg" = "X-export-symbols"; then - prev=expsyms - else - prev=expsyms_regex - fi - continue - ;; - - -L*) - dir=`$echo "X$arg" | $Xsed -e 's/^-L//'` - # We need an absolute path. - case "$dir" in - [\\/]* | [A-Za-z]:[\\/]*) ;; - *) - absdir=`cd "$dir" && pwd` - if test -z "$absdir"; then - $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2 - $echo "$modename: passing it literally to the linker, although it might fail" 1>&2 - absdir="$dir" - fi - dir="$absdir" - ;; - esac - case " $deplibs " in - *" $arg "*) ;; - *) deplibs="$deplibs $arg";; - esac - case " $lib_search_path " in - *" $dir "*) ;; - *) lib_search_path="$lib_search_path $dir";; - esac - case "$host" in - *-*-cygwin* | *-*-mingw* | *-*-os2*) - dllsearchdir=`cd "$dir" && pwd || echo "$dir"` - case ":$dllsearchpath:" in - ::) dllsearchpath="$dllsearchdir";; - *":$dllsearchdir:"*) ;; - *) dllsearchpath="$dllsearchpath:$dllsearchdir";; - esac - ;; - esac - ;; - - -l*) - if test "$arg" = "-lc"; then - case "$host" in - *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*) - # These systems don't actually have c library (as such) - continue - ;; - esac - elif test "$arg" = "-lm"; then - case "$host" in - *-*-cygwin* | *-*-beos*) - # These systems don't actually have math library (as such) - continue - ;; - esac - fi - deplibs="$deplibs $arg" - ;; - - -module) - module=yes - continue - ;; - - -no-undefined) - allow_undefined=no - continue - ;; - - -o) prev=output 
;; - - -release) - prev=release - continue - ;; - - -rpath) - prev=rpath - continue - ;; - - -R) - prev=xrpath - continue - ;; - - -R*) - dir=`$echo "X$arg" | $Xsed -e 's/^-R//'` - # We need an absolute path. - case "$dir" in - [\\/]* | [A-Za-z]:[\\/]*) ;; - *) - $echo "$modename: only absolute run-paths are allowed" 1>&2 - exit 1 - ;; - esac - case "$xrpath " in - *" $dir "*) ;; - *) xrpath="$xrpath $dir" ;; - esac - continue - ;; - - -static) - # If we have no pic_flag, then this is the same as -all-static. - if test -z "$pic_flag" && test -n "$link_static_flag"; then - compile_command="$compile_command $link_static_flag" - finalize_command="$finalize_command $link_static_flag" - fi - continue - ;; - - -thread-safe) - thread_safe=yes - continue - ;; - - -version-info) - prev=vinfo - continue - ;; - - # Some other compiler flag. - -* | +*) - # Unknown arguments in both finalize_command and compile_command need - # to be aesthetically quoted because they are evaled later. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case "$arg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" - ;; - esac - ;; - - *.o | *.obj | *.a | *.lib) - # A standard object. - objs="$objs $arg" - ;; - - *.lo) - # A library object. - if test "$prev" = dlfiles; then - dlfiles="$dlfiles $arg" - if test "$build_libtool_libs" = yes && test "$dlopen" = yes; then - prev= - continue - else - # If libtool objects are unsupported, then we need to preload. - prev=dlprefiles - fi - fi - - if test "$prev" = dlprefiles; then - # Preload the old-style object. - dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e "$lo2o"` - prev= - fi - libobjs="$libobjs $arg" - ;; - - *.la) - # A libtool-controlled library. - - dlname= - libdir= - library_names= - old_library= - - # Check to see that this really is a libtool archive. 
- if (sed -e '2q' $arg | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : - else - $echo "$modename: \`$arg' is not a valid libtool archive" 1>&2 - exit 1 - fi - - # If the library was installed with an old release of libtool, - # it will not redefine variable installed. - installed=yes - - # Read the .la file - # If there is no directory component, then add one. - case "$arg" in - */* | *\\*) . $arg ;; - *) . ./$arg ;; - esac - - # Get the name of the library we link against. - linklib= - for l in $old_library $library_names; do - linklib="$l" - done - - if test -z "$linklib"; then - $echo "$modename: cannot find name of link library for \`$arg'" 1>&2 - exit 1 - fi - - # Find the relevant object directory and library name. - name=`$echo "X$arg" | $Xsed -e 's%^.*/%%' -e 's/\.la$//' -e 's/^lib//'` - - if test "X$installed" = Xyes; then - dir="$libdir" - else - dir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` - if test "X$dir" = "X$arg"; then - dir="$objdir" - else - dir="$dir/$objdir" - fi - fi - - if test -n "$dependency_libs"; then - # Extract -R and -L from dependency_libs - temp_deplibs= - for deplib in $dependency_libs; do - case "$deplib" in - -R*) temp_xrpath=`$echo "X$deplib" | $Xsed -e 's/^-R//'` - case " $rpath $xrpath " in - *" $temp_xrpath "*) ;; - *) xrpath="$xrpath $temp_xrpath";; - esac;; - -L*) case "$compile_command $temp_deplibs " in - *" $deplib "*) ;; - *) temp_deplibs="$temp_deplibs $deplib";; - esac - temp_dir=`$echo "X$deplib" | $Xsed -e 's/^-L//'` - case " $lib_search_path " in - *" $temp_dir "*) ;; - *) lib_search_path="$lib_search_path $temp_dir";; - esac - ;; - *) temp_deplibs="$temp_deplibs $deplib";; - esac - done - dependency_libs="$temp_deplibs" - fi - - if test -z "$libdir"; then - # It is a libtool convenience library, so add in its objects. 
- convenience="$convenience $dir/$old_library" - old_convenience="$old_convenience $dir/$old_library" - deplibs="$deplibs$dependency_libs" - compile_command="$compile_command $dir/$old_library$dependency_libs" - finalize_command="$finalize_command $dir/$old_library$dependency_libs" - continue - fi - - # This library was specified with -dlopen. - if test "$prev" = dlfiles; then - dlfiles="$dlfiles $arg" - if test -z "$dlname" || test "$dlopen" != yes || test "$build_libtool_libs" = no; then - # If there is no dlname, no dlopen support or we're linking statically, - # we need to preload. - prev=dlprefiles - else - # We should not create a dependency on this library, but we - # may need any libraries it requires. - compile_command="$compile_command$dependency_libs" - finalize_command="$finalize_command$dependency_libs" - prev= - continue - fi - fi - - # The library was specified with -dlpreopen. - if test "$prev" = dlprefiles; then - # Prefer using a static library (so that no silly _DYNAMIC symbols - # are required to link). - if test -n "$old_library"; then - dlprefiles="$dlprefiles $dir/$old_library" - else - dlprefiles="$dlprefiles $dir/$linklib" - fi - prev= - fi - - if test -n "$library_names" && - { test "$prefer_static_libs" = no || test -z "$old_library"; }; then - link_against_libtool_libs="$link_against_libtool_libs $arg" - if test -n "$shlibpath_var"; then - # Make sure the rpath contains only unique directories. - case "$temp_rpath " in - *" $dir "*) ;; - *) temp_rpath="$temp_rpath $dir" ;; - esac - fi - - # We need an absolute path. - case "$dir" in - [\\/] | [A-Za-z]:[\\/]*) absdir="$dir" ;; - *) - absdir=`cd "$dir" && pwd` - if test -z "$absdir"; then - $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2 - $echo "$modename: passing it literally to the linker, although it might fail" 1>&2 - absdir="$dir" - fi - ;; - esac - - # This is the magic to use -rpath. 
- # Skip directories that are in the system default run-time - # search path, unless they have been requested with -R. - case " $sys_lib_dlsearch_path " in - *" $absdir "*) ;; - *) - case "$compile_rpath " in - *" $absdir "*) ;; - *) compile_rpath="$compile_rpath $absdir" - esac - ;; - esac - - case " $sys_lib_dlsearch_path " in - *" $libdir "*) ;; - *) - case "$finalize_rpath " in - *" $libdir "*) ;; - *) finalize_rpath="$finalize_rpath $libdir" - esac - ;; - esac - - lib_linked=yes - case "$hardcode_action" in - immediate | unsupported) - if test "$hardcode_direct" = no; then - compile_command="$compile_command $dir/$linklib" - deplibs="$deplibs $dir/$linklib" - case "$host" in - *-*-cygwin* | *-*-mingw* | *-*-os2*) - dllsearchdir=`cd "$dir" && pwd || echo "$dir"` - if test -n "$dllsearchpath"; then - dllsearchpath="$dllsearchpath:$dllsearchdir" - else - dllsearchpath="$dllsearchdir" - fi - ;; - esac - elif test "$hardcode_minus_L" = no; then - case "$host" in - *-*-sunos*) - compile_shlibpath="$compile_shlibpath$dir:" - ;; - esac - case "$compile_command " in - *" -L$dir "*) ;; - *) compile_command="$compile_command -L$dir";; - esac - compile_command="$compile_command -l$name" - deplibs="$deplibs -L$dir -l$name" - elif test "$hardcode_shlibpath_var" = no; then - case ":$compile_shlibpath:" in - *":$dir:"*) ;; - *) compile_shlibpath="$compile_shlibpath$dir:";; - esac - compile_command="$compile_command -l$name" - deplibs="$deplibs -l$name" - else - lib_linked=no - fi - ;; - - relink) - if test "$hardcode_direct" = yes; then - compile_command="$compile_command $absdir/$linklib" - deplibs="$deplibs $absdir/$linklib" - elif test "$hardcode_minus_L" = yes; then - case "$compile_command " in - *" -L$absdir "*) ;; - *) compile_command="$compile_command -L$absdir";; - esac - compile_command="$compile_command -l$name" - deplibs="$deplibs -L$absdir -l$name" - elif test "$hardcode_shlibpath_var" = yes; then - case ":$compile_shlibpath:" in - *":$absdir:"*) ;; - *) 
compile_shlibpath="$compile_shlibpath$absdir:";; - esac - compile_command="$compile_command -l$name" - deplibs="$deplibs -l$name" - else - lib_linked=no - fi - ;; - - *) - lib_linked=no - ;; - esac - - if test "$lib_linked" != yes; then - $echo "$modename: configuration error: unsupported hardcode properties" - exit 1 - fi - - # Finalize command for both is simple: just hardcode it. - if test "$hardcode_direct" = yes; then - finalize_command="$finalize_command $libdir/$linklib" - elif test "$hardcode_minus_L" = yes; then - case "$finalize_command " in - *" -L$libdir "*) ;; - *) finalize_command="$finalize_command -L$libdir";; - esac - finalize_command="$finalize_command -l$name" - elif test "$hardcode_shlibpath_var" = yes; then - case ":$finalize_shlibpath:" in - *":$libdir:"*) ;; - *) finalize_shlibpath="$finalize_shlibpath$libdir:";; - esac - finalize_command="$finalize_command -l$name" - else - # We cannot seem to hardcode it, guess we'll fake it. - case "$finalize_command " in - *" -L$dir "*) ;; - *) finalize_command="$finalize_command -L$libdir";; - esac - finalize_command="$finalize_command -l$name" - fi - else - # Transform directly to old archives if we don't build new libraries. - if test -n "$pic_flag" && test -z "$old_library"; then - $echo "$modename: cannot find static library for \`$arg'" 1>&2 - exit 1 - fi - - # Here we assume that one of hardcode_direct or hardcode_minus_L - # is not unsupported. This is valid on all known static and - # shared platforms. 
- if test "$hardcode_direct" != unsupported; then - test -n "$old_library" && linklib="$old_library" - compile_command="$compile_command $dir/$linklib" - finalize_command="$finalize_command $dir/$linklib" - else - case "$compile_command " in - *" -L$dir "*) ;; - *) compile_command="$compile_command -L$dir";; - esac - compile_command="$compile_command -l$name" - case "$finalize_command " in - *" -L$dir "*) ;; - *) finalize_command="$finalize_command -L$dir";; - esac - finalize_command="$finalize_command -l$name" - fi - fi - - # Add in any libraries that this one depends upon. - compile_command="$compile_command$dependency_libs" - finalize_command="$finalize_command$dependency_libs" - continue - ;; - - # Some other compiler argument. - *) - # Unknown arguments in both finalize_command and compile_command need - # to be aesthetically quoted because they are evaled later. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case "$arg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" - ;; - esac - ;; - esac - - # Now actually substitute the argument into the commands. 
- if test -n "$arg"; then - compile_command="$compile_command $arg" - finalize_command="$finalize_command $arg" - fi - done - - if test -n "$prev"; then - $echo "$modename: the \`$prevarg' option requires an argument" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then - eval arg=\"$export_dynamic_flag_spec\" - compile_command="$compile_command $arg" - finalize_command="$finalize_command $arg" - fi - - oldlibs= - # calculate the name of the file, without its directory - outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'` - libobjs_save="$libobjs" - - case "$output" in - "") - $echo "$modename: you must specify an output file" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - - *.a | *.lib) - if test -n "$link_against_libtool_libs"; then - $echo "$modename: error: cannot link libtool libraries into archives" 1>&2 - exit 1 - fi - - if test -n "$deplibs"; then - $echo "$modename: warning: \`-l' and \`-L' are ignored for archives" 1>&2 - fi - - if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then - $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2 - fi - - if test -n "$rpath"; then - $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2 - fi - - if test -n "$xrpath"; then - $echo "$modename: warning: \`-R' is ignored for archives" 1>&2 - fi - - if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored for archives" 1>&2 - fi - - if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored for archives" 1>&2 - fi - - if test -n "$export_symbols" || test -n "$export_symbols_regex"; then - $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2 - fi - - # Now set the variables for building old libraries. - build_libtool_libs=no - oldlibs="$output" - ;; - - *.la) - # Make sure we only generate libraries of the form `libNAME.la'. 
- case "$outputname" in - lib*) - name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` - eval libname=\"$libname_spec\" - ;; - *) - if test "$module" = no; then - $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - if test "$need_lib_prefix" != no; then - # Add the "lib" prefix for modules if required - name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` - eval libname=\"$libname_spec\" - else - libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` - fi - ;; - esac - - output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'` - if test "X$output_objdir" = "X$output"; then - output_objdir="$objdir" - else - output_objdir="$output_objdir/$objdir" - fi - - if test -n "$objs"; then - $echo "$modename: cannot build libtool library \`$output' from non-libtool objects:$objs" 2>&1 - exit 1 - fi - - # How the heck are we supposed to write a wrapper for a shared library? - if test -n "$link_against_libtool_libs"; then - $echo "$modename: error: cannot link shared libraries into libtool libraries" 1>&2 - exit 1 - fi - - if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then - $echo "$modename: warning: \`-dlopen' is ignored for libtool libraries" 1>&2 - fi - - set dummy $rpath - if test $# -gt 2; then - $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2 - fi - install_libdir="$2" - - oldlibs= - if test -z "$rpath"; then - if test "$build_libtool_libs" = yes; then - # Building a libtool convenience library. - libext=al - oldlibs="$output_objdir/$libname.$libext $oldlibs" - build_libtool_libs=convenience - build_old_libs=yes - fi - dependency_libs="$deplibs" - - if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored for convenience libraries" 1>&2 - fi - - if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2 - fi - else - - # Parse the version information argument. 
- IFS="${IFS= }"; save_ifs="$IFS"; IFS=':' - set dummy $vinfo 0 0 0 - IFS="$save_ifs" - - if test -n "$8"; then - $echo "$modename: too many parameters to \`-version-info'" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - current="$2" - revision="$3" - age="$4" - - # Check that each of the things are valid numbers. - case "$current" in - 0 | [1-9] | [1-9][0-9]*) ;; - *) - $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac - - case "$revision" in - 0 | [1-9] | [1-9][0-9]*) ;; - *) - $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac - - case "$age" in - 0 | [1-9] | [1-9][0-9]*) ;; - *) - $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac - - if test $age -gt $current; then - $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - fi - - # Calculate the version variables. - major= - versuffix= - verstring= - case "$version_type" in - none) ;; - - irix) - major=`expr $current - $age + 1` - versuffix="$major.$revision" - verstring="sgi$major.$revision" - - # Add in all the interfaces that we are compatible with. - loop=$revision - while test $loop != 0; do - iface=`expr $revision - $loop` - loop=`expr $loop - 1` - verstring="sgi$major.$iface:$verstring" - done - ;; - - linux) - major=.`expr $current - $age` - versuffix="$major.$age.$revision" - ;; - - osf) - major=`expr $current - $age` - versuffix=".$current.$age.$revision" - verstring="$current.$age.$revision" - - # Add in all the interfaces that we are compatible with. 
- loop=$age - while test $loop != 0; do - iface=`expr $current - $loop` - loop=`expr $loop - 1` - verstring="$verstring:${iface}.0" - done - - # Make executables depend on our current version. - verstring="$verstring:${current}.0" - ;; - - sunos) - major=".$current" - versuffix=".$current.$revision" - ;; - - freebsd-aout) - major=".$current" - versuffix=".$current.$revision"; - ;; - - freebsd-elf) - major=".$current" - versuffix=".$current"; - ;; - - windows) - # Like Linux, but with '-' rather than '.', since we only - # want one extension on Windows 95. - major=`expr $current - $age` - versuffix="-$major-$age-$revision" - ;; - - *) - $echo "$modename: unknown library version type \`$version_type'" 1>&2 - echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 - exit 1 - ;; - esac - - # Clear the version info if we defaulted, and they specified a release. - if test -z "$vinfo" && test -n "$release"; then - major= - verstring="0.0" - if test "$need_version" = no; then - versuffix= - else - versuffix=".0.0" - fi - fi - - # Remove version info from name if versioning should be avoided - if test "$avoid_version" = yes && test "$need_version" = no; then - major= - versuffix= - verstring="" - fi - - # Check to see if the archive will have undefined symbols. - if test "$allow_undefined" = yes; then - if test "$allow_undefined_flag" = unsupported; then - $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2 - build_libtool_libs=no - build_old_libs=yes - fi - else - # Don't allow undefined symbols. - allow_undefined_flag="$no_undefined_flag" - fi - - dependency_libs="$deplibs" - case "$host" in - *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*) - # these systems don't actually have a c library (as such)! - ;; - *) - # Add libc to deplibs on all other systems. - deplibs="$deplibs -lc" - ;; - esac - fi - - # Create the output directory, or remove our outputs if we need to. 
- if test -d $output_objdir; then - $show "${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*" - $run ${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.* - else - $show "$mkdir $output_objdir" - $run $mkdir $output_objdir - status=$? - if test $status -ne 0 && test ! -d $output_objdir; then - exit $status - fi - fi - - # Now set the variables for building old libraries. - if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then - oldlibs="$oldlibs $output_objdir/$libname.$libext" - - # Transform .lo files to .o files. - oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP` - fi - - if test "$build_libtool_libs" = yes; then - # Transform deplibs into only deplibs that can be linked in shared. - name_save=$name - libname_save=$libname - release_save=$release - versuffix_save=$versuffix - major_save=$major - # I'm not sure if I'm treating the release correctly. I think - # release should show up in the -l (ie -lgmp5) so we don't want to - # add it in twice. Is that correct? - release="" - versuffix="" - major="" - newdeplibs= - droppeddeps=no - case "$deplibs_check_method" in - pass_all) - # Don't check for shared/static. Everything works. - # This might be a little naive. We might want to check - # whether the library exists or not. But this is on - # osf3 & osf4 and I'm not really sure... Just - # implementing what was already the behaviour. - newdeplibs=$deplibs - ;; - test_compile) - # This code stresses the "libraries are programs" paradigm to its - # limits. Maybe even breaks it. We compile a program, linking it - # against the deplibs as a proxy for the library. Then we can check - # whether they linked in statically or dynamically with ldd. - $rm conftest.c - cat > conftest.c </dev/null` - for potent_lib in $potential_libs; do - # Follow soft links. 
- if ls -lLd "$potent_lib" 2>/dev/null \ - | grep " -> " >/dev/null; then - continue - fi - # The statement above tries to avoid entering an - # endless loop below, in case of cyclic links. - # We might still enter an endless loop, since a link - # loop can be closed while we follow links, - # but so what? - potlib="$potent_lib" - while test -h "$potlib" 2>/dev/null; do - potliblink=`ls -ld $potlib | sed 's/.* -> //'` - case "$potliblink" in - [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; - *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";; - esac - done - if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \ - | sed 10q \ - | egrep "$file_magic_regex" > /dev/null; then - newdeplibs="$newdeplibs $a_deplib" - a_deplib="" - break 2 - fi - done - done - if test -n "$a_deplib" ; then - droppeddeps=yes - echo - echo "*** Warning: This library needs some functionality provided by $a_deplib." - echo "*** I have the capability to make that library automatically link in when" - echo "*** you link to this library. But I can only do this if you have a" - echo "*** shared version of the library, which you do not appear to have." - fi - else - # Add a -L argument. - newdeplibs="$newdeplibs $a_deplib" - fi - done # Gone through all deplibs. - ;; - none | unknown | *) - newdeplibs="" - if $echo "X $deplibs" | $Xsed -e 's/ -lc$//' \ - -e 's/ -[LR][^ ]*//g' -e 's/[ ]//g' | - grep . >/dev/null; then - echo - if test "X$deplibs_check_method" = "Xnone"; then - echo "*** Warning: inter-library dependencies are not supported in this platform." - else - echo "*** Warning: inter-library dependencies are not known to be supported." - fi - echo "*** All declared inter-library dependencies are being dropped." 
- droppeddeps=yes - fi - ;; - esac - versuffix=$versuffix_save - major=$major_save - release=$release_save - libname=$libname_save - name=$name_save - - if test "$droppeddeps" = yes; then - if test "$module" = yes; then - echo - echo "*** Warning: libtool could not satisfy all declared inter-library" - echo "*** dependencies of module $libname. Therefore, libtool will create" - echo "*** a static module, that should work as long as the dlopening" - echo "*** application is linked with the -dlopen flag." - if test -z "$global_symbol_pipe"; then - echo - echo "*** However, this would only work if libtool was able to extract symbol" - echo "*** lists from a program, using \`nm' or equivalent, but libtool could" - echo "*** not find such a program. So, this module is probably useless." - echo "*** \`nm' from GNU binutils and a full rebuild may help." - fi - if test "$build_old_libs" = no; then - oldlibs="$output_objdir/$libname.$libext" - build_libtool_libs=module - build_old_libs=yes - else - build_libtool_libs=no - fi - else - echo "*** The inter-library dependencies that have been dropped here will be" - echo "*** automatically added whenever a program is linked with this library" - echo "*** or is declared to -dlopen it." - fi - fi - # Done checking deplibs! - deplibs=$newdeplibs - fi - - # All the library-specific variables (install_libdir is set above). - library_names= - old_library= - dlname= - - # Test again, we may have decided not to build it any more - if test "$build_libtool_libs" = yes; then - # Get the real and link names of the library. - eval library_names=\"$library_names_spec\" - set dummy $library_names - realname="$2" - shift; shift - - if test -n "$soname_spec"; then - eval soname=\"$soname_spec\" - else - soname="$realname" - fi - - lib="$output_objdir/$realname" - for link - do - linknames="$linknames $link" - done - - # Ensure that we have .o objects for linkers which dislike .lo - # (e.g. 
aix) in case we are running --disable-static - for obj in $libobjs; do - xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$xdir" = "X$obj"; then - xdir="." - else - xdir="$xdir" - fi - baseobj=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` - oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` - if test ! -f $xdir/$oldobj; then - $show "(cd $xdir && ${LN_S} $baseobj $oldobj)" - $run eval '(cd $xdir && ${LN_S} $baseobj $oldobj)' || exit $? - fi - done - - # Use standard objects if they are pic - test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` - - # Prepare the list of exported symbols - if test -z "$export_symbols"; then - if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then - $show "generating symbol list for \`$libname.la'" - export_symbols="$output_objdir/$libname.exp" - $run $rm $export_symbols - eval cmds=\"$export_symbols_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - if test -n "$export_symbols_regex"; then - $show "egrep -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\"" - $run eval 'egrep -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' - $show "$mv \"${export_symbols}T\" \"$export_symbols\"" - $run eval '$mv "${export_symbols}T" "$export_symbols"' - fi - fi - fi - - if test -n "$export_symbols" && test -n "$include_expsyms"; then - $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"' - fi - - if test -n "$convenience"; then - if test -n "$whole_archive_flag_spec"; then - eval libobjs=\"\$libobjs $whole_archive_flag_spec\" - else - gentop="$output_objdir/${outputname}x" - $show "${rm}r $gentop" - $run ${rm}r "$gentop" - $show "mkdir $gentop" - $run mkdir "$gentop" - status=$? - if test $status -ne 0 && test ! 
-d "$gentop"; then - exit $status - fi - generated="$generated $gentop" - - for xlib in $convenience; do - # Extract the objects. - case "$xlib" in - [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; - *) xabs=`pwd`"/$xlib" ;; - esac - xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` - xdir="$gentop/$xlib" - - $show "${rm}r $xdir" - $run ${rm}r "$xdir" - $show "mkdir $xdir" - $run mkdir "$xdir" - status=$? - if test $status -ne 0 && test ! -d "$xdir"; then - exit $status - fi - $show "(cd $xdir && $AR x $xabs)" - $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? - - libobjs="$libobjs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` - done - fi - fi - - if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then - eval flag=\"$thread_safe_flag_spec\" - linkopts="$linkopts $flag" - fi - - # Do each of the archive commands. - if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then - eval cmds=\"$archive_expsym_cmds\" - else - eval cmds=\"$archive_cmds\" - fi - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - - # Create links to the real library. - for linkname in $linknames; do - if test "$realname" != "$linkname"; then - $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)" - $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $? - fi - done - - # If -module or -export-dynamic was specified, set the dlname. - if test "$module" = yes || test "$export_dynamic" = yes; then - # On all known operating systems, these are identical. 
- dlname="$soname" - fi - fi - ;; - - *.lo | *.o | *.obj) - if test -n "$link_against_libtool_libs"; then - $echo "$modename: error: cannot link libtool libraries into objects" 1>&2 - exit 1 - fi - - if test -n "$deplibs"; then - $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2 - fi - - if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then - $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2 - fi - - if test -n "$rpath"; then - $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2 - fi - - if test -n "$xrpath"; then - $echo "$modename: warning: \`-R' is ignored for objects" 1>&2 - fi - - if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2 - fi - - if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored for objects" 1>&2 - fi - - case "$output" in - *.lo) - if test -n "$objs"; then - $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2 - exit 1 - fi - libobj="$output" - obj=`$echo "X$output" | $Xsed -e "$lo2o"` - ;; - *) - libobj= - obj="$output" - ;; - esac - - # Delete the old objects. - $run $rm $obj $libobj - - # Objects from convenience libraries. This assumes - # single-version convenience libraries. Whenever we create - # different ones for PIC/non-PIC, this we'll have to duplicate - # the extraction. - reload_conv_objs= - gentop= - # reload_cmds runs $LD directly, so let us get rid of - # -Wl from whole_archive_flag_spec - wl= - - if test -n "$convenience"; then - if test -n "$whole_archive_flag_spec"; then - eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\" - else - gentop="$output_objdir/${obj}x" - $show "${rm}r $gentop" - $run ${rm}r "$gentop" - $show "mkdir $gentop" - $run mkdir "$gentop" - status=$? - if test $status -ne 0 && test ! -d "$gentop"; then - exit $status - fi - generated="$generated $gentop" - - for xlib in $convenience; do - # Extract the objects. 
- case "$xlib" in - [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; - *) xabs=`pwd`"/$xlib" ;; - esac - xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` - xdir="$gentop/$xlib" - - $show "${rm}r $xdir" - $run ${rm}r "$xdir" - $show "mkdir $xdir" - $run mkdir "$xdir" - status=$? - if test $status -ne 0 && test ! -d "$xdir"; then - exit $status - fi - $show "(cd $xdir && $AR x $xabs)" - $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? - - reload_conv_objs="$reload_objs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` - done - fi - fi - - # Create the old-style object. - reload_objs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" - - output="$obj" - eval cmds=\"$reload_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - - # Exit if we aren't doing a library object file. - if test -z "$libobj"; then - if test -n "$gentop"; then - $show "${rm}r $gentop" - $run ${rm}r $gentop - fi - - exit 0 - fi - - if test "$build_libtool_libs" != yes; then - if test -n "$gentop"; then - $show "${rm}r $gentop" - $run ${rm}r $gentop - fi - - # Create an invalid libtool object if no PIC, so that we don't - # accidentally link it into a program. - $show "echo timestamp > $libobj" - $run eval "echo timestamp > $libobj" || exit $? - exit 0 - fi - - if test -n "$pic_flag"; then - # Only do commands if we really have different PIC objects. - reload_objs="$libobjs $reload_conv_objs" - output="$libobj" - eval cmds=\"$reload_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - else - # Just create a symlink. - $show $rm $libobj - $run $rm $libobj - xdir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$xdir" = "X$libobj"; then - xdir="." 
- else - xdir="$xdir" - fi - baseobj=`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` - oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` - $show "(cd $xdir && $LN_S $oldobj $baseobj)" - $run eval '(cd $xdir && $LN_S $oldobj $baseobj)' || exit $? - fi - - if test -n "$gentop"; then - $show "${rm}r $gentop" - $run ${rm}r $gentop - fi - - exit 0 - ;; - - # Anything else should be a program. - *) - if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2 - fi - - if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored for programs" 1>&2 - fi - - if test "$preload" = yes; then - if test "$dlopen" = unknown && test "$dlopen_self" = unknown && - test "$dlopen_self_static" = unknown; then - $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support." - fi - fi - - if test -n "$rpath$xrpath"; then - # If the user specified any rpath flags, then add them. - for libdir in $rpath $xrpath; do - # This is the magic to use -rpath. - case "$compile_rpath " in - *" $libdir "*) ;; - *) compile_rpath="$compile_rpath $libdir" ;; - esac - case "$finalize_rpath " in - *" $libdir "*) ;; - *) finalize_rpath="$finalize_rpath $libdir" ;; - esac - done - fi - - # Now hardcode the library paths - rpath= - hardcode_libdirs= - for libdir in $compile_rpath $finalize_rpath; do - if test -n "$hardcode_libdir_flag_spec"; then - if test -n "$hardcode_libdir_separator"; then - if test -z "$hardcode_libdirs"; then - hardcode_libdirs="$libdir" - else - # Just accumulate the unique libdirs. 
- case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in - *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) - ;; - *) - hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" - ;; - esac - fi - else - eval flag=\"$hardcode_libdir_flag_spec\" - rpath="$rpath $flag" - fi - elif test -n "$runpath_var"; then - case "$perm_rpath " in - *" $libdir "*) ;; - *) perm_rpath="$perm_rpath $libdir" ;; - esac - fi - done - # Substitute the hardcoded libdirs into the rpath. - if test -n "$hardcode_libdir_separator" && - test -n "$hardcode_libdirs"; then - libdir="$hardcode_libdirs" - eval rpath=\" $hardcode_libdir_flag_spec\" - fi - compile_rpath="$rpath" - - rpath= - hardcode_libdirs= - for libdir in $finalize_rpath; do - if test -n "$hardcode_libdir_flag_spec"; then - if test -n "$hardcode_libdir_separator"; then - if test -z "$hardcode_libdirs"; then - hardcode_libdirs="$libdir" - else - # Just accumulate the unique libdirs. - case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in - *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) - ;; - *) - hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" - ;; - esac - fi - else - eval flag=\"$hardcode_libdir_flag_spec\" - rpath="$rpath $flag" - fi - elif test -n "$runpath_var"; then - case "$finalize_perm_rpath " in - *" $libdir "*) ;; - *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;; - esac - fi - done - # Substitute the hardcoded libdirs into the rpath. - if test -n "$hardcode_libdir_separator" && - test -n "$hardcode_libdirs"; then - libdir="$hardcode_libdirs" - eval rpath=\" $hardcode_libdir_flag_spec\" - fi - finalize_rpath="$rpath" - - output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'` - if test "X$output_objdir" = "X$output"; then - output_objdir="$objdir" - else - output_objdir="$output_objdir/$objdir" - fi - - # Create the binary in the object directory, then wrap it. - if test ! 
-d $output_objdir; then - $show "$mkdir $output_objdir" - $run $mkdir $output_objdir - status=$? - if test $status -ne 0 && test ! -d $output_objdir; then - exit $status - fi - fi - - if test -n "$libobjs" && test "$build_old_libs" = yes; then - # Transform all the library objects into standard objects. - compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` - finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` - fi - - dlsyms= - if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then - if test -n "$NM" && test -n "$global_symbol_pipe"; then - dlsyms="${outputname}S.c" - else - $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2 - fi - fi - - if test -n "$dlsyms"; then - case "$dlsyms" in - "") ;; - *.c) - # Discover the nlist of each of the dlfiles. - nlist="$output_objdir/${outputname}.nm" - - $show "$rm $nlist ${nlist}S ${nlist}T" - $run $rm "$nlist" "${nlist}S" "${nlist}T" - - # Parse the name list into a source file. - $show "creating $output_objdir/$dlsyms" - - test -z "$run" && $echo > "$output_objdir/$dlsyms" "\ -/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */ -/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */ - -#ifdef __cplusplus -extern \"C\" { -#endif - -/* Prevent the only kind of declaration conflicts we can make. */ -#define lt_preloaded_symbols some_other_symbol - -/* External symbol declarations for the compiler. */\ -" - - if test "$dlself" = yes; then - $show "generating symbol list for \`$output'" - - test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist" - - # Add our own program objects to the symbol list. 
- progfiles=`$echo "X$objs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` - for arg in $progfiles; do - $show "extracting global C symbols from \`$arg'" - $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" - done - - if test -n "$exclude_expsyms"; then - $run eval 'egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' - $run eval '$mv "$nlist"T "$nlist"' - fi - - if test -n "$export_symbols_regex"; then - $run eval 'egrep -e "$export_symbols_regex" "$nlist" > "$nlist"T' - $run eval '$mv "$nlist"T "$nlist"' - fi - - # Prepare the list of exported symbols - if test -z "$export_symbols"; then - export_symbols="$output_objdir/$output.exp" - $run $rm $export_symbols - $run eval "sed -n -e '/^: @PROGRAM@$/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' - else - $run eval "sed -e 's/\([][.*^$]\)/\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$output.exp"' - $run eval 'grep -f "$output_objdir/$output.exp" < "$nlist" > "$nlist"T' - $run eval 'mv "$nlist"T "$nlist"' - fi - fi - - for arg in $dlprefiles; do - $show "extracting global C symbols from \`$arg'" - name=`echo "$arg" | sed -e 's%^.*/%%'` - $run eval 'echo ": $name " >> "$nlist"' - $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" - done - - if test -z "$run"; then - # Make sure we have at least an empty file. - test -f "$nlist" || : > "$nlist" - - if test -n "$exclude_expsyms"; then - egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T - $mv "$nlist"T "$nlist" - fi - - # Try sorting and uniquifying the output. 
- if grep -v "^: " < "$nlist" | sort +2 | uniq > "$nlist"S; then - : - else - grep -v "^: " < "$nlist" > "$nlist"S - fi - - if test -f "$nlist"S; then - eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"' - else - echo '/* NONE */' >> "$output_objdir/$dlsyms" - fi - - $echo >> "$output_objdir/$dlsyms" "\ - -#undef lt_preloaded_symbols - -#if defined (__STDC__) && __STDC__ -# define lt_ptr_t void * -#else -# define lt_ptr_t char * -# define const -#endif - -/* The mapping between symbol names and symbols. */ -const struct { - const char *name; - lt_ptr_t address; -} -lt_preloaded_symbols[] = -{\ -" - - sed -n -e 's/^: \([^ ]*\) $/ {\"\1\", (lt_ptr_t) 0},/p' \ - -e 's/^. \([^ ]*\) \([^ ]*\)$/ {"\2", (lt_ptr_t) \&\2},/p' \ - < "$nlist" >> "$output_objdir/$dlsyms" - - $echo >> "$output_objdir/$dlsyms" "\ - {0, (lt_ptr_t) 0} -}; - -/* This works around a problem in FreeBSD linker */ -#ifdef FREEBSD_WORKAROUND -static const void *lt_preloaded_setup() { - return lt_preloaded_symbols; -} -#endif - -#ifdef __cplusplus -} -#endif\ -" - fi - - pic_flag_for_symtable= - case "$host" in - # compiling the symbol table file with pic_flag works around - # a FreeBSD bug that causes programs to crash when -lm is - # linked before any other PIC object. But we must not use - # pic_flag when linking with -static. The problem exists in - # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. - *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) - case "$compile_command " in - *" -static "*) ;; - *) pic_flag_for_symtable=" $pic_flag -DPIC -DFREEBSD_WORKAROUND";; - esac;; - *-*-hpux*) - case "$compile_command " in - *" -static "*) ;; - *) pic_flag_for_symtable=" $pic_flag -DPIC";; - esac - esac - - # Now compile the dynamic symbol file. - $show "(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")" - $run eval '(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $? - - # Clean up the generated files. 
- $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T" - $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T" - - # Transform the symbol file into the correct name. - compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` - ;; - *) - $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2 - exit 1 - ;; - esac - else - # We keep going just in case the user didn't refer to - # lt_preloaded_symbols. The linker will fail if global_symbol_pipe - # really was required. - - # Nullify the symbol file. - compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` - fi - - if test -z "$link_against_libtool_libs" || test "$build_libtool_libs" != yes; then - # Replace the output file specification. - compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` - link_command="$compile_command$compile_rpath" - - # We have no uninstalled library dependencies, so finalize right now. - $show "$link_command" - $run eval "$link_command" - status=$? - - # Delete the generated files. - if test -n "$dlsyms"; then - $show "$rm $output_objdir/${outputname}S.${objext}" - $run $rm "$output_objdir/${outputname}S.${objext}" - fi - - exit $status - fi - - if test -n "$shlibpath_var"; then - # We should set the shlibpath_var - rpath= - for dir in $temp_rpath; do - case "$dir" in - [\\/]* | [A-Za-z]:[\\/]*) - # Absolute path. - rpath="$rpath$dir:" - ;; - *) - # Relative path: add a thisdir entry. 
- rpath="$rpath\$thisdir/$dir:" - ;; - esac - done - temp_rpath="$rpath" - fi - - if test -n "$compile_shlibpath$finalize_shlibpath"; then - compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" - fi - if test -n "$finalize_shlibpath"; then - finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" - fi - - compile_var= - finalize_var= - if test -n "$runpath_var"; then - if test -n "$perm_rpath"; then - # We should set the runpath_var. - rpath= - for dir in $perm_rpath; do - rpath="$rpath$dir:" - done - compile_var="$runpath_var=\"$rpath\$$runpath_var\" " - fi - if test -n "$finalize_perm_rpath"; then - # We should set the runpath_var. - rpath= - for dir in $finalize_perm_rpath; do - rpath="$rpath$dir:" - done - finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " - fi - fi - - if test "$hardcode_action" = relink; then - # Fast installation is not supported - link_command="$compile_var$compile_command$compile_rpath" - relink_command="$finalize_var$finalize_command$finalize_rpath" - - $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2 - $echo "$modename: \`$output' will be relinked during installation" 1>&2 - else - if test "$fast_install" != no; then - link_command="$finalize_var$compile_command$finalize_rpath" - if test "$fast_install" = yes; then - relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'` - else - # fast_install is set to needless - relink_command= - fi - else - link_command="$compile_var$compile_command$compile_rpath" - relink_command="$finalize_var$finalize_command$finalize_rpath" - fi - fi - - # Replace the output file specification. - link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` - - # Delete the old output files. 
- $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname - - $show "$link_command" - $run eval "$link_command" || exit $? - - # Now create the wrapper script. - $show "creating $output" - - # Quote the relink command for shipping. - if test -n "$relink_command"; then - relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` - fi - - # Quote $echo for shipping. - if test "X$echo" = "X$SHELL $0 --fallback-echo"; then - case "$0" in - [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $0 --fallback-echo";; - *) qecho="$SHELL `pwd`/$0 --fallback-echo";; - esac - qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"` - else - qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"` - fi - - # Only actually do things if our run command is non-null. - if test -z "$run"; then - # win32 will think the script is a binary if it has - # a .exe suffix, so we strip it off here. - case $output in - *.exe) output=`echo $output|sed 's,.exe$,,'` ;; - esac - $rm $output - trap "$rm $output; exit 1" 1 2 15 - - $echo > $output "\ -#! $SHELL - -# $output - temporary wrapper script for $objdir/$outputname -# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP -# -# The $output program cannot be directly executed until all the libtool -# libraries that it depends on are installed. -# -# This wrapper script should never be moved out of the build directory. -# If it is, it will not operate correctly. - -# Sed substitution that helps us do robust quoting. It backslashifies -# metacharacters that are still active within double-quoted strings. -Xsed='sed -e 1s/^X//' -sed_quote_subst='$sed_quote_subst' - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -if test \"\${CDPATH+set}\" = set; then CDPATH=:; export CDPATH; fi - -relink_command=\"$relink_command\" - -# This environment variable determines our operation mode. 
-if test \"\$libtool_install_magic\" = \"$magic\"; then - # install mode needs the following variable: - link_against_libtool_libs='$link_against_libtool_libs' -else - # When we are sourced in execute mode, \$file and \$echo are already set. - if test \"\$libtool_execute_magic\" != \"$magic\"; then - echo=\"$qecho\" - file=\"\$0\" - # Make sure echo works. - if test \"X\$1\" = X--no-reexec; then - # Discard the --no-reexec flag, and continue. - shift - elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then - # Yippee, \$echo works! - : - else - # Restart under the correct shell, and then maybe \$echo will work. - exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"} - fi - fi\ -" - $echo >> $output "\ - - # Find the directory that this script lives in. - thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\` - test \"x\$thisdir\" = \"x\$file\" && thisdir=. - - # Follow symbolic links until we get to the real thisdir. - file=\`ls -ld \"\$file\" | sed -n 's/.*-> //p'\` - while test -n \"\$file\"; do - destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\` - - # If there was a directory component, then change thisdir. - if test \"x\$destdir\" != \"x\$file\"; then - case \"\$destdir\" in - [\\/]* | [A-Za-z]:[\\/]*) thisdir=\"\$destdir\" ;; - *) thisdir=\"\$thisdir/\$destdir\" ;; - esac - fi - - file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\` - file=\`ls -ld \"\$thisdir/\$file\" | sed -n 's/.*-> //p'\` - done - - # Try to get the absolute directory name. - absdir=\`cd \"\$thisdir\" && pwd\` - test -n \"\$absdir\" && thisdir=\"\$absdir\" -" - - if test "$fast_install" = yes; then - echo >> $output "\ - program=lt-'$outputname' - progdir=\"\$thisdir/$objdir\" - - if test ! -f \"\$progdir/\$program\" || \\ - { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | sed 1q\`; \\ - test \"X\$file\" != \"X\$progdir/\$program\"; }; then - - file=\"\$\$-\$program\" - - if test ! 
-d \"\$progdir\"; then - $mkdir \"\$progdir\" - else - $rm \"\$progdir/\$file\" - fi" - - echo >> $output "\ - - # relink executable if necessary - if test -n \"\$relink_command\"; then - if (cd \"\$thisdir\" && eval \$relink_command); then : - else - $rm \"\$progdir/\$file\" - exit 1 - fi - fi - - $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || - { $rm \"\$progdir/\$program\"; - $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; } - $rm \"\$progdir/\$file\" - fi" - else - echo >> $output "\ - program='$outputname' - progdir=\"\$thisdir/$objdir\" -" - fi - - echo >> $output "\ - - if test -f \"\$progdir/\$program\"; then" - - # Export our shlibpath_var if we have one. - if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then - $echo >> $output "\ - # Add our own library path to $shlibpath_var - $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" - - # Some systems cannot cope with colon-terminated $shlibpath_var - # The second colon is a workaround for a bug in BeOS R4 sed - $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\` - - export $shlibpath_var -" - fi - - # fixup the dll searchpath if we need to. - if test -n "$dllsearchpath"; then - $echo >> $output "\ - # Add the dll search path components to the executable PATH - PATH=$dllsearchpath:\$PATH -" - fi - - $echo >> $output "\ - if test \"\$libtool_execute_magic\" != \"$magic\"; then - # Run the actual program with our arguments. -" - case $host in - *-*-cygwin* | *-*-mingw | *-*-os2*) - # win32 systems need to use the prog path for dll - # lookup to work - $echo >> $output "\ - exec \$progdir\\\\\$program \${1+\"\$@\"} -" - ;; - *) - $echo >> $output "\ - # Export the path to the program. - PATH=\"\$progdir:\$PATH\" - export PATH - - exec \$program \${1+\"\$@\"} -" - ;; - esac - $echo >> $output "\ - \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\" - exit 1 - fi - else - # The program doesn't exist. 
- \$echo \"\$0: error: \$progdir/\$program does not exist\" 1>&2 - \$echo \"This script is just a wrapper for \$program.\" 1>&2 - echo \"See the $PACKAGE documentation for more information.\" 1>&2 - exit 1 - fi -fi\ -" - chmod +x $output - fi - exit 0 - ;; - esac - - # See if we need to build an old-fashioned archive. - for oldlib in $oldlibs; do - - if test "$build_libtool_libs" = convenience; then - oldobjs="$libobjs_save" - addlibs="$convenience" - build_libtool_libs=no - else - if test "$build_libtool_libs" = module; then - oldobjs="$libobjs_save" - build_libtool_libs=no - else - oldobjs="$objs "`$echo "X$libobjs_save" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP` - fi - addlibs="$old_convenience" - fi - - if test -n "$addlibs"; then - gentop="$output_objdir/${outputname}x" - $show "${rm}r $gentop" - $run ${rm}r "$gentop" - $show "mkdir $gentop" - $run mkdir "$gentop" - status=$? - if test $status -ne 0 && test ! -d "$gentop"; then - exit $status - fi - generated="$generated $gentop" - - # Add in members from convenience archives. - for xlib in $addlibs; do - # Extract the objects. - case "$xlib" in - [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; - *) xabs=`pwd`"/$xlib" ;; - esac - xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` - xdir="$gentop/$xlib" - - $show "${rm}r $xdir" - $run ${rm}r "$xdir" - $show "mkdir $xdir" - $run mkdir "$xdir" - status=$? - if test $status -ne 0 && test ! -d "$xdir"; then - exit $status - fi - $show "(cd $xdir && $AR x $xabs)" - $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? - - oldobjs="$oldobjs "`find $xdir -name \*.${objext} -print -o -name \*.lo -print | $NL2SP` - done - fi - - # Do each command in the archive commands. 
- if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then - eval cmds=\"$old_archive_from_new_cmds\" - else - # Ensure that we have .o objects in place in case we decided - # not to build a shared library, and have fallen back to building - # static libs even though --disable-static was passed! - for oldobj in $oldobjs; do - if test ! -f $oldobj; then - xdir=`$echo "X$oldobj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$xdir" = "X$oldobj"; then - xdir="." - else - xdir="$xdir" - fi - baseobj=`$echo "X$oldobj" | $Xsed -e 's%^.*/%%'` - obj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` - $show "(cd $xdir && ${LN_S} $obj $baseobj)" - $run eval '(cd $xdir && ${LN_S} $obj $baseobj)' || exit $? - fi - done - - eval cmds=\"$old_archive_cmds\" - fi - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - done - - if test -n "$generated"; then - $show "${rm}r$generated" - $run ${rm}r$generated - fi - - # Now create the libtool archive. - case "$output" in - *.la) - old_library= - test "$build_old_libs" = yes && old_library="$libname.$libext" - $show "creating $output" - - if test -n "$xrpath"; then - temp_xrpath= - for libdir in $xrpath; do - temp_xrpath="$temp_xrpath -R$libdir" - done - dependency_libs="$temp_xrpath $dependency_libs" - fi - - # Only create the output if not a dry run. - if test -z "$run"; then - for installed in no yes; do - if test "$installed" = yes; then - if test -z "$install_libdir"; then - break - fi - output="$output_objdir/$outputname"i - fi - $rm $output - $echo > $output "\ -# $outputname - a libtool library file -# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP -# -# Please DO NOT delete this file! -# It is necessary for linking the library. - -# The name that we can dlopen(3). -dlname='$dlname' - -# Names of this library. -library_names='$library_names' - -# The name of the static archive. 
-old_library='$old_library' - -# Libraries that this one depends upon. -dependency_libs='$dependency_libs' - -# Version information for $libname. -current=$current -age=$age -revision=$revision - -# Is this an already installed library? -installed=$installed - -# Directory that this library needs to be installed in: -libdir='$install_libdir'\ -" - done - fi - - # Do a symbolic link so that the libtool archive can be found in - # LD_LIBRARY_PATH before the program is installed. - $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)" - $run eval "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)" || exit $? - ;; - esac - exit 0 - ;; - - # libtool install mode - install) - modename="$modename: install" - - # There may be an optional sh(1) argument at the beginning of - # install_prog (especially on Windows NT). - if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh; then - # Aesthetically quote it. - arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"` - case "$arg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" - ;; - esac - install_prog="$arg " - arg="$1" - shift - else - install_prog= - arg="$nonopt" - fi - - # The real first argument should be the name of the installation program. - # Aesthetically quote it. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case "$arg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" - ;; - esac - install_prog="$install_prog$arg" - - # We need to accept at least all the BSD install flags. - dest= - files= - opts= - prev= - install_type= - isdir=no - stripme= - for arg - do - if test -n "$dest"; then - files="$files $dest" - dest="$arg" - continue - fi - - case "$arg" in - -d) isdir=yes ;; - -f) prev="-f" ;; - -g) prev="-g" ;; - -m) prev="-m" ;; - -o) prev="-o" ;; - -s) - stripme=" -s" - continue - ;; - -*) ;; - - *) - # If the previous option needed an argument, then skip it. 
- if test -n "$prev"; then - prev= - else - dest="$arg" - continue - fi - ;; - esac - - # Aesthetically quote the argument. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case "$arg" in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" - ;; - esac - install_prog="$install_prog $arg" - done - - if test -z "$install_prog"; then - $echo "$modename: you must specify an install program" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - if test -n "$prev"; then - $echo "$modename: the \`$prev' option requires an argument" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - if test -z "$files"; then - if test -z "$dest"; then - $echo "$modename: no file or destination specified" 1>&2 - else - $echo "$modename: you must specify a destination" 1>&2 - fi - $echo "$help" 1>&2 - exit 1 - fi - - # Strip any trailing slash from the destination. - dest=`$echo "X$dest" | $Xsed -e 's%/$%%'` - - # Check to see that the destination is a directory. - test -d "$dest" && isdir=yes - if test "$isdir" = yes; then - destdir="$dest" - destname= - else - destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'` - test "X$destdir" = "X$dest" && destdir=. - destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'` - - # Not a directory, so check to see that there is only one file specified. - set dummy $files - if test $# -gt 2; then - $echo "$modename: \`$dest' is not a directory" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - fi - case "$destdir" in - [\\/]* | [A-Za-z]:[\\/]*) ;; - *) - for file in $files; do - case "$file" in - *.lo) ;; - *) - $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - esac - done - ;; - esac - - # This variable tells wrapper scripts just to set variables rather - # than running their programs. - libtool_install_magic="$magic" - - staticlibs= - future_libdirs= - current_libdirs= - for file in $files; do - - # Do each installation. - case "$file" in - *.a | *.lib) - # Do the static libraries later. 
- staticlibs="$staticlibs $file" - ;; - - *.la) - # Check to see that this really is a libtool archive. - if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : - else - $echo "$modename: \`$file' is not a valid libtool archive" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - library_names= - old_library= - # If there is no directory component, then add one. - case "$file" in - */* | *\\*) . $file ;; - *) . ./$file ;; - esac - - # Add the libdir to current_libdirs if it is the destination. - if test "X$destdir" = "X$libdir"; then - case "$current_libdirs " in - *" $libdir "*) ;; - *) current_libdirs="$current_libdirs $libdir" ;; - esac - else - # Note the libdir as a future libdir. - case "$future_libdirs " in - *" $libdir "*) ;; - *) future_libdirs="$future_libdirs $libdir" ;; - esac - fi - - dir="`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/" - test "X$dir" = "X$file/" && dir= - dir="$dir$objdir" - - # See the names of the shared library. - set dummy $library_names - if test -n "$2"; then - realname="$2" - shift - shift - - # Install the shared library and build the symlinks. - $show "$install_prog $dir/$realname $destdir/$realname" - $run eval "$install_prog $dir/$realname $destdir/$realname" || exit $? - - if test $# -gt 0; then - # Delete the old symlinks, and create new ones. - for linkname - do - if test "$linkname" != "$realname"; then - $show "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" - $run eval "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" - fi - done - fi - - # Do each command in the postinstall commands. - lib="$destdir/$realname" - eval cmds=\"$postinstall_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - fi - - # Install the pseudo-library for information purposes. 
- name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - instname="$dir/$name"i - $show "$install_prog $instname $destdir/$name" - $run eval "$install_prog $instname $destdir/$name" || exit $? - - # Maybe install the static library, too. - test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library" - ;; - - *.lo) - # Install (i.e. copy) a libtool object. - - # Figure out destination file name, if it wasn't already specified. - if test -n "$destname"; then - destfile="$destdir/$destname" - else - destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - destfile="$destdir/$destfile" - fi - - # Deduce the name of the destination old-style object file. - case "$destfile" in - *.lo) - staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"` - ;; - *.o | *.obj) - staticdest="$destfile" - destfile= - ;; - *) - $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - esac - - # Install the libtool object if requested. - if test -n "$destfile"; then - $show "$install_prog $file $destfile" - $run eval "$install_prog $file $destfile" || exit $? - fi - - # Install the old object if enabled. - if test "$build_old_libs" = yes; then - # Deduce the name of the old-style object file. - staticobj=`$echo "X$file" | $Xsed -e "$lo2o"` - - $show "$install_prog $staticobj $staticdest" - $run eval "$install_prog \$staticobj \$staticdest" || exit $? - fi - exit 0 - ;; - - *) - # Figure out destination file name, if it wasn't already specified. - if test -n "$destname"; then - destfile="$destdir/$destname" - else - destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - destfile="$destdir/$destfile" - fi - - # Do a test to see if this is really a libtool program. - if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then - link_against_libtool_libs= - relink_command= - - # If there is no directory component, then add one. - case "$file" in - */* | *\\*) . $file ;; - *) . 
./$file ;; - esac - - # Check the variables that should have been set. - if test -z "$link_against_libtool_libs"; then - $echo "$modename: invalid libtool wrapper script \`$file'" 1>&2 - exit 1 - fi - - finalize=yes - for lib in $link_against_libtool_libs; do - # Check to see that each library is installed. - libdir= - if test -f "$lib"; then - # If there is no directory component, then add one. - case "$lib" in - */* | *\\*) . $lib ;; - *) . ./$lib ;; - esac - fi - libfile="$libdir/`$echo "X$lib" | $Xsed -e 's%^.*/%%g'`" - if test -n "$libdir" && test ! -f "$libfile"; then - $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2 - finalize=no - fi - done - - outputname= - if test "$fast_install" = no && test -n "$relink_command"; then - if test "$finalize" = yes && test -z "$run"; then - tmpdir="/tmp" - test -n "$TMPDIR" && tmpdir="$TMPDIR" - tmpdir="$tmpdir/libtool-$$" - if $mkdir -p "$tmpdir" && chmod 700 "$tmpdir"; then : - else - $echo "$modename: error: cannot create temporary directory \`$tmpdir'" 1>&2 - continue - fi - outputname="$tmpdir/$file" - # Replace the output file specification. - relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'` - - $show "$relink_command" - if $run eval "$relink_command"; then : - else - $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 - ${rm}r "$tmpdir" - continue - fi - file="$outputname" - else - $echo "$modename: warning: cannot relink \`$file'" 1>&2 - fi - else - # Install the binary that we compiled earlier. - file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"` - fi - fi - - $show "$install_prog$stripme $file $destfile" - $run eval "$install_prog\$stripme \$file \$destfile" || exit $? - test -n "$outputname" && ${rm}r "$tmpdir" - ;; - esac - done - - for file in $staticlibs; do - name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - - # Set up the ranlib parameters. 
- oldlib="$destdir/$name" - - $show "$install_prog $file $oldlib" - $run eval "$install_prog \$file \$oldlib" || exit $? - - # Do each command in the postinstall commands. - eval cmds=\"$old_postinstall_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - done - - if test -n "$future_libdirs"; then - $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2 - fi - - if test -n "$current_libdirs"; then - # Maybe just do a dry run. - test -n "$run" && current_libdirs=" -n$current_libdirs" - exec $SHELL $0 --finish$current_libdirs - exit 1 - fi - - exit 0 - ;; - - # libtool finish mode - finish) - modename="$modename: finish" - libdirs="$nonopt" - admincmds= - - if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then - for dir - do - libdirs="$libdirs $dir" - done - - for libdir in $libdirs; do - if test -n "$finish_cmds"; then - # Do each command in the finish commands. - eval cmds=\"$finish_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || admincmds="$admincmds - $cmd" - done - IFS="$save_ifs" - fi - if test -n "$finish_eval"; then - # Do the single finish_eval. - eval cmds=\"$finish_eval\" - $run eval "$cmds" || admincmds="$admincmds - $cmds" - fi - done - fi - - # Exit here if they wanted silent mode. 
- test "$show" = : && exit 0 - - echo "----------------------------------------------------------------------" - echo "Libraries have been installed in:" - for libdir in $libdirs; do - echo " $libdir" - done - echo - echo "If you ever happen to want to link against installed libraries" - echo "in a given directory, LIBDIR, you must either use libtool, and" - echo "specify the full pathname of the library, or use \`-LLIBDIR'" - echo "flag during linking and do at least one of the following:" - if test -n "$shlibpath_var"; then - echo " - add LIBDIR to the \`$shlibpath_var' environment variable" - echo " during execution" - fi - if test -n "$runpath_var"; then - echo " - add LIBDIR to the \`$runpath_var' environment variable" - echo " during linking" - fi - if test -n "$hardcode_libdir_flag_spec"; then - libdir=LIBDIR - eval flag=\"$hardcode_libdir_flag_spec\" - - echo " - use the \`$flag' linker flag" - fi - if test -n "$admincmds"; then - echo " - have your system administrator run these commands:$admincmds" - fi - if test -f /etc/ld.so.conf; then - echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" - fi - echo - echo "See any operating system documentation about shared libraries for" - echo "more information, such as the ld(1) and ld.so(8) manual pages." - echo "----------------------------------------------------------------------" - exit 0 - ;; - - # libtool execute mode - execute) - modename="$modename: execute" - - # The first argument is the command name. - cmd="$nonopt" - if test -z "$cmd"; then - $echo "$modename: you must specify a COMMAND" 1>&2 - $echo "$help" - exit 1 - fi - - # Handle -dlopen flags immediately. - for file in $execute_dlfiles; do - if test ! -f "$file"; then - $echo "$modename: \`$file' is not a file" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - dir= - case "$file" in - *.la) - # Check to see that this really is a libtool archive. 
- if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : - else - $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - # Read the libtool library. - dlname= - library_names= - - # If there is no directory component, then add one. - case "$file" in - */* | *\\*) . $file ;; - *) . ./$file ;; - esac - - # Skip this library if it cannot be dlopened. - if test -z "$dlname"; then - # Warn if it was a shared library. - test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'" - continue - fi - - dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - test "X$dir" = "X$file" && dir=. - - if test -f "$dir/$objdir/$dlname"; then - dir="$dir/$objdir" - else - $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2 - exit 1 - fi - ;; - - *.lo) - # Just add the directory containing the .lo file. - dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - test "X$dir" = "X$file" && dir=. - ;; - - *) - $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2 - continue - ;; - esac - - # Get the absolute pathname. - absdir=`cd "$dir" && pwd` - test -n "$absdir" && dir="$absdir" - - # Now add the directory to shlibpath_var. - if eval "test -z \"\$$shlibpath_var\""; then - eval "$shlibpath_var=\"\$dir\"" - else - eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" - fi - done - - # This variable tells wrapper scripts just to set shlibpath_var - # rather than running their programs. - libtool_execute_magic="$magic" - - # Check if any of the arguments is a wrapper script. - args= - for file - do - case "$file" in - -*) ;; - *) - # Do a test to see if this is really a libtool program. - if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then - # If there is no directory component, then add one. - case "$file" in - */* | *\\*) . $file ;; - *) . ./$file ;; - esac - - # Transform arg to wrapped name. 
- file="$progdir/$program" - fi - ;; - esac - # Quote arguments (to preserve shell metacharacters). - file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"` - args="$args \"$file\"" - done - - if test -z "$run"; then - if test -n "$shlibpath_var"; then - # Export the shlibpath_var. - eval "export $shlibpath_var" - fi - - # Restore saved enviroment variables - if test "${save_LC_ALL+set}" = set; then - LC_ALL="$save_LC_ALL"; export LC_ALL - fi - if test "${save_LANG+set}" = set; then - LANG="$save_LANG"; export LANG - fi - - # Now actually exec the command. - eval "exec \$cmd$args" - - $echo "$modename: cannot exec \$cmd$args" - exit 1 - else - # Display what would be done. - if test -n "$shlibpath_var"; then - eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\"" - $echo "export $shlibpath_var" - fi - $echo "$cmd$args" - exit 0 - fi - ;; - - # libtool uninstall mode - uninstall) - modename="$modename: uninstall" - rm="$nonopt" - files= - - for arg - do - case "$arg" in - -*) rm="$rm $arg" ;; - *) files="$files $arg" ;; - esac - done - - if test -z "$rm"; then - $echo "$modename: you must specify an RM program" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - for file in $files; do - dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - test "X$dir" = "X$file" && dir=. - name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - - rmfiles="$file" - - case "$name" in - *.la) - # Possibly a libtool archive, so verify it. - if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then - . $dir/$name - - # Delete the libtool libraries and symlinks. - for n in $library_names; do - rmfiles="$rmfiles $dir/$n" - done - test -n "$old_library" && rmfiles="$rmfiles $dir/$old_library" - - $show "$rm $rmfiles" - $run $rm $rmfiles - - if test -n "$library_names"; then - # Do each command in the postuninstall commands. 
- eval cmds=\"$postuninstall_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" - done - IFS="$save_ifs" - fi - - if test -n "$old_library"; then - # Do each command in the old_postuninstall commands. - eval cmds=\"$old_postuninstall_cmds\" - IFS="${IFS= }"; save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" - done - IFS="$save_ifs" - fi - - # FIXME: should reinstall the best remaining shared library. - fi - ;; - - *.lo) - if test "$build_old_libs" = yes; then - oldobj=`$echo "X$name" | $Xsed -e "$lo2o"` - rmfiles="$rmfiles $dir/$oldobj" - fi - $show "$rm $rmfiles" - $run $rm $rmfiles - ;; - - *) - $show "$rm $rmfiles" - $run $rm $rmfiles - ;; - esac - done - exit 0 - ;; - - "") - $echo "$modename: you must specify a MODE" 1>&2 - $echo "$generic_help" 1>&2 - exit 1 - ;; - esac - - $echo "$modename: invalid operation mode \`$mode'" 1>&2 - $echo "$generic_help" 1>&2 - exit 1 -fi # test -z "$show_help" - -# We need to display help for each of the modes. -case "$mode" in -"") $echo \ -"Usage: $modename [OPTION]... [MODE-ARG]... - -Provide generalized library-building support services. 
- - --config show all configuration variables - --debug enable verbose shell tracing --n, --dry-run display commands without modifying any files - --features display basic configuration information and exit - --finish same as \`--mode=finish' - --help display this help message and exit - --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS] - --quiet same as \`--silent' - --silent don't print informational messages - --version print version information - -MODE must be one of the following: - - compile compile a source file into a libtool object - execute automatically set library path, then run a program - finish complete the installation of libtool libraries - install install libraries or executables - link create a library or an executable - uninstall remove libraries from an installed directory - -MODE-ARGS vary depending on the MODE. Try \`$modename --help --mode=MODE' for -a more detailed description of MODE." - exit 0 - ;; - -compile) - $echo \ -"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE - -Compile a source file into a libtool library object. - -This mode accepts the following additional options: - - -o OUTPUT-FILE set the output file name to OUTPUT-FILE - -static always build a \`.o' file suitable for static linking - -COMPILE-COMMAND is a command to be used in creating a \`standard' object file -from the given SOURCEFILE. - -The output file name is determined by removing the directory component from -SOURCEFILE, then substituting the C source code suffix \`.c' with the -library object suffix, \`.lo'." - ;; - -execute) - $echo \ -"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]... - -Automatically set library path, then run a program. - -This mode accepts the following additional options: - - -dlopen FILE add the directory containing FILE to the library path - -This mode sets the library path environment variable according to \`-dlopen' -flags. 
- -If any of the ARGS are libtool executable wrappers, then they are translated -into their corresponding uninstalled binary, and any of their required library -directories are added to the library path. - -Then, COMMAND is executed, with ARGS as arguments." - ;; - -finish) - $echo \ -"Usage: $modename [OPTION]... --mode=finish [LIBDIR]... - -Complete the installation of libtool libraries. - -Each LIBDIR is a directory that contains libtool libraries. - -The commands that this mode executes may require superuser privileges. Use -the \`--dry-run' option if you just want to see what would be executed." - ;; - -install) - $echo \ -"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND... - -Install executables or libraries. - -INSTALL-COMMAND is the installation command. The first component should be -either the \`install' or \`cp' program. - -The rest of the components are interpreted as arguments to that command (only -BSD-compatible install options are recognized)." - ;; - -link) - $echo \ -"Usage: $modename [OPTION]... --mode=link LINK-COMMAND... - -Link object files or libraries together to form another library, or to -create an executable program. - -LINK-COMMAND is a command using the C compiler that you would use to create -a program from several object files. 
- -The following components of LINK-COMMAND are treated specially: - - -all-static do not do any dynamic linking at all - -avoid-version do not add a version suffix if possible - -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime - -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols - -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) - -export-symbols SYMFILE - try to export only the symbols listed in SYMFILE - -export-symbols-regex REGEX - try to export only the symbols matching REGEX - -LLIBDIR search LIBDIR for required installed libraries - -lNAME OUTPUT-FILE requires the installed library libNAME - -module build a library that can dlopened - -no-undefined declare that a library does not refer to external symbols - -o OUTPUT-FILE create OUTPUT-FILE from the specified objects - -release RELEASE specify package release information - -rpath LIBDIR the created library will eventually be installed in LIBDIR - -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries - -static do not do any dynamic linking of libtool libraries - -version-info CURRENT[:REVISION[:AGE]] - specify library version info [each variable defaults to 0] - -All other options (arguments beginning with \`-') are ignored. - -Every other argument is treated as a filename. Files ending in \`.la' are -treated as uninstalled libtool libraries, other files are standard or library -object files. - -If the OUTPUT-FILE ends in \`.la', then a libtool library is created, -only library objects (\`.lo' files) may be specified, and \`-rpath' is -required, except when creating a convenience library. - -If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created -using \`ar' and \`ranlib', or on Windows using \`lib'. - -If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file -is created, otherwise an executable program is created." - ;; - -uninstall) - $echo \ -"Usage: $modename [OPTION]... 
--mode=uninstall RM [RM-OPTION]... FILE... - -Remove libraries from an installation directory. - -RM is the name of the program to use to delete files associated with each FILE -(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed -to RM. - -If FILE is a libtool library, all the files associated with it are deleted. -Otherwise, only FILE itself is deleted using RM." - ;; - -*) - $echo "$modename: invalid operation mode \`$mode'" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; -esac - -echo -$echo "Try \`$modename --help' for more information about other modes." - -exit 0 - -# Local Variables: -# mode:shell-script -# sh-indentation:2 -# End: diff --git a/pcre/maketables.c b/pcre/maketables.c deleted file mode 100644 index c0f06c03..00000000 --- a/pcre/maketables.c +++ /dev/null @@ -1,132 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - -Written by: Philip Hazel - - Copyright (c) 1997-2000 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. 
If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ - -See the file Tech.Notes for some information on the internals. -*/ - - -/* This file is compiled on its own as part of the PCRE library. However, -it is also included in the compilation of dftables.c, in which case the macro -DFTABLES is defined. */ - -#ifndef DFTABLES -#include "internal.h" -#endif - - - -/************************************************* -* Create PCRE character tables * -*************************************************/ - -/* This function builds a set of character tables for use by PCRE and returns -a pointer to them. They are build using the ctype functions, and consequently -their contents will depend upon the current locale setting. When compiled as -part of the library, the store is obtained via pcre_malloc(), but when compiled -inside dftables, use malloc(). - -Arguments: none -Returns: pointer to the contiguous block of data -*/ - -unsigned const char * -pcre_maketables(void) -{ -unsigned char *yield, *p; -int i; - -#ifndef DFTABLES -yield = (unsigned char*)(pcre_malloc)(tables_length); -#else -yield = (unsigned char*)malloc(tables_length); -#endif - -if (yield == NULL) return NULL; -p = yield; - -/* First comes the lower casing table */ - -for (i = 0; i < 256; i++) *p++ = tolower(i); - -/* Next the case-flipping table */ - -for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); - -/* Then the character class tables. Don't try to be clever and save effort -on exclusive ones - in some locales things may be different. 
*/ - -memset(p, 0, cbit_length); -for (i = 0; i < 256; i++) - { - if (isdigit(i)) - { - p[cbit_digit + i/8] |= 1 << (i&7); - p[cbit_word + i/8] |= 1 << (i&7); - } - if (isupper(i)) - { - p[cbit_upper + i/8] |= 1 << (i&7); - p[cbit_word + i/8] |= 1 << (i&7); - } - if (islower(i)) - { - p[cbit_lower + i/8] |= 1 << (i&7); - p[cbit_word + i/8] |= 1 << (i&7); - } - if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); - if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); - if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); - if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); - if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); - if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); - if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); - } -p += cbit_length; - -/* Finally, the character type table */ - -for (i = 0; i < 256; i++) - { - int x = 0; - if (isspace(i)) x += ctype_space; - if (isalpha(i)) x += ctype_letter; - if (isdigit(i)) x += ctype_digit; - if (isxdigit(i)) x += ctype_xdigit; - if (isalnum(i) || i == '_') x += ctype_word; - if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; - *p++ = x; - } - -return yield; -} - -/* End of maketables.c */ diff --git a/pcre/pcre-config b/pcre/pcre-config deleted file mode 100644 index ac9ccfe9..00000000 --- a/pcre/pcre-config +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/sh - -prefix=/usr/local -exec_prefix=${prefix} -exec_prefix_set=no - -usage="\ -Usage: pcre-config [--prefix] [--exec-prefix] [--version] [--libs] [--libs-posix] [--cflags] [--cflags-posix]" - -if test $# -eq 0; then - echo "${usage}" 1>&2 - exit 1 -fi - -while test $# -gt 0; do - case "$1" in - -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) optarg= ;; - esac - - case $1 in - --prefix=*) - prefix=$optarg - if test $exec_prefix_set = no ; then - exec_prefix=$optarg - fi - ;; - --prefix) - echo $prefix - ;; - --exec-prefix=*) - exec_prefix=$optarg - exec_prefix_set=yes - ;; - --exec-prefix) - echo $exec_prefix - ;; - --version) - echo 3.4 - ;; - --cflags | 
--cflags-posix) - if test ${prefix}/include != /usr/include ; then - includes=-I${prefix}/include - fi - echo $includes - ;; - --libs-posix) - echo -L${exec_prefix}/lib -lpcreposix -lpcre - ;; - --libs) - echo -L${exec_prefix}/lib -lpcre - ;; - *) - echo "${usage}" 1>&2 - exit 1 - ;; - esac - shift -done diff --git a/pcre/pcre-config.in b/pcre/pcre-config.in deleted file mode 100644 index 8daded9f..00000000 --- a/pcre/pcre-config.in +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/sh - -prefix=@prefix@ -exec_prefix=@exec_prefix@ -exec_prefix_set=no - -usage="\ -Usage: pcre-config [--prefix] [--exec-prefix] [--version] [--libs] [--libs-posix] [--cflags] [--cflags-posix]" - -if test $# -eq 0; then - echo "${usage}" 1>&2 - exit 1 -fi - -while test $# -gt 0; do - case "$1" in - -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) optarg= ;; - esac - - case $1 in - --prefix=*) - prefix=$optarg - if test $exec_prefix_set = no ; then - exec_prefix=$optarg - fi - ;; - --prefix) - echo $prefix - ;; - --exec-prefix=*) - exec_prefix=$optarg - exec_prefix_set=yes - ;; - --exec-prefix) - echo $exec_prefix - ;; - --version) - echo @PCRE_VERSION@ - ;; - --cflags | --cflags-posix) - if test @includedir@ != /usr/include ; then - includes=-I@includedir@ - fi - echo $includes - ;; - --libs-posix) - echo -L@libdir@ -lpcreposix -lpcre - ;; - --libs) - echo -L@libdir@ -lpcre - ;; - *) - echo "${usage}" 1>&2 - exit 1 - ;; - esac - shift -done diff --git a/pcre/pcre.c b/pcre/pcre.c deleted file mode 100644 index 9cd178e7..00000000 --- a/pcre/pcre.c +++ /dev/null @@ -1,5163 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. 
- -Written by: Philip Hazel - - Copyright (c) 1997-2000 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ - - -/* Define DEBUG to get debugging output on stdout. */ - -/* #define DEBUG */ - -/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef -inline, and there are *still* stupid compilers about that don't like indented -pre-processor statements. I suppose it's only been 10 years... */ - -#ifdef DEBUG -#define DPRINTF(p) printf p -#else -#define DPRINTF(p) /*nothing*/ -#endif - -/* Include the internals header, which itself includes Standard C headers plus -the external pcre header. */ - -#include "internal.h" - - -/* Allow compilation as C++ source code, should anybody want to do that. */ - -#ifdef __cplusplus -#define class pcre_class -#endif - - -/* Number of items on the nested bracket stacks at compile time. This should -not be set greater than 200. 
*/ - -#define BRASTACK_SIZE 200 - - -/* The number of bytes in a literal character string above which we can't add -any more is different when UTF-8 characters may be encountered. */ - -#ifdef SUPPORT_UTF8 -#define MAXLIT 250 -#else -#define MAXLIT 255 -#endif - - -/* Min and max values for the common repeats; for the maxima, 0 => infinity */ - -static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; -static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; - -/* Text forms of OP_ values and things, for debugging (not all used) */ - -#ifdef DEBUG -static const char *OP_names[] = { - "End", "\\A", "\\B", "\\b", "\\D", "\\d", - "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z", - "Opt", "^", "$", "Any", "chars", "not", - "*", "*?", "+", "+?", "?", "??", "{", "{", "{", - "*", "*?", "+", "+?", "?", "??", "{", "{", "{", - "*", "*?", "+", "+?", "?", "??", "{", "{", "{", - "*", "*?", "+", "+?", "?", "??", "{", "{", - "class", "Ref", "Recurse", - "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", - "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", - "Brazero", "Braminzero", "Bra" -}; -#endif - -/* Table for handling escaped characters in the range '0'-'z'. Positive returns -are simple data values; negative values are for special things like \d and so -on. Zero means further processing is needed (for things like \x), or the escape -is invalid. */ - -static const short int escapes[] = { - 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ - 0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ - '@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ - 0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ - 0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ - 0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ - '`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */ - 0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */ - 0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */ - 0, 0, -ESC_z /* x - z */ -}; - -/* Tables of names of POSIX character classes and their lengths. The list is -terminated by a zero length entry. 
The first three must be alpha, upper, lower, -as this is assumed for handling case independence. */ - -static const char *posix_names[] = { - "alpha", "lower", "upper", - "alnum", "ascii", "cntrl", "digit", "graph", - "print", "punct", "space", "word", "xdigit" }; - -static const uschar posix_name_lengths[] = { - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; - -/* Table of class bit maps for each POSIX class; up to three may be combined -to form the class. */ - -static const int posix_class_maps[] = { - cbit_lower, cbit_upper, -1, /* alpha */ - cbit_lower, -1, -1, /* lower */ - cbit_upper, -1, -1, /* upper */ - cbit_digit, cbit_lower, cbit_upper, /* alnum */ - cbit_print, cbit_cntrl, -1, /* ascii */ - cbit_cntrl, -1, -1, /* cntrl */ - cbit_digit, -1, -1, /* digit */ - cbit_graph, -1, -1, /* graph */ - cbit_print, -1, -1, /* print */ - cbit_punct, -1, -1, /* punct */ - cbit_space, -1, -1, /* space */ - cbit_word, -1, -1, /* word */ - cbit_xdigit,-1, -1 /* xdigit */ -}; - - -/* Definition to allow mutual recursion */ - -static BOOL - compile_regex(int, int, int *, uschar **, const uschar **, const char **, - BOOL, int, int *, int *, compile_data *); - -/* Structure for building a chain of data that actually lives on the -stack, for holding the values of the subject pointer at the start of each -subpattern, so as to detect when an empty string has been matched by a -subpattern - to break infinite loops. */ - -typedef struct eptrblock { - struct eptrblock *prev; - const uschar *saved_eptr; -} eptrblock; - -/* Flag bits for the match() function */ - -#define match_condassert 0x01 /* Called to check a condition assertion */ -#define match_isgroup 0x02 /* Set if start of bracketed group */ - - - -/************************************************* -* Global variables * -*************************************************/ - -/* PCRE is thread-clean and doesn't use any global variables in the normal -sense. 
However, it calls memory allocation and free functions via the two -indirections below, which are can be changed by the caller, but are shared -between all threads. */ - -void *(*pcre_malloc)(size_t) = malloc; -void (*pcre_free)(void *) = free; - - - -/************************************************* -* Macros and tables for character handling * -*************************************************/ - -/* When UTF-8 encoding is being used, a character is no longer just a single -byte. The macros for character handling generate simple sequences when used in -byte-mode, and more complicated ones for UTF-8 characters. */ - -#ifndef SUPPORT_UTF8 -#define GETCHARINC(c, eptr) c = *eptr++; -#define GETCHARLEN(c, eptr, len) c = *eptr; -#define BACKCHAR(eptr) - -#else /* SUPPORT_UTF8 */ - -/* Get the next UTF-8 character, advancing the pointer */ - -#define GETCHARINC(c, eptr) \ - c = *eptr++; \ - if (md->utf8 && (c & 0xc0) == 0xc0) \ - { \ - int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int s = 6 - a; /* Amount to shift next byte */ \ - c &= utf8_table3[a]; /* Low order bits from first byte */ \ - while (a-- > 0) \ - { \ - c |= (*eptr++ & 0x3f) << s; \ - s += 6; \ - } \ - } - -/* Get the next UTF-8 character, not advancing the pointer, setting length */ - -#define GETCHARLEN(c, eptr, len) \ - c = *eptr; \ - len = 1; \ - if (md->utf8 && (c & 0xc0) == 0xc0) \ - { \ - int i; \ - int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int s = 6 - a; /* Amount to shift next byte */ \ - c &= utf8_table3[a]; /* Low order bits from first byte */ \ - for (i = 1; i <= a; i++) \ - { \ - c |= (eptr[i] & 0x3f) << s; \ - s += 6; \ - } \ - len += a; \ - } - -/* If the pointer is not at the start of a character, move it back until -it is. 
*/ - -#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--; - -#endif - - - -/************************************************* -* Default character tables * -*************************************************/ - -/* A default set of character tables is included in the PCRE binary. Its source -is built by the maketables auxiliary program, which uses the default C ctypes -functions, and put in the file chartables.c. These tables are used by PCRE -whenever the caller of pcre_compile() does not provide an alternate set of -tables. */ - -#include "chartables.c" - - - -#ifdef SUPPORT_UTF8 -/************************************************* -* Tables for UTF-8 support * -*************************************************/ - -/* These are the breakpoints for different numbers of bytes in a UTF-8 -character. */ - -static int utf8_table1[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; - -/* These are the indicator bits and the mask for the data bits to set in the -first byte of a character, indexed by the number of additional bytes. */ - -static int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; -static int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; - -/* Table of the number of extra characters, indexed by the first character -masked with 0x3f. The highest number for a valid UTF-8 character is in fact -0x3d. */ - -static uschar utf8_table4[] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; - - -/************************************************* -* Convert character value to UTF-8 * -*************************************************/ - -/* This function takes an integer value in the range 0 - 0x7fffffff -and encodes it as a UTF-8 character in 0 to 6 bytes. 
- -Arguments: - cvalue the character value - buffer pointer to buffer for result - at least 6 bytes long - -Returns: number of characters placed in the buffer -*/ - -static int -ord2utf8(int cvalue, uschar *buffer) -{ -register int i, j; -for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) - if (cvalue <= utf8_table1[i]) break; -*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]); -cvalue >>= 6 - i; -for (j = 0; j < i; j++) - { - *buffer++ = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } -return i + 1; -} -#endif - - - -/************************************************* -* Return version string * -*************************************************/ - -#define STRING(a) # a -#define XSTRING(s) STRING(s) - -const char * -pcre_version(void) -{ -return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE); -} - - - - -/************************************************* -* (Obsolete) Return info about compiled pattern * -*************************************************/ - -/* This is the original "info" function. It picks potentially useful data out -of the private structure, but its interface was too rigid. It remains for -backwards compatibility. The public options are passed back in an int - though -the re->options field has been expanded to a long int, all the public options -at the low end of it, and so even on 16-bit systems this will still be OK. -Therefore, I haven't changed the API for pcre_info(). 
- -Arguments: - external_re points to compiled code - optptr where to pass back the options - first_char where to pass back the first character, - or -1 if multiline and all branches start ^, - or -2 otherwise - -Returns: number of capturing subpatterns - or negative values on error -*/ - -int -pcre_info(const pcre *external_re, int *optptr, int *first_char) -{ -const real_pcre *re = (const real_pcre *)external_re; -if (re == NULL) return PCRE_ERROR_NULL; -if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; -if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS); -if (first_char != NULL) - *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char : - ((re->options & PCRE_STARTLINE) != 0)? -1 : -2; -return re->top_bracket; -} - - - -/************************************************* -* Return info about compiled pattern * -*************************************************/ - -/* This is a newer "info" function which has an extensible interface so -that additional items can be added compatibly. 
- -Arguments: - external_re points to compiled code - external_study points to study data, or NULL - what what information is required - where where to put the information - -Returns: 0 if data returned, negative on error -*/ - -int -pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what, - void *where) -{ -const real_pcre *re = (const real_pcre *)external_re; -const real_pcre_extra *study = (const real_pcre_extra *)study_data; - -if (re == NULL || where == NULL) return PCRE_ERROR_NULL; -if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; - -switch (what) - { - case PCRE_INFO_OPTIONS: - *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS; - break; - - case PCRE_INFO_SIZE: - *((size_t *)where) = re->size; - break; - - case PCRE_INFO_CAPTURECOUNT: - *((int *)where) = re->top_bracket; - break; - - case PCRE_INFO_BACKREFMAX: - *((int *)where) = re->top_backref; - break; - - case PCRE_INFO_FIRSTCHAR: - *((int *)where) = - ((re->options & PCRE_FIRSTSET) != 0)? re->first_char : - ((re->options & PCRE_STARTLINE) != 0)? -1 : -2; - break; - - case PCRE_INFO_FIRSTTABLE: - *((const uschar **)where) = - (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)? - study->start_bits : NULL; - break; - - case PCRE_INFO_LASTLITERAL: - *((int *)where) = - ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1; - break; - - default: return PCRE_ERROR_BADOPTION; - } - -return 0; -} - - - -#ifdef DEBUG -/************************************************* -* Debugging function to print chars * -*************************************************/ - -/* Print a sequence of chars in printable format, stopping at the end of the -subject if the requested. 
- -Arguments: - p points to characters - length number to print - is_subject TRUE if printing from within md->start_subject - md pointer to matching data block, if is_subject is TRUE - -Returns: nothing -*/ - -static void -pchars(const uschar *p, int length, BOOL is_subject, match_data *md) -{ -int c; -if (is_subject && length > md->end_subject - p) length = md->end_subject - p; -while (length-- > 0) - if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c); -} -#endif - - - - -/************************************************* -* Handle escapes * -*************************************************/ - -/* This function is called when a \ has been encountered. It either returns a -positive value for a simple escape such as \n, or a negative value which -encodes one of the more complicated things such as \d. When UTF-8 is enabled, -a positive value greater than 255 may be returned. On entry, ptr is pointing at -the \. On exit, it is on the final character of the escape sequence. - -Arguments: - ptrptr points to the pattern position pointer - errorptr points to the pointer to the error message - bracount number of previous extracting brackets - options the options bits - isclass TRUE if inside a character class - cd pointer to char tables block - -Returns: zero or positive => a data character - negative => a special escape sequence - on error, errorptr is set -*/ - -static int -check_escape(const uschar **ptrptr, const char **errorptr, int bracount, - int options, BOOL isclass, compile_data *cd) -{ -const uschar *ptr = *ptrptr; -int c, i; - -/* If backslash is at the end of the pattern, it's an error. */ - -c = *(++ptr); -if (c == 0) *errorptr = ERR1; - -/* Digits or letters may have special meaning; all others are literals. */ - -else if (c < '0' || c > 'z') {} - -/* Do an initial lookup in a table. A non-zero result is something that can be -returned immediately. Otherwise further processing may be required. 
*/ - -else if ((i = escapes[c - '0']) != 0) c = i; - -/* Escapes that need further processing, or are illegal. */ - -else - { - const uschar *oldptr; - switch (c) - { - /* The handling of escape sequences consisting of a string of digits - starting with one that is not zero is not straightforward. By experiment, - the way Perl works seems to be as follows: - - Outside a character class, the digits are read as a decimal number. If the - number is less than 10, or if there are that many previous extracting - left brackets, then it is a back reference. Otherwise, up to three octal - digits are read to form an escaped byte. Thus \123 is likely to be octal - 123 (cf \0123, which is octal 012 followed by the literal 3). If the octal - value is greater than 377, the least significant 8 bits are taken. Inside a - character class, \ followed by a digit is always an octal number. */ - - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - - if (!isclass) - { - oldptr = ptr; - c -= '0'; - while ((cd->ctypes[ptr[1]] & ctype_digit) != 0) - c = c * 10 + *(++ptr) - '0'; - if (c < 10 || c <= bracount) - { - c = -(ESC_REF + c); - break; - } - ptr = oldptr; /* Put the pointer back and fall through */ - } - - /* Handle an octal number following \. If the first digit is 8 or 9, Perl - generates a binary zero byte and treats the digit as a following literal. - Thus we have to pull back the pointer by one. */ - - if ((c = *ptr) >= '8') - { - ptr--; - c = 0; - break; - } - - /* \0 always starts an octal number, but we may drop through to here with a - larger first octal digit. */ - - case '0': - c -= '0'; - while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && - ptr[1] != '8' && ptr[1] != '9') - c = c * 8 + *(++ptr) - '0'; - c &= 255; /* Take least significant 8 bits */ - break; - - /* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number - which can be greater than 0xff, but only if the ddd are hex digits. 
*/ - - case 'x': -#ifdef SUPPORT_UTF8 - if (ptr[1] == '{' && (options & PCRE_UTF8) != 0) - { - const uschar *pt = ptr + 2; - register int count = 0; - c = 0; - while ((cd->ctypes[*pt] & ctype_xdigit) != 0) - { - count++; - c = c * 16 + cd->lcc[*pt] - - (((cd->ctypes[*pt] & ctype_digit) != 0)? '0' : 'W'); - pt++; - } - if (*pt == '}') - { - if (c < 0 || count > 8) *errorptr = ERR34; - ptr = pt; - break; - } - /* If the sequence of hex digits does not end with '}', then we don't - recognize this construct; fall through to the normal \x handling. */ - } -#endif - - /* Read just a single hex char */ - - c = 0; - while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) - { - ptr++; - c = c * 16 + cd->lcc[*ptr] - - (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); - } - break; - - /* Other special escapes not starting with a digit are straightforward */ - - case 'c': - c = *(++ptr); - if (c == 0) - { - *errorptr = ERR2; - return 0; - } - - /* A letter is upper-cased; then the 0x40 bit is flipped */ - - if (c >= 'a' && c <= 'z') c = cd->fcc[c]; - c ^= 0x40; - break; - - /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any - other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, - for Perl compatibility, it is a literal. This code looks a bit odd, but - there used to be some cases other than the default, and there may be again - in future, so I haven't "optimized" it. */ - - default: - if ((options & PCRE_EXTRA) != 0) switch(c) - { - default: - *errorptr = ERR3; - break; - } - break; - } - } - -*ptrptr = ptr; -return c; -} - - - -/************************************************* -* Check for counted repeat * -*************************************************/ - -/* This function is called when a '{' is encountered in a place where it might -start a quantifier. It looks ahead to see if it really is a quantifier or not. -It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd} -where the ddds are digits. 
- -Arguments: - p pointer to the first char after '{' - cd pointer to char tables block - -Returns: TRUE or FALSE -*/ - -static BOOL -is_counted_repeat(const uschar *p, compile_data *cd) -{ -if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; -while ((cd->ctypes[*p] & ctype_digit) != 0) p++; -if (*p == '}') return TRUE; - -if (*p++ != ',') return FALSE; -if (*p == '}') return TRUE; - -if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; -while ((cd->ctypes[*p] & ctype_digit) != 0) p++; -return (*p == '}'); -} - - - -/************************************************* -* Read repeat counts * -*************************************************/ - -/* Read an item of the form {n,m} and return the values. This is called only -after is_counted_repeat() has confirmed that a repeat-count quantifier exists, -so the syntax is guaranteed to be correct, but we need to check the values. - -Arguments: - p pointer to first char after '{' - minp pointer to int for min - maxp pointer to int for max - returned as -1 if no max - errorptr points to pointer to error message - cd pointer to character tables clock - -Returns: pointer to '}' on success; - current ptr on error, with errorptr set -*/ - -static const uschar * -read_repeat_counts(const uschar *p, int *minp, int *maxp, - const char **errorptr, compile_data *cd) -{ -int min = 0; -int max = -1; - -while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; - -if (*p == '}') max = min; else - { - if (*(++p) != '}') - { - max = 0; - while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; - if (max < min) - { - *errorptr = ERR4; - return p; - } - } - } - -/* Do paranoid checks, then fill in the required variables, and pass back the -pointer to the terminating '}'. 
*/ - -if (min < 0 || min > 65535 || max < -1 || max > 65535) - *errorptr = ERR5; -else - { - *minp = min; - *maxp = max; - } -return p; -} - - - -/************************************************* -* Find the fixed length of a pattern * -*************************************************/ - -/* Scan a pattern and compute the fixed length of subject that will match it, -if the length is fixed. This is needed for dealing with backward assertions. - -Arguments: - code points to the start of the pattern (the bracket) - options the compiling options - -Returns: the fixed length, or -1 if there is no fixed length -*/ - -static int -find_fixedlength(uschar *code, int options) -{ -int length = -1; - -register int branchlength = 0; -register uschar *cc = code + 3; - -/* Scan along the opcodes for this branch. If we get to the end of the -branch, check the length against that of the other branches. */ - -for (;;) - { - int d; - register int op = *cc; - if (op >= OP_BRA) op = OP_BRA; - - switch (op) - { - case OP_BRA: - case OP_ONCE: - case OP_COND: - d = find_fixedlength(cc, options); - if (d < 0) return -1; - branchlength += d; - do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT); - cc += 3; - break; - - /* Reached end of a branch; if it's a ket it is the end of a nested - call. If it's ALT it is an alternation in a nested call. If it is - END it's the end of the outer call. All can be handled by the same code. 
*/ - - case OP_ALT: - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_END: - if (length < 0) length = branchlength; - else if (length != branchlength) return -1; - if (*cc != OP_ALT) return length; - cc += 3; - branchlength = 0; - break; - - /* Skip over assertive subpatterns */ - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT); - cc += 3; - break; - - /* Skip over things that don't match chars */ - - case OP_REVERSE: - cc++; - /* Fall through */ - - case OP_CREF: - case OP_OPT: - cc++; - /* Fall through */ - - case OP_SOD: - case OP_EOD: - case OP_EODN: - case OP_CIRC: - case OP_DOLL: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - cc++; - break; - - /* Handle char strings. In UTF-8 mode we must count characters, not bytes. - This requires a scan of the string, unfortunately. We assume valid UTF-8 - strings, so all we do is reduce the length by one for byte whose bits are - 10xxxxxx. */ - - case OP_CHARS: - branchlength += *(++cc); -#ifdef SUPPORT_UTF8 - for (d = 1; d <= *cc; d++) - if ((cc[d] & 0xc0) == 0x80) branchlength--; -#endif - cc += *cc + 1; - break; - - /* Handle exact repetitions */ - - case OP_EXACT: - case OP_TYPEEXACT: - branchlength += (cc[1] << 8) + cc[2]; - cc += 4; - break; - - /* Handle single-char matchers */ - - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - branchlength++; - cc++; - break; - - - /* Check a class for variable quantification */ - - case OP_CLASS: - cc += (*cc == OP_REF)? 
2 : 33; - - switch (*cc) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - return -1; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1; - branchlength += (cc[1] << 8) + cc[2]; - cc += 5; - break; - - default: - branchlength++; - } - break; - - /* Anything else is variable length */ - - default: - return -1; - } - } -/* Control never gets here */ -} - - - - -/************************************************* -* Check for POSIX class syntax * -*************************************************/ - -/* This function is called when the sequence "[:" or "[." or "[=" is -encountered in a character class. It checks whether this is followed by an -optional ^ and then a sequence of letters, terminated by a matching ":]" or -".]" or "=]". - -Argument: - ptr pointer to the initial [ - endptr where to return the end pointer - cd pointer to compile data - -Returns: TRUE or FALSE -*/ - -static BOOL -check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd) -{ -int terminator; /* Don't combine these lines; the Solaris cc */ -terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ -if (*(++ptr) == '^') ptr++; -while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; -if (*ptr == terminator && ptr[1] == ']') - { - *endptr = ptr; - return TRUE; - } -return FALSE; -} - - - - -/************************************************* -* Check POSIX class name * -*************************************************/ - -/* This function is called to check the name given in a POSIX-style class entry -such as [:alnum:]. 
- -Arguments: - ptr points to the first letter - len the length of the name - -Returns: a value representing the name, or -1 if unknown -*/ - -static int -check_posix_name(const uschar *ptr, int len) -{ -register int yield = 0; -while (posix_name_lengths[yield] != 0) - { - if (len == posix_name_lengths[yield] && - strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield; - yield++; - } -return -1; -} - - - - -/************************************************* -* Compile one branch * -*************************************************/ - -/* Scan the pattern, compiling it into the code vector. - -Arguments: - options the option bits - brackets points to number of brackets used - code points to the pointer to the current code point - ptrptr points to the current pattern pointer - errorptr points to pointer to error message - optchanged set to the value of the last OP_OPT item compiled - reqchar set to the last literal character required, else -1 - countlits set to count of mandatory literal characters - cd contains pointers to tables - -Returns: TRUE on success - FALSE, with *errorptr set on error -*/ - -static BOOL -compile_branch(int options, int *brackets, uschar **codeptr, - const uschar **ptrptr, const char **errorptr, int *optchanged, - int *reqchar, int *countlits, compile_data *cd) -{ -int repeat_type, op_type; -int repeat_min, repeat_max; -int bravalue, length; -int greedy_default, greedy_non_default; -int prevreqchar; -int condcount = 0; -int subcountlits = 0; -register int c; -register uschar *code = *codeptr; -uschar *tempcode; -const uschar *ptr = *ptrptr; -const uschar *tempptr; -uschar *previous = NULL; -uschar class[32]; - -/* Set up the default and non-default settings for greediness */ - -greedy_default = ((options & PCRE_UNGREEDY) != 0); -greedy_non_default = greedy_default ^ 1; - -/* Initialize no required char, and count of literals */ - -*reqchar = prevreqchar = -1; -*countlits = 0; - -/* Switch on next character until the end of 
the branch */ - -for (;; ptr++) - { - BOOL negate_class; - int class_charcount; - int class_lastchar; - int newoptions; - int condref; - int subreqchar; - - c = *ptr; - if ((options & PCRE_EXTENDED) != 0) - { - if ((cd->ctypes[c] & ctype_space) != 0) continue; - if (c == '#') - { - /* The space before the ; is to avoid a warning on a silly compiler - on the Macintosh. */ - while ((c = *(++ptr)) != 0 && c != '\n') ; - continue; - } - } - - switch(c) - { - /* The branch terminates at end of string, |, or ). */ - - case 0: - case '|': - case ')': - *codeptr = code; - *ptrptr = ptr; - return TRUE; - - /* Handle single-character metacharacters */ - - case '^': - previous = NULL; - *code++ = OP_CIRC; - break; - - case '$': - previous = NULL; - *code++ = OP_DOLL; - break; - - case '.': - previous = code; - *code++ = OP_ANY; - break; - - /* Character classes. These always build a 32-byte bitmap of the permitted - characters, except in the special case where there is only one character. - For negated classes, we build the map as usual, then invert it at the end. - */ - - case '[': - previous = code; - *code++ = OP_CLASS; - - /* If the first character is '^', set the negation flag and skip it. */ - - if ((c = *(++ptr)) == '^') - { - negate_class = TRUE; - c = *(++ptr); - } - else negate_class = FALSE; - - /* Keep a count of chars so that we can optimize the case of just a single - character. */ - - class_charcount = 0; - class_lastchar = -1; - - /* Initialize the 32-char bit map to all zeros. We have to build the - map in a temporary bit of store, in case the class contains only 1 - character, because in that case the compiled code doesn't use the - bit map. */ - - memset(class, 0, 32 * sizeof(uschar)); - - /* Process characters until ] is reached. By writing this as a "do" it - means that an initial ] is taken as a data character. */ - - do - { - if (c == 0) - { - *errorptr = ERR6; - goto FAILED; - } - - /* Handle POSIX class names. 
Perl allows a negation extension of the - form [:^name]. A square bracket that doesn't match the syntax is - treated as a literal. We also recognize the POSIX constructions - [.ch.] and [=ch=] ("collating elements") and fault them, as Perl - 5.6 does. */ - - if (c == '[' && - (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && - check_posix_syntax(ptr, &tempptr, cd)) - { - BOOL local_negate = FALSE; - int posix_class, i; - register const uschar *cbits = cd->cbits; - - if (ptr[1] != ':') - { - *errorptr = ERR31; - goto FAILED; - } - - ptr += 2; - if (*ptr == '^') - { - local_negate = TRUE; - ptr++; - } - - posix_class = check_posix_name(ptr, tempptr - ptr); - if (posix_class < 0) - { - *errorptr = ERR30; - goto FAILED; - } - - /* If matching is caseless, upper and lower are converted to - alpha. This relies on the fact that the class table starts with - alpha, lower, upper as the first 3 entries. */ - - if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) - posix_class = 0; - - /* Or into the map we are building up to 3 of the static class - tables, or their negations. */ - - posix_class *= 3; - for (i = 0; i < 3; i++) - { - int taboffset = posix_class_maps[posix_class + i]; - if (taboffset < 0) break; - if (local_negate) - for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset]; - else - for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset]; - } - - ptr = tempptr + 1; - class_charcount = 10; /* Set > 1; assumes more than 1 per class */ - continue; - } - - /* Backslash may introduce a single character, or it may introduce one - of the specials, which just set a flag. Escaped items are checked for - validity in the pre-compiling pass. The sequence \b is a special case. - Inside a class (and only there) it is treated as backspace. Elsewhere - it marks a word boundary. Other escapes have preset maps ready to - or into the one we are building. We assume they have more than one - character in them, so set class_count bigger than one. 
*/ - - if (c == '\\') - { - c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); - if (-c == ESC_b) c = '\b'; - else if (c < 0) - { - register const uschar *cbits = cd->cbits; - class_charcount = 10; - switch (-c) - { - case ESC_d: - for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit]; - continue; - - case ESC_D: - for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit]; - continue; - - case ESC_w: - for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word]; - continue; - - case ESC_W: - for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word]; - continue; - - case ESC_s: - for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space]; - continue; - - case ESC_S: - for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space]; - continue; - - default: - *errorptr = ERR7; - goto FAILED; - } - } - - /* Fall through if single character, but don't at present allow - chars > 255 in UTF-8 mode. */ - -#ifdef SUPPORT_UTF8 - if (c > 255) - { - *errorptr = ERR33; - goto FAILED; - } -#endif - } - - /* A single character may be followed by '-' to form a range. However, - Perl does not permit ']' to be the end of the range. A '-' character - here is treated as a literal. */ - - if (ptr[1] == '-' && ptr[2] != ']') - { - int d; - ptr += 2; - d = *ptr; - - if (d == 0) - { - *errorptr = ERR6; - goto FAILED; - } - - /* The second part of a range can be a single-character escape, but - not any of the other escapes. Perl 5.6 treats a hyphen as a literal - in such circumstances. 
*/ - - if (d == '\\') - { - const uschar *oldptr = ptr; - d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); - -#ifdef SUPPORT_UTF8 - if (d > 255) - { - *errorptr = ERR33; - goto FAILED; - } -#endif - /* \b is backslash; any other special means the '-' was literal */ - - if (d < 0) - { - if (d == -ESC_b) d = '\b'; else - { - ptr = oldptr - 2; - goto SINGLE_CHARACTER; /* A few lines below */ - } - } - } - - if (d < c) - { - *errorptr = ERR8; - goto FAILED; - } - - for (; c <= d; c++) - { - class[c/8] |= (1 << (c&7)); - if ((options & PCRE_CASELESS) != 0) - { - int uc = cd->fcc[c]; /* flip case */ - class[uc/8] |= (1 << (uc&7)); - } - class_charcount++; /* in case a one-char range */ - class_lastchar = c; - } - continue; /* Go get the next char in the class */ - } - - /* Handle a lone single character - we can get here for a normal - non-escape char, or after \ that introduces a single character. */ - - SINGLE_CHARACTER: - - class [c/8] |= (1 << (c&7)); - if ((options & PCRE_CASELESS) != 0) - { - c = cd->fcc[c]; /* flip case */ - class[c/8] |= (1 << (c&7)); - } - class_charcount++; - class_lastchar = c; - } - - /* Loop until ']' reached; the check for end of string happens inside the - loop. This "while" is the end of the "do" above. */ - - while ((c = *(++ptr)) != ']'); - - /* If class_charcount is 1 and class_lastchar is not negative, we saw - precisely one character. This doesn't need the whole 32-byte bit map. - We turn it into a 1-character OP_CHAR if it's positive, or OP_NOT if - it's negative. */ - - if (class_charcount == 1 && class_lastchar >= 0) - { - if (negate_class) - { - code[-1] = OP_NOT; - } - else - { - code[-1] = OP_CHARS; - *code++ = 1; - } - *code++ = class_lastchar; - } - - /* Otherwise, negate the 32-byte map if necessary, and copy it into - the code vector. 
*/ - - else - { - if (negate_class) - for (c = 0; c < 32; c++) code[c] = ~class[c]; - else - memcpy(code, class, 32); - code += 32; - } - break; - - /* Various kinds of repeat */ - - case '{': - if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; - ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); - if (*errorptr != NULL) goto FAILED; - goto REPEAT; - - case '*': - repeat_min = 0; - repeat_max = -1; - goto REPEAT; - - case '+': - repeat_min = 1; - repeat_max = -1; - goto REPEAT; - - case '?': - repeat_min = 0; - repeat_max = 1; - - REPEAT: - if (previous == NULL) - { - *errorptr = ERR9; - goto FAILED; - } - - /* If the next character is '?' this is a minimizing repeat, by default, - but if PCRE_UNGREEDY is set, it works the other way round. Advance to the - next character. */ - - if (ptr[1] == '?') - { repeat_type = greedy_non_default; ptr++; } - else repeat_type = greedy_default; - - /* If previous was a string of characters, chop off the last one and use it - as the subject of the repeat. If there was only one character, we can - abolish the previous item altogether. A repeat with a zero minimum wipes - out any reqchar setting, backing up to the previous value. We must also - adjust the countlits value. */ - - if (*previous == OP_CHARS) - { - int len = previous[1]; - - if (repeat_min == 0) *reqchar = prevreqchar; - *countlits += repeat_min - 1; - - if (len == 1) - { - c = previous[2]; - code = previous; - } - else - { - c = previous[len+1]; - previous[1]--; - code--; - } - op_type = 0; /* Use single-char op codes */ - goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ - } - - /* If previous was a single negated character ([^a] or similar), we use - one of the special opcodes, replacing it. The code is shared with single- - character repeats by adding a suitable offset into repeat_type. 
*/ - - else if ((int)*previous == OP_NOT) - { - op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */ - c = previous[1]; - code = previous; - goto OUTPUT_SINGLE_REPEAT; - } - - /* If previous was a character type match (\d or similar), abolish it and - create a suitable repeat item. The code is shared with single-character - repeats by adding a suitable offset into repeat_type. */ - - else if ((int)*previous < OP_EODN || *previous == OP_ANY) - { - op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ - c = *previous; - code = previous; - - OUTPUT_SINGLE_REPEAT: - - /* If the maximum is zero then the minimum must also be zero; Perl allows - this case, so we do too - by simply omitting the item altogether. */ - - if (repeat_max == 0) goto END_REPEAT; - - /* Combine the op_type with the repeat_type */ - - repeat_type += op_type; - - /* A minimum of zero is handled either as the special case * or ?, or as - an UPTO, with the maximum given. */ - - if (repeat_min == 0) - { - if (repeat_max == -1) *code++ = OP_STAR + repeat_type; - else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; - else - { - *code++ = OP_UPTO + repeat_type; - *code++ = repeat_max >> 8; - *code++ = (repeat_max & 255); - } - } - - /* The case {1,} is handled as the special case + */ - - else if (repeat_min == 1 && repeat_max == -1) - *code++ = OP_PLUS + repeat_type; - - /* The case {n,n} is just an EXACT, while the general case {n,m} is - handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */ - - else - { - if (repeat_min != 1) - { - *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ - *code++ = repeat_min >> 8; - *code++ = (repeat_min & 255); - } - - /* If the mininum is 1 and the previous item was a character string, - we either have to put back the item that got cancelled if the string - length was 1, or add the character back onto the end of a longer - string. For a character type nothing need be done; it will just get - put back naturally. 
Note that the final character is always going to - get added below. */ - - else if (*previous == OP_CHARS) - { - if (code == previous) code += 2; else previous[1]++; - } - - /* For a single negated character we also have to put back the - item that got cancelled. */ - - else if (*previous == OP_NOT) code++; - - /* If the maximum is unlimited, insert an OP_STAR. */ - - if (repeat_max < 0) - { - *code++ = c; - *code++ = OP_STAR + repeat_type; - } - - /* Else insert an UPTO if the max is greater than the min. */ - - else if (repeat_max != repeat_min) - { - *code++ = c; - repeat_max -= repeat_min; - *code++ = OP_UPTO + repeat_type; - *code++ = repeat_max >> 8; - *code++ = (repeat_max & 255); - } - } - - /* The character or character type itself comes last in all cases. */ - - *code++ = c; - } - - /* If previous was a character class or a back reference, we put the repeat - stuff after it, but just skip the item if the repeat was {0,0}. */ - - else if (*previous == OP_CLASS || *previous == OP_REF) - { - if (repeat_max == 0) - { - code = previous; - goto END_REPEAT; - } - if (repeat_min == 0 && repeat_max == -1) - *code++ = OP_CRSTAR + repeat_type; - else if (repeat_min == 1 && repeat_max == -1) - *code++ = OP_CRPLUS + repeat_type; - else if (repeat_min == 0 && repeat_max == 1) - *code++ = OP_CRQUERY + repeat_type; - else - { - *code++ = OP_CRRANGE + repeat_type; - *code++ = repeat_min >> 8; - *code++ = repeat_min & 255; - if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ - *code++ = repeat_max >> 8; - *code++ = repeat_max & 255; - } - } - - /* If previous was a bracket group, we may have to replicate it in certain - cases. 
*/ - - else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE || - (int)*previous == OP_COND) - { - register int i; - int ketoffset = 0; - int len = code - previous; - uschar *bralink = NULL; - - /* If the maximum repeat count is unlimited, find the end of the bracket - by scanning through from the start, and compute the offset back to it - from the current code pointer. There may be an OP_OPT setting following - the final KET, so we can't find the end just by going back from the code - pointer. */ - - if (repeat_max == -1) - { - register uschar *ket = previous; - do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET); - ketoffset = code - ket; - } - - /* The case of a zero minimum is special because of the need to stick - OP_BRAZERO in front of it, and because the group appears once in the - data, whereas in other cases it appears the minimum number of times. For - this reason, it is simplest to treat this case separately, as otherwise - the code gets far too mess. There are several special subcases when the - minimum is zero. */ - - if (repeat_min == 0) - { - /* If we set up a required char from the bracket, we must back off - to the previous value and reset the countlits value too. */ - - if (subcountlits > 0) - { - *reqchar = prevreqchar; - *countlits -= subcountlits; - } - - /* If the maximum is also zero, we just omit the group from the output - altogether. */ - - if (repeat_max == 0) - { - code = previous; - goto END_REPEAT; - } - - /* If the maximum is 1 or unlimited, we just have to stick in the - BRAZERO and do no more at this point. */ - - if (repeat_max <= 1) - { - memmove(previous+1, previous, len); - code++; - *previous++ = OP_BRAZERO + repeat_type; - } - - /* If the maximum is greater than 1 and limited, we have to replicate - in a nested fashion, sticking OP_BRAZERO before each set of brackets. - The first one has to be handled carefully because it's the original - copy, which has to be moved up. 
The remainder can be handled by code - that is common with the non-zero minimum case below. We just have to - adjust the value or repeat_max, since one less copy is required. */ - - else - { - int offset; - memmove(previous+4, previous, len); - code += 4; - *previous++ = OP_BRAZERO + repeat_type; - *previous++ = OP_BRA; - - /* We chain together the bracket offset fields that have to be - filled in later when the ends of the brackets are reached. */ - - offset = (bralink == NULL)? 0 : previous - bralink; - bralink = previous; - *previous++ = offset >> 8; - *previous++ = offset & 255; - } - - repeat_max--; - } - - /* If the minimum is greater than zero, replicate the group as many - times as necessary, and adjust the maximum to the number of subsequent - copies that we need. */ - - else - { - for (i = 1; i < repeat_min; i++) - { - memcpy(code, previous, len); - code += len; - } - if (repeat_max > 0) repeat_max -= repeat_min; - } - - /* This code is common to both the zero and non-zero minimum cases. If - the maximum is limited, it replicates the group in a nested fashion, - remembering the bracket starts on a stack. In the case of a zero minimum, - the first one was set up above. In all cases the repeat_max now specifies - the number of additional copies needed. */ - - if (repeat_max >= 0) - { - for (i = repeat_max - 1; i >= 0; i--) - { - *code++ = OP_BRAZERO + repeat_type; - - /* All but the final copy start a new nesting, maintaining the - chain of brackets outstanding. */ - - if (i != 0) - { - int offset; - *code++ = OP_BRA; - offset = (bralink == NULL)? 0 : code - bralink; - bralink = code; - *code++ = offset >> 8; - *code++ = offset & 255; - } - - memcpy(code, previous, len); - code += len; - } - - /* Now chain through the pending brackets, and fill in their length - fields (which are holding the chain links pro tem). 
*/ - - while (bralink != NULL) - { - int oldlinkoffset; - int offset = code - bralink + 1; - uschar *bra = code - offset; - oldlinkoffset = (bra[1] << 8) + bra[2]; - bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; - *code++ = OP_KET; - *code++ = bra[1] = offset >> 8; - *code++ = bra[2] = (offset & 255); - } - } - - /* If the maximum is unlimited, set a repeater in the final copy. We - can't just offset backwards from the current code point, because we - don't know if there's been an options resetting after the ket. The - correct offset was computed above. */ - - else code[-ketoffset] = OP_KETRMAX + repeat_type; - } - - /* Else there's some kind of shambles */ - - else - { - *errorptr = ERR11; - goto FAILED; - } - - /* In all case we no longer have a previous item. */ - - END_REPEAT: - previous = NULL; - break; - - - /* Start of nested bracket sub-expression, or comment or lookahead or - lookbehind or option setting or condition. First deal with special things - that can come after a bracket; all are introduced by ?, and the appearance - of any of them means that this is not a referencing group. They were - checked for validity in the first pass over the string, so we don't have to - check for syntax errors here. 
*/ - - case '(': - newoptions = options; - condref = -1; - - if (*(++ptr) == '?') - { - int set, unset; - int *optset; - - switch (*(++ptr)) - { - case '#': /* Comment; skip to ket */ - ptr++; - while (*ptr != ')') ptr++; - continue; - - case ':': /* Non-extracting bracket */ - bravalue = OP_BRA; - ptr++; - break; - - case '(': - bravalue = OP_COND; /* Conditional group */ - if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) - { - condref = *ptr - '0'; - while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; - if (condref == 0) - { - *errorptr = ERR35; - goto FAILED; - } - ptr++; - } - else ptr--; - break; - - case '=': /* Positive lookahead */ - bravalue = OP_ASSERT; - ptr++; - break; - - case '!': /* Negative lookahead */ - bravalue = OP_ASSERT_NOT; - ptr++; - break; - - case '<': /* Lookbehinds */ - switch (*(++ptr)) - { - case '=': /* Positive lookbehind */ - bravalue = OP_ASSERTBACK; - ptr++; - break; - - case '!': /* Negative lookbehind */ - bravalue = OP_ASSERTBACK_NOT; - ptr++; - break; - - default: /* Syntax error */ - *errorptr = ERR24; - goto FAILED; - } - break; - - case '>': /* One-time brackets */ - bravalue = OP_ONCE; - ptr++; - break; - - case 'R': /* Pattern recursion */ - *code++ = OP_RECURSE; - ptr++; - continue; - - default: /* Option setting */ - set = unset = 0; - optset = &set; - - while (*ptr != ')' && *ptr != ':') - { - switch (*ptr++) - { - case '-': optset = &unset; break; - - case 'i': *optset |= PCRE_CASELESS; break; - case 'm': *optset |= PCRE_MULTILINE; break; - case 's': *optset |= PCRE_DOTALL; break; - case 'x': *optset |= PCRE_EXTENDED; break; - case 'U': *optset |= PCRE_UNGREEDY; break; - case 'X': *optset |= PCRE_EXTRA; break; - - default: - *errorptr = ERR12; - goto FAILED; - } - } - - /* Set up the changed option bits, but don't change anything yet. 
*/ - - newoptions = (options | set) & (~unset); - - /* If the options ended with ')' this is not the start of a nested - group with option changes, so the options change at this level. At top - level there is nothing else to be done (the options will in fact have - been set from the start of compiling as a result of the first pass) but - at an inner level we must compile code to change the ims options if - necessary, and pass the new setting back so that it can be put at the - start of any following branches, and when this group ends, a resetting - item can be compiled. */ - - if (*ptr == ')') - { - if ((options & PCRE_INGROUP) != 0 && - (options & PCRE_IMS) != (newoptions & PCRE_IMS)) - { - *code++ = OP_OPT; - *code++ = *optchanged = newoptions & PCRE_IMS; - } - options = newoptions; /* Change options at this level */ - previous = NULL; /* This item can't be repeated */ - continue; /* It is complete */ - } - - /* If the options ended with ':' we are heading into a nested group - with possible change of options. Such groups are non-capturing and are - not assertions of any kind. All we need to do is skip over the ':'; - the newoptions value is handled below. */ - - bravalue = OP_BRA; - ptr++; - } - } - - /* Else we have a referencing group; adjust the opcode. */ - - else - { - if (++(*brackets) > EXTRACT_MAX) - { - *errorptr = ERR13; - goto FAILED; - } - bravalue = OP_BRA + *brackets; - } - - /* Process nested bracketed re. Assertions may not be repeated, but other - kinds can be. We copy code into a non-register variable in order to be able - to pass its address because some compilers complain otherwise. Pass in a - new setting for the ims options if they have changed. */ - - previous = (bravalue >= OP_ONCE)? code : NULL; - *code = bravalue; - tempcode = code; - - if (!compile_regex( - options | PCRE_INGROUP, /* Set for all nested groups */ - ((options & PCRE_IMS) != (newoptions & PCRE_IMS))? 
- newoptions & PCRE_IMS : -1, /* Pass ims options if changed */ - brackets, /* Bracket level */ - &tempcode, /* Where to put code (updated) */ - &ptr, /* Input pointer (updated) */ - errorptr, /* Where to put an error message */ - (bravalue == OP_ASSERTBACK || - bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ - condref, /* Condition reference number */ - &subreqchar, /* For possible last char */ - &subcountlits, /* For literal count */ - cd)) /* Tables block */ - goto FAILED; - - /* At the end of compiling, code is still pointing to the start of the - group, while tempcode has been updated to point past the end of the group - and any option resetting that may follow it. The pattern pointer (ptr) - is on the bracket. */ - - /* If this is a conditional bracket, check that there are no more than - two branches in the group. */ - - if (bravalue == OP_COND) - { - uschar *tc = code; - condcount = 0; - - do { - condcount++; - tc += (tc[1] << 8) | tc[2]; - } - while (*tc != OP_KET); - - if (condcount > 2) - { - *errorptr = ERR27; - goto FAILED; - } - } - - /* Handle updating of the required character. If the subpattern didn't - set one, leave it as it was. Otherwise, update it for normal brackets of - all kinds, forward assertions, and conditions with two branches. Don't - update the literal count for forward assertions, however. If the bracket - is followed by a quantifier with zero repeat, we have to back off. Hence - the definition of prevreqchar and subcountlits outside the main loop so - that they can be accessed for the back off. */ - - if (subreqchar > 0 && - (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT || - (bravalue == OP_COND && condcount == 2))) - { - prevreqchar = *reqchar; - *reqchar = subreqchar; - if (bravalue != OP_ASSERT) *countlits += subcountlits; - } - - /* Now update the main code pointer to the end of the group. 
*/ - - code = tempcode; - - /* Error if hit end of pattern */ - - if (*ptr != ')') - { - *errorptr = ERR14; - goto FAILED; - } - break; - - /* Check \ for being a real metacharacter; if not, fall through and handle - it as a data character at the start of a string. Escape items are checked - for validity in the pre-compiling pass. */ - - case '\\': - tempptr = ptr; - c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); - - /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values - are arranged to be the negation of the corresponding OP_values. For the - back references, the values are ESC_REF plus the reference number. Only - back references and those types that consume a character may be repeated. - We can test for values between ESC_b and ESC_Z for the latter; this may - have to change if any new ones are ever created. */ - - if (c < 0) - { - if (-c >= ESC_REF) - { - previous = code; - *code++ = OP_REF; - *code++ = -c - ESC_REF; - } - else - { - previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; - *code++ = -c; - } - continue; - } - - /* Data character: reset and fall through */ - - ptr = tempptr; - c = '\\'; - - /* Handle a run of data characters until a metacharacter is encountered. - The first character is guaranteed not to be whitespace or # when the - extended flag is set. */ - - NORMAL_CHAR: - default: - previous = code; - *code = OP_CHARS; - code += 2; - length = 0; - - do - { - if ((options & PCRE_EXTENDED) != 0) - { - if ((cd->ctypes[c] & ctype_space) != 0) continue; - if (c == '#') - { - /* The space before the ; is to avoid a warning on a silly compiler - on the Macintosh. */ - while ((c = *(++ptr)) != 0 && c != '\n') ; - if (c == 0) break; - continue; - } - } - - /* Backslash may introduce a data char or a metacharacter. Escaped items - are checked for validity in the pre-compiling pass. Stop the string - before a metaitem. 
*/ - - if (c == '\\') - { - tempptr = ptr; - c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); - if (c < 0) { ptr = tempptr; break; } - - /* If a character is > 127 in UTF-8 mode, we have to turn it into - two or more characters in the UTF-8 encoding. */ - -#ifdef SUPPORT_UTF8 - if (c > 127 && (options & PCRE_UTF8) != 0) - { - uschar buffer[8]; - int len = ord2utf8(c, buffer); - for (c = 0; c < len; c++) *code++ = buffer[c]; - length += len; - continue; - } -#endif - } - - /* Ordinary character or single-char escape */ - - *code++ = c; - length++; - } - - /* This "while" is the end of the "do" above. */ - - while (*ptr && length < MAXLIT && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); - - /* Update the last character and the count of literals */ - - prevreqchar = (length > 1)? code[-2] : *reqchar; - *reqchar = code[-1]; - *countlits += length; - - /* Compute the length and set it in the data vector, and advance to - the next state. */ - - previous[1] = length; - if (length < MAXLIT) ptr--; - break; - } - } /* end of big loop */ - -/* Control never reaches here by falling through, only by a goto for all the -error states. Pass back the position in the pattern so that it can be displayed -to the user for diagnosing the error. */ - -FAILED: -*ptrptr = ptr; -return FALSE; -} - - - - -/************************************************* -* Compile sequence of alternatives * -*************************************************/ - -/* On entry, ptr is pointing past the bracket character, but on return -it points to the closing bracket, or vertical bar, or end of string. -The code variable is pointing at the byte into which the BRA operator has been -stored. If the ims options are changed at the start (for a (?ims: group) or -during any branch, we need to insert an OP_OPT item at the start of every -following branch to ensure they get set correctly at run time, and also pass -the new options into every subsequent branch compile. 
- -Argument: - options the option bits - optchanged new ims options to set as if (?ims) were at the start, or -1 - for no change - brackets -> int containing the number of extracting brackets used - codeptr -> the address of the current code pointer - ptrptr -> the address of the current pattern pointer - errorptr -> pointer to error message - lookbehind TRUE if this is a lookbehind assertion - condref >= 0 for OPT_CREF setting at start of conditional group - reqchar -> place to put the last required character, or a negative number - countlits -> place to put the shortest literal count of any branch - cd points to the data block with tables pointers - -Returns: TRUE on success -*/ - -static BOOL -compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, - const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, - int *reqchar, int *countlits, compile_data *cd) -{ -const uschar *ptr = *ptrptr; -uschar *code = *codeptr; -uschar *last_branch = code; -uschar *start_bracket = code; -uschar *reverse_count = NULL; -int oldoptions = options & PCRE_IMS; -int branchreqchar, branchcountlits; - -*reqchar = -1; -*countlits = INT_MAX; -code += 3; - -/* At the start of a reference-based conditional group, insert the reference -number as an OP_CREF item. 
*/ - -if (condref >= 0) - { - *code++ = OP_CREF; - *code++ = condref; - } - -/* Loop for each alternative branch */ - -for (;;) - { - int length; - - /* Handle change of options */ - - if (optchanged >= 0) - { - *code++ = OP_OPT; - *code++ = optchanged; - options = (options & ~PCRE_IMS) | optchanged; - } - - /* Set up dummy OP_REVERSE if lookbehind assertion */ - - if (lookbehind) - { - *code++ = OP_REVERSE; - reverse_count = code; - *code++ = 0; - *code++ = 0; - } - - /* Now compile the branch */ - - if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged, - &branchreqchar, &branchcountlits, cd)) - { - *ptrptr = ptr; - return FALSE; - } - - /* Fill in the length of the last branch */ - - length = code - last_branch; - last_branch[1] = length >> 8; - last_branch[2] = length & 255; - - /* Save the last required character if all branches have the same; a current - value of -1 means unset, while -2 means "previous branch had no last required - char". */ - - if (*reqchar != -2) - { - if (branchreqchar >= 0) - { - if (*reqchar == -1) *reqchar = branchreqchar; - else if (*reqchar != branchreqchar) *reqchar = -2; - } - else *reqchar = -2; - } - - /* Keep the shortest literal count */ - - if (branchcountlits < *countlits) *countlits = branchcountlits; - DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits)); - - /* If lookbehind, check that this branch matches a fixed-length string, - and put the length into the OP_REVERSE item. Temporarily mark the end of - the branch with OP_END. */ - - if (lookbehind) - { - *code = OP_END; - length = find_fixedlength(last_branch, options); - DPRINTF(("fixed length = %d\n", length)); - if (length < 0) - { - *errorptr = ERR25; - *ptrptr = ptr; - return FALSE; - } - reverse_count[0] = (length >> 8); - reverse_count[1] = length & 255; - } - - /* Reached end of expression, either ')' or end of pattern. 
Insert a - terminating ket and the length of the whole bracketed item, and return, - leaving the pointer at the terminating char. If any of the ims options - were changed inside the group, compile a resetting op-code following. */ - - if (*ptr != '|') - { - length = code - start_bracket; - *code++ = OP_KET; - *code++ = length >> 8; - *code++ = length & 255; - if (optchanged >= 0) - { - *code++ = OP_OPT; - *code++ = oldoptions; - } - *codeptr = code; - *ptrptr = ptr; - return TRUE; - } - - /* Another branch follows; insert an "or" node and advance the pointer. */ - - *code = OP_ALT; - last_branch = code; - code += 3; - ptr++; - } -/* Control never reaches here */ -} - - - - -/************************************************* -* Find first significant op code * -*************************************************/ - -/* This is called by several functions that scan a compiled expression looking -for a fixed first character, or an anchoring op code etc. It skips over things -that do not influence this. For one application, a change of caseless option is -important. 
- -Arguments: - code pointer to the start of the group - options pointer to external options - optbit the option bit whose changing is significant, or - zero if none are - optstop TRUE to return on option change, otherwise change the options - value and continue - -Returns: pointer to the first significant opcode -*/ - -static const uschar* -first_significant_code(const uschar *code, int *options, int optbit, - BOOL optstop) -{ -for (;;) - { - switch ((int)*code) - { - case OP_OPT: - if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit)) - { - if (optstop) return code; - *options = (int)code[1]; - } - code += 2; - break; - - case OP_CREF: - code += 2; - break; - - case OP_WORD_BOUNDARY: - case OP_NOT_WORD_BOUNDARY: - code++; - break; - - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do code += (code[1] << 8) + code[2]; while (*code == OP_ALT); - code += 3; - break; - - default: - return code; - } - } -/* Control never reaches here */ -} - - - - -/************************************************* -* Check for anchored expression * -*************************************************/ - -/* Try to find out if this is an anchored regular expression. Consider each -alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket -all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then -it's anchored. However, if this is a multiline pattern, then only OP_SOD -counts, since OP_CIRC can match in the middle. - -A branch is also implicitly anchored if it starts with .* and DOTALL is set, -because that will try the rest of the pattern at all possible matching points, -so there is no point trying them again. 
- -Arguments: - code points to start of expression (the bracket) - options points to the options setting - -Returns: TRUE or FALSE -*/ - -static BOOL -is_anchored(register const uschar *code, int *options) -{ -do { - const uschar *scode = first_significant_code(code + 3, options, - PCRE_MULTILINE, FALSE); - register int op = *scode; - if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) - { if (!is_anchored(scode, options)) return FALSE; } - else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) && - (*options & PCRE_DOTALL) != 0) - { if (scode[1] != OP_ANY) return FALSE; } - else if (op != OP_SOD && - ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) - return FALSE; - code += (code[1] << 8) + code[2]; - } -while (*code == OP_ALT); -return TRUE; -} - - - -/************************************************* -* Check for starting with ^ or .* * -*************************************************/ - -/* This is called to find out if every branch starts with ^ or .* so that -"first char" processing can be done to speed things up in multiline -matching and for non-DOTALL patterns that start with .* (which must start at -the beginning or after \n). - -Argument: points to start of expression (the bracket) -Returns: TRUE or FALSE -*/ - -static BOOL -is_startline(const uschar *code) -{ -do { - const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE); - register int op = *scode; - if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) - { if (!is_startline(scode)) return FALSE; } - else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) - { if (scode[1] != OP_ANY) return FALSE; } - else if (op != OP_CIRC) return FALSE; - code += (code[1] << 8) + code[2]; - } -while (*code == OP_ALT); -return TRUE; -} - - - -/************************************************* -* Check for fixed first char * -*************************************************/ - -/* Try to find out if there is a fixed first character. 
This is called for -unanchored expressions, as it speeds up their processing quite considerably. -Consider each alternative branch. If they all start with the same char, or with -a bracket all of whose alternatives start with the same char (recurse ad lib), -then we return that char, otherwise -1. - -Arguments: - code points to start of expression (the bracket) - options pointer to the options (used to check casing changes) - -Returns: -1 or the fixed first char -*/ - -static int -find_firstchar(const uschar *code, int *options) -{ -register int c = -1; -do { - int d; - const uschar *scode = first_significant_code(code + 3, options, - PCRE_CASELESS, TRUE); - register int op = *scode; - - if (op >= OP_BRA) op = OP_BRA; - - switch(op) - { - default: - return -1; - - case OP_BRA: - case OP_ASSERT: - case OP_ONCE: - case OP_COND: - if ((d = find_firstchar(scode, options)) < 0) return -1; - if (c < 0) c = d; else if (c != d) return -1; - break; - - case OP_EXACT: /* Fall through */ - scode++; - - case OP_CHARS: /* Fall through */ - scode++; - - case OP_PLUS: - case OP_MINPLUS: - if (c < 0) c = scode[1]; else if (c != scode[1]) return -1; - break; - } - - code += (code[1] << 8) + code[2]; - } -while (*code == OP_ALT); -return c; -} - - - - - -/************************************************* -* Compile a Regular Expression * -*************************************************/ - -/* This function takes a string and returns a pointer to a block of store -holding a compiled version of the expression. 
- -Arguments: - pattern the regular expression - options various option bits - errorptr pointer to pointer to error text - erroroffset ptr offset in pattern where error was detected - tables pointer to character tables or NULL - -Returns: pointer to compiled data block, or NULL on error, - with errorptr and erroroffset set -*/ - -pcre * -pcre_compile(const char *pattern, int options, const char **errorptr, - int *erroroffset, const unsigned char *tables) -{ -real_pcre *re; -int length = 3; /* For initial BRA plus length */ -int runlength; -int c, reqchar, countlits; -int bracount = 0; -int top_backref = 0; -int branch_extra = 0; -int branch_newextra; -unsigned int brastackptr = 0; -size_t size; -uschar *code; -const uschar *ptr; -compile_data compile_block; -int brastack[BRASTACK_SIZE]; -uschar bralenstack[BRASTACK_SIZE]; -const size_t pattern_length = strlen(pattern); - -#ifdef DEBUG -uschar *code_base, *code_end; -#endif - -/* Can't support UTF8 unless PCRE has been compiled to include the code. */ - -#ifndef SUPPORT_UTF8 -if ((options & PCRE_UTF8) != 0) - { - *errorptr = ERR32; - return NULL; - } -#endif - -/* We can't pass back an error message if errorptr is NULL; I guess the best we -can do is just return NULL. 
*/ - -if (errorptr == NULL) return NULL; -*errorptr = NULL; - -/* However, we can give a message for this error */ - -if (erroroffset == NULL) - { - *errorptr = ERR16; - return NULL; - } -*erroroffset = 0; - -if ((options & ~PUBLIC_OPTIONS) != 0) - { - *errorptr = ERR17; - return NULL; - } - -/* Set up pointers to the individual character tables */ - -if (tables == NULL) tables = pcre_default_tables; -compile_block.lcc = tables + lcc_offset; -compile_block.fcc = tables + fcc_offset; -compile_block.cbits = tables + cbits_offset; -compile_block.ctypes = tables + ctypes_offset; - -/* Reflect pattern for debugging output */ - -DPRINTF(("------------------------------------------------------------------\n")); -DPRINTF(("%s\n", pattern)); - -/* The first thing to do is to make a pass over the pattern to compute the -amount of store required to hold the compiled code. This does not have to be -perfect as long as errors are overestimates. At the same time we can detect any -internal flag settings. Make an attempt to correct for any counted white space -if an "extended" flag setting appears late in the pattern. We can't be so -clever for #-comments. */ - -ptr = (const uschar *)(pattern - 1); -while ((c = *(++ptr)) != 0) - { - int min, max; - int class_charcount; - - if ((options & PCRE_EXTENDED) != 0) - { - if ((compile_block.ctypes[c] & ctype_space) != 0) continue; - if (c == '#') - { - /* The space before the ; is to avoid a warning on a silly compiler - on the Macintosh. */ - while ((c = *(++ptr)) != 0 && c != '\n') ; - continue; - } - } - - switch(c) - { - /* A backslashed item may be an escaped "normal" character or a - character type. For a "normal" character, put the pointers and - character back so that tests for whitespace etc. in the input - are done correctly. 
*/ - - case '\\': - { - const uschar *save_ptr = ptr; - c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if (c >= 0) - { - ptr = save_ptr; - c = '\\'; - goto NORMAL_CHAR; - } - } - length++; - - /* A back reference needs an additional char, plus either one or 5 - bytes for a repeat. We also need to keep the value of the highest - back reference. */ - - if (c <= -ESC_REF) - { - int refnum = -c - ESC_REF; - if (refnum > top_backref) top_backref = refnum; - length++; /* For single back reference */ - if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) - { - ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if ((min == 0 && (max == 1 || max == -1)) || - (min == 1 && max == -1)) - length++; - else length += 5; - if (ptr[1] == '?') ptr++; - } - } - continue; - - case '^': - case '.': - case '$': - case '*': /* These repeats won't be after brackets; */ - case '+': /* those are handled separately */ - case '?': - length++; - continue; - - /* This covers the cases of repeats after a single char, metachar, class, - or back reference. */ - - case '{': - if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR; - ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if ((min == 0 && (max == 1 || max == -1)) || - (min == 1 && max == -1)) - length++; - else - { - length--; /* Uncount the original char or metachar */ - if (min == 1) length++; else if (min > 0) length += 4; - if (max > 0) length += 4; else length += 2; - } - if (ptr[1] == '?') ptr++; - continue; - - /* An alternation contains an offset to the next branch or ket. If any ims - options changed in the previous branch(es), and/or if we are in a - lookbehind assertion, extra space will be needed at the start of the - branch. This is handled by branch_extra. 
*/ - - case '|': - length += 3 + branch_extra; - continue; - - /* A character class uses 33 characters. Don't worry about character types - that aren't allowed in classes - they'll get picked up during the compile. - A character class that contains only one character uses 2 or 3 bytes, - depending on whether it is negated or not. Notice this where we can. */ - - case '[': - class_charcount = 0; - if (*(++ptr) == '^') ptr++; - do - { - if (*ptr == '\\') - { - int ch = check_escape(&ptr, errorptr, bracount, options, TRUE, - &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if (-ch == ESC_b) class_charcount++; else class_charcount = 10; - } - else class_charcount++; - ptr++; - if (*ptr == 0) - { - *errorptr = ERR6; - goto PCRE_ERROR_RETURN; - } - } - while (*ptr != ']'); - - /* Repeats for negated single chars are handled by the general code */ - - if (class_charcount == 1) length += 3; else - { - length += 33; - - /* A repeat needs either 1 or 5 bytes. */ - - if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) - { - ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if ((min == 0 && (max == 1 || max == -1)) || - (min == 1 && max == -1)) - length++; - else length += 5; - if (ptr[1] == '?') ptr++; - } - } - continue; - - /* Brackets may be genuine groups or special things */ - - case '(': - branch_newextra = 0; - - /* Handle special forms of bracket, which all start (? */ - - if (ptr[1] == '?') - { - int set, unset; - int *optset; - - switch (c = ptr[2]) - { - /* Skip over comments entirely */ - case '#': - ptr += 3; - while (*ptr != 0 && *ptr != ')') ptr++; - if (*ptr == 0) - { - *errorptr = ERR18; - goto PCRE_ERROR_RETURN; - } - continue; - - /* Non-referencing groups and lookaheads just move the pointer on, and - then behave like a non-special bracket, except that they don't increment - the count of extracting brackets. 
Ditto for the "once only" bracket, - which is in Perl from version 5.005. */ - - case ':': - case '=': - case '!': - case '>': - ptr += 2; - break; - - /* A recursive call to the regex is an extension, to provide the - facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */ - - case 'R': - if (ptr[3] != ')') - { - *errorptr = ERR29; - goto PCRE_ERROR_RETURN; - } - ptr += 3; - length += 1; - break; - - /* Lookbehinds are in Perl from version 5.005 */ - - case '<': - if (ptr[3] == '=' || ptr[3] == '!') - { - ptr += 3; - branch_newextra = 3; - length += 3; /* For the first branch */ - break; - } - *errorptr = ERR24; - goto PCRE_ERROR_RETURN; - - /* Conditionals are in Perl from version 5.005. The bracket must either - be followed by a number (for bracket reference) or by an assertion - group. */ - - case '(': - if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) - { - ptr += 4; - length += 2; - while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; - if (*ptr != ')') - { - *errorptr = ERR26; - goto PCRE_ERROR_RETURN; - } - } - else /* An assertion must follow */ - { - ptr++; /* Can treat like ':' as far as spacing is concerned */ - if (ptr[2] != '?' || - (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') ) - { - ptr += 2; /* To get right offset in message */ - *errorptr = ERR28; - goto PCRE_ERROR_RETURN; - } - } - break; - - /* Else loop checking valid options until ) is met. Anything else is an - error. If we are without any brackets, i.e. at top level, the settings - act as if specified in the options, so massage the options immediately. - This is for backward compatibility with Perl 5.004. 
*/ - - default: - set = unset = 0; - optset = &set; - ptr += 2; - - for (;; ptr++) - { - c = *ptr; - switch (c) - { - case 'i': - *optset |= PCRE_CASELESS; - continue; - - case 'm': - *optset |= PCRE_MULTILINE; - continue; - - case 's': - *optset |= PCRE_DOTALL; - continue; - - case 'x': - *optset |= PCRE_EXTENDED; - continue; - - case 'X': - *optset |= PCRE_EXTRA; - continue; - - case 'U': - *optset |= PCRE_UNGREEDY; - continue; - - case '-': - optset = &unset; - continue; - - /* A termination by ')' indicates an options-setting-only item; - this is global at top level; otherwise nothing is done here and - it is handled during the compiling process on a per-bracket-group - basis. */ - - case ')': - if (brastackptr == 0) - { - options = (options | set) & (~unset); - set = unset = 0; /* To save length */ - } - /* Fall through */ - - /* A termination by ':' indicates the start of a nested group with - the given options set. This is again handled at compile time, but - we must allow for compiled space if any of the ims options are - set. We also have to allow for resetting space at the end of - the group, which is why 4 is added to the length and not just 2. - If there are several changes of options within the same group, this - will lead to an over-estimate on the length, but this shouldn't - matter very much. We also have to allow for resetting options at - the start of any alternations, which we do by setting - branch_newextra to 2. Finally, we record whether the case-dependent - flag ever changes within the regex. This is used by the "required - character" code. */ - - case ':': - if (((set|unset) & PCRE_IMS) != 0) - { - length += 4; - branch_newextra = 2; - if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED; - } - goto END_OPTIONS; - - /* Unrecognized option character */ - - default: - *errorptr = ERR12; - goto PCRE_ERROR_RETURN; - } - } - - /* If we hit a closing bracket, that's it - this is a freestanding - option-setting. 
We need to ensure that branch_extra is updated if - necessary. The only values branch_newextra can have here are 0 or 2. - If the value is 2, then branch_extra must either be 2 or 5, depending - on whether this is a lookbehind group or not. */ - - END_OPTIONS: - if (c == ')') - { - if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3)) - branch_extra += branch_newextra; - continue; - } - - /* If options were terminated by ':' control comes here. Fall through - to handle the group below. */ - } - } - - /* Extracting brackets must be counted so we can process escapes in a - Perlish way. */ - - else bracount++; - - /* Non-special forms of bracket. Save length for computing whole length - at end if there's a repeat that requires duplication of the group. Also - save the current value of branch_extra, and start the new group with - the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3 - for a lookbehind assertion. */ - - if (brastackptr >= sizeof(brastack)/sizeof(int)) - { - *errorptr = ERR19; - goto PCRE_ERROR_RETURN; - } - - bralenstack[brastackptr] = branch_extra; - branch_extra = branch_newextra; - - brastack[brastackptr++] = length; - length += 3; - continue; - - /* Handle ket. Look for subsequent max/min; for certain sets of values we - have to replicate this bracket up to that many times. If brastackptr is - 0 this is an unmatched bracket which will generate an error, but take care - not to try to access brastack[-1] when computing the length and restoring - the branch_extra value. */ - - case ')': - length += 3; - { - int minval = 1; - int maxval = 1; - int duplength; - - if (brastackptr > 0) - { - duplength = length - brastack[--brastackptr]; - branch_extra = bralenstack[brastackptr]; - } - else duplength = 0; - - /* Leave ptr at the final char; for read_repeat_counts this happens - automatically; for the others we need an increment. 
*/ - - if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block)) - { - ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr, - &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - } - else if (c == '*') { minval = 0; maxval = -1; ptr++; } - else if (c == '+') { maxval = -1; ptr++; } - else if (c == '?') { minval = 0; ptr++; } - - /* If the minimum is zero, we have to allow for an OP_BRAZERO before the - group, and if the maximum is greater than zero, we have to replicate - maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting - bracket set - hence the 7. */ - - if (minval == 0) - { - length++; - if (maxval > 0) length += (maxval - 1) * (duplength + 7); - } - - /* When the minimum is greater than zero, 1 we have to replicate up to - minval-1 times, with no additions required in the copies. Then, if - there is a limited maximum we have to replicate up to maxval-1 times - allowing for a BRAZERO item before each optional copy and nesting - brackets for all but one of the optional copies. */ - - else - { - length += (minval - 1) * duplength; - if (maxval > minval) /* Need this test as maxval=-1 means no limit */ - length += (maxval - minval) * (duplength + 7) - 6; - } - } - continue; - - /* Non-special character. For a run of such characters the length required - is the number of characters + 2, except that the maximum run length is 255. - We won't get a skipped space or a non-data escape or the start of a # - comment as the first character, so the length can't be zero. */ - - NORMAL_CHAR: - default: - length += 2; - runlength = 0; - do - { - if ((options & PCRE_EXTENDED) != 0) - { - if ((compile_block.ctypes[c] & ctype_space) != 0) continue; - if (c == '#') - { - /* The space before the ; is to avoid a warning on a silly compiler - on the Macintosh. */ - while ((c = *(++ptr)) != 0 && c != '\n') ; - continue; - } - } - - /* Backslash may introduce a data char or a metacharacter; stop the - string before the latter. 
*/ - - if (c == '\\') - { - const uschar *saveptr = ptr; - c = check_escape(&ptr, errorptr, bracount, options, FALSE, - &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if (c < 0) { ptr = saveptr; break; } - -#ifdef SUPPORT_UTF8 - if (c > 127 && (options & PCRE_UTF8) != 0) - { - int i; - for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) - if (c <= utf8_table1[i]) break; - runlength += i; - } -#endif - } - - /* Ordinary character or single-char escape */ - - runlength++; - - if ((const char *)ptr > pattern + pattern_length) - { - *errorptr = "internal error"; - goto PCRE_ERROR_RETURN; - } - } - - /* This "while" is the end of the "do" above. */ - - while (runlength < MAXLIT && - (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); - - ptr--; - length += runlength; - continue; - } - } - -length += 4; /* For final KET and END */ - -if (length > 65539) - { - *errorptr = ERR20; - return NULL; - } - -/* Compute the size of data block needed and get it, either from malloc or -externally provided function. We specify "code[0]" in the offsetof() expression -rather than just "code", because it has been reported that one broken compiler -fails on "code" because it is also an independent variable. It should make no -difference to the value of the offsetof(). */ - -size = length + offsetof(real_pcre, code[0]); -re = (real_pcre *)(pcre_malloc)(size); - -if (re == NULL) - { - *errorptr = ERR21; - return NULL; - } - -/* Put in the magic number, and save the size, options, and table pointer */ - -re->magic_number = MAGIC_NUMBER; -re->size = size; -re->options = options; -re->tables = tables; - -/* Set up a starting, non-extracting bracket, then compile the expression. On -error, *errorptr will be set non-NULL, so we don't need to look at the result -of the function here. 
*/ - -ptr = (const uschar *)pattern; -code = re->code; -*code = OP_BRA; -bracount = 0; -(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, - &reqchar, &countlits, &compile_block); -re->top_bracket = bracount; -re->top_backref = top_backref; - -/* If not reached end of pattern on success, there's an excess bracket. */ - -if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22; - -/* Fill in the terminating state and check for disastrous overflow, but -if debugging, leave the test till after things are printed out. */ - -*code++ = OP_END; - -#ifndef DEBUG -if (code - re->code > length) *errorptr = ERR23; -#endif - -/* Give an error if there's back reference to a non-existent capturing -subpattern. */ - -if (top_backref > re->top_bracket) *errorptr = ERR15; - -/* Failed to compile */ - -if (*errorptr != NULL) - { - (pcre_free)(re); - PCRE_ERROR_RETURN: - *erroroffset = ptr - (const uschar *)pattern; - return NULL; - } - -/* If the anchored option was not passed, set flag if we can determine that the -pattern is anchored by virtue of ^ characters or \A or anything else (such as -starting with .* when DOTALL is set). - -Otherwise, see if we can determine what the first character has to be, because -that speeds up unanchored matches no end. If not, see if we can set the -PCRE_STARTLINE flag. This is helpful for multiline matches when all branches -start with ^. and also when all branches start with .* for non-DOTALL matches. -*/ - -if ((options & PCRE_ANCHORED) == 0) - { - int temp_options = options; - if (is_anchored(re->code, &temp_options)) - re->options |= PCRE_ANCHORED; - else - { - int ch = find_firstchar(re->code, &temp_options); - if (ch >= 0) - { - re->first_char = ch; - re->options |= PCRE_FIRSTSET; - } - else if (is_startline(re->code)) - re->options |= PCRE_STARTLINE; - } - } - -/* Save the last required character if there are at least two literal -characters on all paths, or if there is no first character setting. 
*/ - -if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0)) - { - re->req_char = reqchar; - re->options |= PCRE_REQCHSET; - } - -/* Print out the compiled data for debugging */ - -#ifdef DEBUG - -printf("Length = %d top_bracket = %d top_backref = %d\n", - length, re->top_bracket, re->top_backref); - -if (re->options != 0) - { - printf("%s%s%s%s%s%s%s%s%s\n", - ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", - ((re->options & PCRE_CASELESS) != 0)? "caseless " : "", - ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "", - ((re->options & PCRE_EXTENDED) != 0)? "extended " : "", - ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", - ((re->options & PCRE_DOTALL) != 0)? "dotall " : "", - ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "", - ((re->options & PCRE_EXTRA) != 0)? "extra " : "", - ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : ""); - } - -if ((re->options & PCRE_FIRSTSET) != 0) - { - if (isprint(re->first_char)) printf("First char = %c\n", re->first_char); - else printf("First char = \\x%02x\n", re->first_char); - } - -if ((re->options & PCRE_REQCHSET) != 0) - { - if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char); - else printf("Req char = \\x%02x\n", re->req_char); - } - -code_end = code; -code_base = code = re->code; - -while (code < code_end) - { - int charlength; - - printf("%3d ", code - code_base); - - if (*code >= OP_BRA) - { - printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); - code += 2; - } - - else switch(*code) - { - case OP_OPT: - printf(" %.2x %s", code[1], OP_names[*code]); - code++; - break; - - case OP_COND: - printf("%3d Cond", (code[1] << 8) + code[2]); - code += 2; - break; - - case OP_CREF: - printf(" %.2d %s", code[1], OP_names[*code]); - code++; - break; - - case OP_CHARS: - charlength = *(++code); - printf("%3d ", charlength); - while (charlength-- > 0) - if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c); - break; - - case 
OP_KETRMAX: - case OP_KETRMIN: - case OP_ALT: - case OP_KET: - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_ONCE: - printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); - code += 2; - break; - - case OP_REVERSE: - printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); - code += 2; - break; - - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - if (*code >= OP_TYPESTAR) - printf(" %s", OP_names[code[1]]); - else if (isprint(c = code[1])) printf(" %c", c); - else printf(" \\x%02x", c); - printf("%s", OP_names[*code++]); - break; - - case OP_EXACT: - case OP_UPTO: - case OP_MINUPTO: - if (isprint(c = code[3])) printf(" %c{", c); - else printf(" \\x%02x{", c); - if (*code != OP_EXACT) printf("0,"); - printf("%d}", (code[1] << 8) + code[2]); - if (*code == OP_MINUPTO) printf("?"); - code += 3; - break; - - case OP_TYPEEXACT: - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - printf(" %s{", OP_names[code[3]]); - if (*code != OP_TYPEEXACT) printf(","); - printf("%d}", (code[1] << 8) + code[2]); - if (*code == OP_TYPEMINUPTO) printf("?"); - code += 3; - break; - - case OP_NOT: - if (isprint(c = *(++code))) printf(" [^%c]", c); - else printf(" [^\\x%02x]", c); - break; - - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - if (isprint(c = code[1])) printf(" [^%c]", c); - else printf(" [^\\x%02x]", c); - printf("%s", OP_names[*code++]); - break; - - case OP_NOTEXACT: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - if (isprint(c = code[3])) printf(" [^%c]{", c); - else printf(" [^\\x%02x]{", c); - if (*code != OP_NOTEXACT) printf(","); - printf("%d}", (code[1] << 8) + code[2]); - if (*code == OP_NOTMINUPTO) printf("?"); - code += 3; - break; - - case OP_REF: - 
printf(" \\%d", *(++code)); - code ++; - goto CLASS_REF_REPEAT; - - case OP_CLASS: - { - int i, min, max; - code++; - printf(" ["); - - for (i = 0; i < 256; i++) - { - if ((code[i/8] & (1 << (i&7))) != 0) - { - int j; - for (j = i+1; j < 256; j++) - if ((code[j/8] & (1 << (j&7))) == 0) break; - if (i == '-' || i == ']') printf("\\"); - if (isprint(i)) printf("%c", i); else printf("\\x%02x", i); - if (--j > i) - { - printf("-"); - if (j == '-' || j == ']') printf("\\"); - if (isprint(j)) printf("%c", j); else printf("\\x%02x", j); - } - i = j; - } - } - printf("]"); - code += 32; - - CLASS_REF_REPEAT: - - switch(*code) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - printf("%s", OP_names[*code]); - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - min = (code[1] << 8) + code[2]; - max = (code[3] << 8) + code[4]; - if (max == 0) printf("{%d,}", min); - else printf("{%d,%d}", min, max); - if (*code == OP_CRMINRANGE) printf("?"); - code += 4; - break; - - default: - code--; - } - } - break; - - /* Anything else is just a one-node item */ - - default: - printf(" %s", OP_names[*code]); - break; - } - - code++; - printf("\n"); - } -printf("------------------------------------------------------------------\n"); - -/* This check is done here in the debugging case so that the code that -was compiled can be seen. */ - -if (code - re->code > length) - { - *errorptr = ERR23; - (pcre_free)(re); - *erroroffset = ptr - (uschar *)pattern; - return NULL; - } -#endif - -return (pcre *)re; -} - - - -/************************************************* -* Match a back-reference * -*************************************************/ - -/* If a back reference hasn't been set, the length that is passed is greater -than the number of characters left in the string, so the match fails. 
- -Arguments: - offset index into the offset vector - eptr points into the subject - length length to be matched - md points to match data block - ims the ims flags - -Returns: TRUE if matched -*/ - -static BOOL -match_ref(int offset, register const uschar *eptr, int length, match_data *md, - unsigned long int ims) -{ -const uschar *p = md->start_subject + md->offset_vector[offset]; - -#ifdef DEBUG -if (eptr >= md->end_subject) - printf("matching subject "); -else - { - printf("matching subject "); - pchars(eptr, length, TRUE, md); - } -printf(" against backref "); -pchars(p, length, FALSE, md); -printf("\n"); -#endif - -/* Always fail if not enough characters left */ - -if (length > md->end_subject - eptr) return FALSE; - -/* Separate the caselesss case for speed */ - -if ((ims & PCRE_CASELESS) != 0) - { - while (length-- > 0) - if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; - } -else - { while (length-- > 0) if (*p++ != *eptr++) return FALSE; } - -return TRUE; -} - - - -/************************************************* -* Match from current position * -*************************************************/ - -/* On entry ecode points to the first opcode, and eptr to the first character -in the subject string, while eptrb holds the value of eptr at the start of the -last bracketed group - used for breaking infinite loops matching zero-length -strings. 
- -Arguments: - eptr pointer in subject - ecode position in code - offset_top current top pointer - md pointer to "static" info for the match - ims current /i, /m, and /s options - eptrb pointer to chain of blocks containing eptr at start of - brackets - for testing for empty matches - flags can contain - match_condassert - this is an assertion condition - match_isgroup - this is the start of a bracketed group - -Returns: TRUE if matched -*/ - -static BOOL -match(register const uschar *eptr, register const uschar *ecode, - int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, - int flags) -{ -unsigned long int original_ims = ims; /* Save for resetting on ')' */ -eptrblock newptrb; - -/* At the start of a bracketed group, add the current subject pointer to the -stack of such pointers, to be re-instated at the end of the group when we hit -the closing ket. When match() is called in other circumstances, we don't add to -the stack. */ - -if ((flags & match_isgroup) != 0) - { - newptrb.prev = eptrb; - newptrb.saved_eptr = eptr; - eptrb = &newptrb; - } - -/* Now start processing the operations. */ - -for (;;) - { - int op = (int)*ecode; - int min, max, ctype; - register int i; - register int c; - BOOL minimize = FALSE; - - /* Opening capturing bracket. If there is space in the offset vector, save - the current subject position in the working slot at the top of the vector. We - mustn't change the current values of the data slot, because they may be set - from a previous iteration of this group, and be referred to by a reference - inside the group. - - If the bracket fails to match, we need to restore this value and also the - values of the final offsets, in case they were set by a previous iteration of - the same bracket. - - If there isn't enough space in the offset vector, treat this as if it were a - non-capturing bracket. Don't worry about setting the flag for the error case - here; that is handled in the code for KET. 
*/ - - if (op > OP_BRA) - { - int number = op - OP_BRA; - int offset = number << 1; - -#ifdef DEBUG - printf("start bracket %d subject=", number); - pchars(eptr, 16, TRUE, md); - printf("\n"); -#endif - - if (offset < md->offset_max) - { - int save_offset1 = md->offset_vector[offset]; - int save_offset2 = md->offset_vector[offset+1]; - int save_offset3 = md->offset_vector[md->offset_end - number]; - - DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); - md->offset_vector[md->offset_end - number] = eptr - md->start_subject; - - do - { - if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup)) - return TRUE; - ecode += (ecode[1] << 8) + ecode[2]; - } - while (*ecode == OP_ALT); - - DPRINTF(("bracket %d failed\n", number)); - - md->offset_vector[offset] = save_offset1; - md->offset_vector[offset+1] = save_offset2; - md->offset_vector[md->offset_end - number] = save_offset3; - return FALSE; - } - - /* Insufficient room for saving captured contents */ - - else op = OP_BRA; - } - - /* Other types of node can be handled by a switch */ - - switch(op) - { - case OP_BRA: /* Non-capturing bracket: optimized */ - DPRINTF(("start bracket 0\n")); - do - { - if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup)) - return TRUE; - ecode += (ecode[1] << 8) + ecode[2]; - } - while (*ecode == OP_ALT); - DPRINTF(("bracket 0 failed\n")); - return FALSE; - - /* Conditional group: compilation checked that there are no more than - two branches. If the condition is false, skipping the first branch takes us - past the end if there is only one branch, but that's OK because that is - exactly what going to the ket would do. */ - - case OP_COND: - if (ecode[3] == OP_CREF) /* Condition is extraction test */ - { - int offset = ecode[4] << 1; /* Doubled reference number */ - return match(eptr, - ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)? 
- 5 : 3 + (ecode[1] << 8) + ecode[2]), - offset_top, md, ims, eptrb, match_isgroup); - } - - /* The condition is an assertion. Call match() to evaluate it - setting - the final argument TRUE causes it to stop at the end of an assertion. */ - - else - { - if (match(eptr, ecode+3, offset_top, md, ims, NULL, - match_condassert | match_isgroup)) - { - ecode += 3 + (ecode[4] << 8) + ecode[5]; - while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2]; - } - else ecode += (ecode[1] << 8) + ecode[2]; - return match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup); - } - /* Control never reaches here */ - - /* Skip over conditional reference data if encountered (should not be) */ - - case OP_CREF: - ecode += 2; - break; - - /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched - an empty string - recursion will then try other alternatives, if any. */ - - case OP_END: - if (md->notempty && eptr == md->start_match) return FALSE; - md->end_match_ptr = eptr; /* Record where we ended */ - md->end_offset_top = offset_top; /* and how many extracts were taken */ - return TRUE; - - /* Change option settings */ - - case OP_OPT: - ims = ecode[1]; - ecode += 2; - DPRINTF(("ims set to %02lx\n", ims)); - break; - - /* Assertion brackets. Check the alternative branches in turn - the - matching won't pass the KET for an assertion. If any one branch matches, - the assertion is true. Lookbehind assertions have an OP_REVERSE item at the - start of each branch to move the current point backwards, so the code at - this level is identical to the lookahead case. */ - - case OP_ASSERT: - case OP_ASSERTBACK: - do - { - if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) break; - ecode += (ecode[1] << 8) + ecode[2]; - } - while (*ecode == OP_ALT); - if (*ecode == OP_KET) return FALSE; - - /* If checking an assertion for a condition, return TRUE. 
*/ - - if ((flags & match_condassert) != 0) return TRUE; - - /* Continue from after the assertion, updating the offsets high water - mark, since extracts may have been taken during the assertion. */ - - do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT); - ecode += 3; - offset_top = md->end_offset_top; - continue; - - /* Negative assertion: all branches must fail to match */ - - case OP_ASSERT_NOT: - case OP_ASSERTBACK_NOT: - do - { - if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) - return FALSE; - ecode += (ecode[1] << 8) + ecode[2]; - } - while (*ecode == OP_ALT); - - if ((flags & match_condassert) != 0) return TRUE; - - ecode += 3; - continue; - - /* Move the subject pointer back. This occurs only at the start of - each branch of a lookbehind assertion. If we are too close to the start to - move back, this match function fails. When working with UTF-8 we move - back a number of characters, not bytes. */ - - case OP_REVERSE: -#ifdef SUPPORT_UTF8 - c = (ecode[1] << 8) + ecode[2]; - for (i = 0; i < c; i++) - { - eptr--; - BACKCHAR(eptr) - } -#else - eptr -= (ecode[1] << 8) + ecode[2]; -#endif - - if (eptr < md->start_subject) return FALSE; - ecode += 3; - break; - - /* Recursion matches the current regex, nested. If there are any capturing - brackets started but not finished, we have to save their starting points - and reinstate them after the recursion. However, we don't know how many - such there are (offset_top records the completed total) so we just have - to save all the potential data. There may be up to 99 such values, which - is a bit large to put on the stack, but using malloc for small numbers - seems expensive. As a compromise, the stack is used when there are fewer - than 16 values to store; otherwise malloc is used. A problem is what to do - if the malloc fails ... there is no way of returning to the top level with - an error. Save the top 15 values on the stack, and accept that the rest - may be wrong. 
*/ - - case OP_RECURSE: - { - BOOL rc; - int *save; - int stacksave[15]; - - c = md->offset_max; - - if (c < 16) save = stacksave; else - { - save = (int *)(pcre_malloc)((c+1) * sizeof(int)); - if (save == NULL) - { - save = stacksave; - c = 15; - } - } - - for (i = 1; i <= c; i++) - save[i] = md->offset_vector[md->offset_end - i]; - rc = match(eptr, md->start_pattern, offset_top, md, ims, eptrb, - match_isgroup); - for (i = 1; i <= c; i++) - md->offset_vector[md->offset_end - i] = save[i]; - if (save != stacksave) (pcre_free)(save); - if (!rc) return FALSE; - - /* In case the recursion has set more capturing values, save the final - number, then move along the subject till after the recursive match, - and advance one byte in the pattern code. */ - - offset_top = md->end_offset_top; - eptr = md->end_match_ptr; - ecode++; - } - break; - - /* "Once" brackets are like assertion brackets except that after a match, - the point in the subject string is not moved back. Thus there can never be - a move back into the brackets. Check the alternative branches in turn - the - matching won't pass the KET for this kind of subpattern. If any one branch - matches, we carry on as at the end of a normal bracket, leaving the subject - pointer. */ - - case OP_ONCE: - { - const uschar *prev = ecode; - const uschar *saved_eptr = eptr; - - do - { - if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup)) - break; - ecode += (ecode[1] << 8) + ecode[2]; - } - while (*ecode == OP_ALT); - - /* If hit the end of the group (which could be repeated), fail */ - - if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE; - - /* Continue as from after the assertion, updating the offsets high water - mark, since extracts may have been taken. */ - - do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT); - - offset_top = md->end_offset_top; - eptr = md->end_match_ptr; - - /* For a non-repeating ket, just continue at this level. 
This also - happens for a repeating ket if no characters were matched in the group. - This is the forcible breaking of infinite loops as implemented in Perl - 5.005. If there is an options reset, it will get obeyed in the normal - course of events. */ - - if (*ecode == OP_KET || eptr == saved_eptr) - { - ecode += 3; - break; - } - - /* The repeating kets try the rest of the pattern or restart from the - preceding bracket, in the appropriate order. We need to reset any options - that changed within the bracket before re-running it, so check the next - opcode. */ - - if (ecode[3] == OP_OPT) - { - ims = (ims & ~PCRE_IMS) | ecode[4]; - DPRINTF(("ims set to %02lx at group repeat\n", ims)); - } - - if (*ecode == OP_KETRMIN) - { - if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) || - match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup)) - return TRUE; - } - else /* OP_KETRMAX */ - { - if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) || - match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE; - } - } - return FALSE; - - /* An alternation is the end of a branch; scan along to find the end of the - bracketed group and go to there. */ - - case OP_ALT: - do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT); - break; - - /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating - that it may occur zero times. It may repeat infinitely, or not at all - - i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper - repeat limits are compiled as a number of copies, with the optional ones - preceded by BRAZERO or BRAMINZERO. 
*/ - - case OP_BRAZERO: - { - const uschar *next = ecode+1; - if (match(eptr, next, offset_top, md, ims, eptrb, match_isgroup)) - return TRUE; - do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); - ecode = next + 3; - } - break; - - case OP_BRAMINZERO: - { - const uschar *next = ecode+1; - do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); - if (match(eptr, next+3, offset_top, md, ims, eptrb, match_isgroup)) - return TRUE; - ecode++; - } - break; - - /* End of a group, repeated or non-repeating. If we are at the end of - an assertion "group", stop matching and return TRUE, but record the - current high water mark for use by positive assertions. Do this also - for the "once" (not-backup up) groups. */ - - case OP_KET: - case OP_KETRMIN: - case OP_KETRMAX: - { - const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; - const uschar *saved_eptr = eptrb->saved_eptr; - - eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */ - - if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || - *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || - *prev == OP_ONCE) - { - md->end_match_ptr = eptr; /* For ONCE */ - md->end_offset_top = offset_top; - return TRUE; - } - - /* In all other cases except a conditional group we have to check the - group number back at the start and if necessary complete handling an - extraction by setting the offsets and bumping the high water mark. */ - - if (*prev != OP_COND) - { - int number = *prev - OP_BRA; - int offset = number << 1; - -#ifdef DEBUG - printf("end bracket %d", number); - printf("\n"); -#endif - - if (number > 0) - { - if (offset >= md->offset_max) md->offset_overflow = TRUE; else - { - md->offset_vector[offset] = - md->offset_vector[md->offset_end - number]; - md->offset_vector[offset+1] = eptr - md->start_subject; - if (offset_top <= offset) offset_top = offset + 2; - } - } - } - - /* Reset the value of the ims flags, in case they got changed during - the group. 
*/ - - ims = original_ims; - DPRINTF(("ims reset to %02lx\n", ims)); - - /* For a non-repeating ket, just continue at this level. This also - happens for a repeating ket if no characters were matched in the group. - This is the forcible breaking of infinite loops as implemented in Perl - 5.005. If there is an options reset, it will get obeyed in the normal - course of events. */ - - if (*ecode == OP_KET || eptr == saved_eptr) - { - ecode += 3; - break; - } - - /* The repeating kets try the rest of the pattern or restart from the - preceding bracket, in the appropriate order. */ - - if (*ecode == OP_KETRMIN) - { - if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) || - match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup)) - return TRUE; - } - else /* OP_KETRMAX */ - { - if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) || - match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE; - } - } - return FALSE; - - /* Start of subject unless notbol, or after internal newline if multiline */ - - case OP_CIRC: - if (md->notbol && eptr == md->start_subject) return FALSE; - if ((ims & PCRE_MULTILINE) != 0) - { - if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE; - ecode++; - break; - } - /* ... else fall through */ - - /* Start of subject assertion */ - - case OP_SOD: - if (eptr != md->start_subject) return FALSE; - ecode++; - break; - - /* Assert before internal newline if multiline, or before a terminating - newline unless endonly is set, else end of subject unless noteol is set. */ - - case OP_DOLL: - if ((ims & PCRE_MULTILINE) != 0) - { - if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; } - else { if (md->noteol) return FALSE; } - ecode++; - break; - } - else - { - if (md->noteol) return FALSE; - if (!md->endonly) - { - if (eptr < md->end_subject - 1 || - (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE; - - ecode++; - break; - } - } - /* ... 
else fall through */ - - /* End of subject assertion (\z) */ - - case OP_EOD: - if (eptr < md->end_subject) return FALSE; - ecode++; - break; - - /* End of subject or ending \n assertion (\Z) */ - - case OP_EODN: - if (eptr < md->end_subject - 1 || - (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE; - ecode++; - break; - - /* Word boundary assertions */ - - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - { - BOOL prev_is_word = (eptr != md->start_subject) && - ((md->ctypes[eptr[-1]] & ctype_word) != 0); - BOOL cur_is_word = (eptr < md->end_subject) && - ((md->ctypes[*eptr] & ctype_word) != 0); - if ((*ecode++ == OP_WORD_BOUNDARY)? - cur_is_word == prev_is_word : cur_is_word != prev_is_word) - return FALSE; - } - break; - - /* Match a single character type; inline for speed */ - - case OP_ANY: - if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n') - return FALSE; - if (eptr++ >= md->end_subject) return FALSE; -#ifdef SUPPORT_UTF8 - if (md->utf8) - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; -#endif - ecode++; - break; - - case OP_NOT_DIGIT: - if (eptr >= md->end_subject || - (md->ctypes[*eptr++] & ctype_digit) != 0) - return FALSE; - ecode++; - break; - - case OP_DIGIT: - if (eptr >= md->end_subject || - (md->ctypes[*eptr++] & ctype_digit) == 0) - return FALSE; - ecode++; - break; - - case OP_NOT_WHITESPACE: - if (eptr >= md->end_subject || - (md->ctypes[*eptr++] & ctype_space) != 0) - return FALSE; - ecode++; - break; - - case OP_WHITESPACE: - if (eptr >= md->end_subject || - (md->ctypes[*eptr++] & ctype_space) == 0) - return FALSE; - ecode++; - break; - - case OP_NOT_WORDCHAR: - if (eptr >= md->end_subject || - (md->ctypes[*eptr++] & ctype_word) != 0) - return FALSE; - ecode++; - break; - - case OP_WORDCHAR: - if (eptr >= md->end_subject || - (md->ctypes[*eptr++] & ctype_word) == 0) - return FALSE; - ecode++; - break; - - /* Match a back reference, possibly repeatedly. 
Look past the end of the - item to see if there is repeat information following. The code is similar - to that for character classes, but repeated for efficiency. Then obey - similar code to character type repeats - written out again for speed. - However, if the referenced string is the empty string, always treat - it as matched, any number of times (otherwise there could be infinite - loops). */ - - case OP_REF: - { - int length; - int offset = ecode[1] << 1; /* Doubled reference number */ - ecode += 2; /* Advance past the item */ - - /* If the reference is unset, set the length to be longer than the amount - of subject left; this ensures that every attempt at a match fails. We - can't just fail here, because of the possibility of quantifiers with zero - minima. */ - - length = (offset >= offset_top || md->offset_vector[offset] < 0)? - md->end_subject - eptr + 1 : - md->offset_vector[offset+1] - md->offset_vector[offset]; - - /* Set up for repetition, or handle the non-repeated case */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - c = *ecode++ - OP_CRSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*ecode == OP_CRMINRANGE); - min = (ecode[1] << 8) + ecode[2]; - max = (ecode[3] << 8) + ecode[4]; - if (max == 0) max = INT_MAX; - ecode += 5; - break; - - default: /* No repeat follows */ - if (!match_ref(offset, eptr, length, md, ims)) return FALSE; - eptr += length; - continue; /* With the main loop */ - } - - /* If the length of the reference is zero, just continue with the - main loop. */ - - if (length == 0) continue; - - /* First, ensure the minimum number of matches are present. 
We get back - the length of the reference string explicitly rather than passing the - address of eptr, so that eptr can be a register variable. */ - - for (i = 1; i <= min; i++) - { - if (!match_ref(offset, eptr, length, md, ims)) return FALSE; - eptr += length; - } - - /* If min = max, continue at the same level without recursion. - They are not both allowed to be zero. */ - - if (min == max) continue; - - /* If minimizing, keep trying and advancing the pointer */ - - if (minimize) - { - for (i = min;; i++) - { - if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - if (i >= max || !match_ref(offset, eptr, length, md, ims)) - return FALSE; - eptr += length; - } - /* Control never gets here */ - } - - /* If maximizing, find the longest string and work backwards */ - - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (!match_ref(offset, eptr, length, md, ims)) break; - eptr += length; - } - while (eptr >= pp) - { - if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - eptr -= length; - } - return FALSE; - } - } - /* Control never gets here */ - - - - /* Match a character class, possibly repeatedly. Look past the end of the - item to see if there is repeat information following. Then obey similar - code to character type repeats - written out again for speed. 
*/ - - case OP_CLASS: - { - const uschar *data = ecode + 1; /* Save for matching */ - ecode += 33; /* Advance past the item */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - c = *ecode++ - OP_CRSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*ecode == OP_CRMINRANGE); - min = (ecode[1] << 8) + ecode[2]; - max = (ecode[3] << 8) + ecode[4]; - if (max == 0) max = INT_MAX; - ecode += 5; - break; - - default: /* No repeat follows */ - min = max = 1; - break; - } - - /* First, ensure the minimum number of matches are present. */ - - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) return FALSE; - GETCHARINC(c, eptr) /* Get character; increment eptr */ - -#ifdef SUPPORT_UTF8 - /* We do not yet support class members > 255 */ - if (c > 255) return FALSE; -#endif - - if ((data[c/8] & (1 << (c&7))) != 0) continue; - return FALSE; - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == max) continue; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. */ - - if (minimize) - { - for (i = min;; i++) - { - if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - if (i >= max || eptr >= md->end_subject) return FALSE; - GETCHARINC(c, eptr) /* Get character; increment eptr */ - -#ifdef SUPPORT_UTF8 - /* We do not yet support class members > 255 */ - if (c > 255) return FALSE; -#endif - if ((data[c/8] & (1 << (c&7))) != 0) continue; - return FALSE; - } - /* Control never gets here */ - } - - /* If maximizing, find the longest possible run, then work backwards. 
*/ - - else - { - const uschar *pp = eptr; - int len = 1; - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len) /* Get character, set length if UTF-8 */ - -#ifdef SUPPORT_UTF8 - /* We do not yet support class members > 255 */ - if (c > 255) break; -#endif - if ((data[c/8] & (1 << (c&7))) == 0) break; - eptr += len; - } - - while (eptr >= pp) - { - if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - -#ifdef SUPPORT_UTF8 - BACKCHAR(eptr) -#endif - } - return FALSE; - } - } - /* Control never gets here */ - - /* Match a run of characters */ - - case OP_CHARS: - { - register int length = ecode[1]; - ecode += 2; - -#ifdef DEBUG /* Sigh. Some compilers never learn. */ - if (eptr >= md->end_subject) - printf("matching subject against pattern "); - else - { - printf("matching subject "); - pchars(eptr, length, TRUE, md); - printf(" against pattern "); - } - pchars(ecode, length, FALSE, md); - printf("\n"); -#endif - - if (length > md->end_subject - eptr) return FALSE; - if ((ims & PCRE_CASELESS) != 0) - { - while (length-- > 0) - if (md->lcc[*ecode++] != md->lcc[*eptr++]) - return FALSE; - } - else - { - while (length-- > 0) if (*ecode++ != *eptr++) return FALSE; - } - } - break; - - /* Match a single character repeatedly; different opcodes share code. */ - - case OP_EXACT: - min = max = (ecode[1] << 8) + ecode[2]; - ecode += 3; - goto REPEATCHAR; - - case OP_UPTO: - case OP_MINUPTO: - min = 0; - max = (ecode[1] << 8) + ecode[2]; - minimize = *ecode == OP_MINUPTO; - ecode += 3; - goto REPEATCHAR; - - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - c = *ecode++ - OP_STAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single-character matches. 
We can give - up quickly if there are fewer than the minimum number of characters left in - the subject. */ - - REPEATCHAR: - if (min > md->end_subject - eptr) return FALSE; - c = *ecode++; - - /* The code is duplicated for the caseless and caseful cases, for speed, - since matching characters is likely to be quite common. First, ensure the - minimum number of matches are present. If min = max, continue at the same - level without recursing. Otherwise, if minimizing, keep trying the rest of - the expression and advancing one matching character if failing, up to the - maximum. Alternatively, if maximizing, find the maximum number of - characters and work backwards. */ - - DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max, - max, eptr)); - - if ((ims & PCRE_CASELESS) != 0) - { - c = md->lcc[c]; - for (i = 1; i <= min; i++) - if (c != md->lcc[*eptr++]) return FALSE; - if (min == max) continue; - if (minimize) - { - for (i = min;; i++) - { - if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - if (i >= max || eptr >= md->end_subject || - c != md->lcc[*eptr++]) - return FALSE; - } - /* Control never gets here */ - } - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || c != md->lcc[*eptr]) break; - eptr++; - } - while (eptr >= pp) - if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - return FALSE; - } - /* Control never gets here */ - } - - /* Caseful comparisons */ - - else - { - for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE; - if (min == max) continue; - if (minimize) - { - for (i = min;; i++) - { - if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE; - } - /* Control never gets here */ - } - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || c != *eptr) break; - eptr++; - } - while (eptr >= pp) - if 
(match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - return FALSE; - } - } - /* Control never gets here */ - - /* Match a negated single character */ - - case OP_NOT: - if (eptr >= md->end_subject) return FALSE; - ecode++; - if ((ims & PCRE_CASELESS) != 0) - { - if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE; - } - else - { - if (*ecode++ == *eptr++) return FALSE; - } - break; - - /* Match a negated single character repeatedly. This is almost a repeat of - the code for a repeated single character, but I haven't found a nice way of - commoning these up that doesn't require a test of the positive/negative - option for each character match. Maybe that wouldn't add very much to the - time taken, but character matching *is* what this is all about... */ - - case OP_NOTEXACT: - min = max = (ecode[1] << 8) + ecode[2]; - ecode += 3; - goto REPEATNOTCHAR; - - case OP_NOTUPTO: - case OP_NOTMINUPTO: - min = 0; - max = (ecode[1] << 8) + ecode[2]; - minimize = *ecode == OP_NOTMINUPTO; - ecode += 3; - goto REPEATNOTCHAR; - - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - c = *ecode++ - OP_NOTSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single-character matches. We can give - up quickly if there are fewer than the minimum number of characters left in - the subject. */ - - REPEATNOTCHAR: - if (min > md->end_subject - eptr) return FALSE; - c = *ecode++; - - /* The code is duplicated for the caseless and caseful cases, for speed, - since matching characters is likely to be quite common. First, ensure the - minimum number of matches are present. If min = max, continue at the same - level without recursing. 
Otherwise, if minimizing, keep trying the rest of - the expression and advancing one matching character if failing, up to the - maximum. Alternatively, if maximizing, find the maximum number of - characters and work backwards. */ - - DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, - max, eptr)); - - if ((ims & PCRE_CASELESS) != 0) - { - c = md->lcc[c]; - for (i = 1; i <= min; i++) - if (c == md->lcc[*eptr++]) return FALSE; - if (min == max) continue; - if (minimize) - { - for (i = min;; i++) - { - if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - if (i >= max || eptr >= md->end_subject || - c == md->lcc[*eptr++]) - return FALSE; - } - /* Control never gets here */ - } - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || c == md->lcc[*eptr]) break; - eptr++; - } - while (eptr >= pp) - if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - return FALSE; - } - /* Control never gets here */ - } - - /* Caseful comparisons */ - - else - { - for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE; - if (min == max) continue; - if (minimize) - { - for (i = min;; i++) - { - if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE; - } - /* Control never gets here */ - } - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || c == *eptr) break; - eptr++; - } - while (eptr >= pp) - if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; - return FALSE; - } - } - /* Control never gets here */ - - /* Match a single character type repeatedly; several different opcodes - share code. This is very similar to the code for single characters, but we - repeat it in the interests of efficiency. 
*/ - - case OP_TYPEEXACT: - min = max = (ecode[1] << 8) + ecode[2]; - minimize = TRUE; - ecode += 3; - goto REPEATTYPE; - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - min = 0; - max = (ecode[1] << 8) + ecode[2]; - minimize = *ecode == OP_TYPEMINUPTO; - ecode += 3; - goto REPEATTYPE; - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - c = *ecode++ - OP_TYPESTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single character type matches */ - - REPEATTYPE: - ctype = *ecode++; /* Code for the character type */ - - /* First, ensure the minimum number of matches are present. Use inline - code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). Also we can test that there are at least - the minimum number of bytes before we start, except when doing '.' in - UTF8 mode. Leave the test in in all cases; in the special case we have - to test after each character. 
*/ - - if (min > md->end_subject - eptr) return FALSE; - if (min > 0) switch(ctype) - { - case OP_ANY: -#ifdef SUPPORT_UTF8 - if (md->utf8) - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject || - (*eptr++ == '\n' && (ims & PCRE_DOTALL) == 0)) - return FALSE; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; - } - break; - } -#endif - /* Non-UTF8 can be faster */ - if ((ims & PCRE_DOTALL) == 0) - { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; } - else eptr += min; - break; - - case OP_NOT_DIGIT: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE; - break; - - case OP_DIGIT: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE; - break; - - case OP_NOT_WHITESPACE: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE; - break; - - case OP_WHITESPACE: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE; - break; - - case OP_NOT_WORDCHAR: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_word) != 0) - return FALSE; - break; - - case OP_WORDCHAR: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_word) == 0) - return FALSE; - break; - } - - /* If min = max, continue at the same level without recursing */ - - if (min == max) continue; - - /* If minimizing, we have to test the rest of the pattern before each - subsequent match. 
*/ - - if (minimize) - { - for (i = min;; i++) - { - if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) return TRUE; - if (i >= max || eptr >= md->end_subject) return FALSE; - - c = *eptr++; - switch(ctype) - { - case OP_ANY: - if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; -#ifdef SUPPORT_UTF8 - if (md->utf8) - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; -#endif - break; - - case OP_NOT_DIGIT: - if ((md->ctypes[c] & ctype_digit) != 0) return FALSE; - break; - - case OP_DIGIT: - if ((md->ctypes[c] & ctype_digit) == 0) return FALSE; - break; - - case OP_NOT_WHITESPACE: - if ((md->ctypes[c] & ctype_space) != 0) return FALSE; - break; - - case OP_WHITESPACE: - if ((md->ctypes[c] & ctype_space) == 0) return FALSE; - break; - - case OP_NOT_WORDCHAR: - if ((md->ctypes[c] & ctype_word) != 0) return FALSE; - break; - - case OP_WORDCHAR: - if ((md->ctypes[c] & ctype_word) == 0) return FALSE; - break; - } - } - /* Control never gets here */ - } - - /* If maximizing it is worth using inline code for speed, doing the type - test once at the start (i.e. keep it out of the loop). */ - - else - { - const uschar *pp = eptr; - switch(ctype) - { - case OP_ANY: - - /* Special code is required for UTF8, but when the maximum is unlimited - we don't need it. 
*/ - -#ifdef SUPPORT_UTF8 - if (md->utf8 && max < INT_MAX) - { - if ((ims & PCRE_DOTALL) == 0) - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || *eptr++ == '\n') break; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; - } - } - else - { - for (i = min; i < max; i++) - { - eptr++; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; - } - } - break; - } -#endif - /* Non-UTF8 can be faster */ - if ((ims & PCRE_DOTALL) == 0) - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || *eptr == '\n') break; - eptr++; - } - } - else - { - c = max - min; - if (c > md->end_subject - eptr) c = md->end_subject - eptr; - eptr += c; - } - break; - - case OP_NOT_DIGIT: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) - break; - eptr++; - } - break; - - case OP_DIGIT: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) - break; - eptr++; - } - break; - - case OP_NOT_WHITESPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) - break; - eptr++; - } - break; - - case OP_WHITESPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) - break; - eptr++; - } - break; - - case OP_NOT_WORDCHAR: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) - break; - eptr++; - } - break; - - case OP_WORDCHAR: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) - break; - eptr++; - } - break; - } - - while (eptr >= pp) - { - if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) - return TRUE; -#ifdef SUPPORT_UTF8 - if (md->utf8) - while (eptr > pp && (*eptr & 0xc0) == 0x80) eptr--; -#endif - } - return FALSE; - } - /* Control never gets here */ - - /* There's been some horrible disaster. 
*/ - - default: - DPRINTF(("Unknown opcode %d\n", *ecode)); - md->errorcode = PCRE_ERROR_UNKNOWN_NODE; - return FALSE; - } - - /* Do not stick any code in here without much thought; it is assumed - that "continue" in the code above comes out to here to repeat the main - loop. */ - - } /* End of main loop */ -/* Control never reaches here */ -} - - - - -/************************************************* -* Execute a Regular Expression * -*************************************************/ - -/* This function applies a compiled re to a subject string and picks out -portions of the string if it matches. Two elements in the vector are set for -each substring: the offsets to the start and end of the substring. - -Arguments: - external_re points to the compiled expression - external_extra points to "hints" from pcre_study() or is NULL - subject points to the subject string - length length of subject string (may contain binary zeros) - start_offset where to start in the subject string - options option bits - offsets points to a vector of ints to be filled in with offsets - offsetcount the number of elements in the vector - -Returns: > 0 => success; value is the number of elements filled in - = 0 => success, but offsets is not big enough - -1 => failed to match - < -1 => some kind of unexpected problem -*/ - -int -pcre_exec(const pcre *external_re, const pcre_extra *external_extra, - const char *subject, int length, int start_offset, int options, int *offsets, - int offsetcount) -{ -int resetcount, ocount; -int first_char = -1; -int req_char = -1; -int req_char2 = -1; -unsigned long int ims = 0; -match_data match_block; -const uschar *start_bits = NULL; -const uschar *start_match = (const uschar *)subject + start_offset; -const uschar *end_subject; -const uschar *req_char_ptr = start_match - 1; -const real_pcre *re = (const real_pcre *)external_re; -const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; -BOOL using_temporary_offsets = FALSE; -BOOL anchored 
= ((re->options | options) & PCRE_ANCHORED) != 0; -BOOL startline = (re->options & PCRE_STARTLINE) != 0; - -if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; - -if (re == NULL || subject == NULL || - (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; -if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; - -match_block.start_pattern = re->code; -match_block.start_subject = (const uschar *)subject; -match_block.end_subject = match_block.start_subject + length; -end_subject = match_block.end_subject; - -match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; -match_block.utf8 = (re->options & PCRE_UTF8) != 0; - -match_block.notbol = (options & PCRE_NOTBOL) != 0; -match_block.noteol = (options & PCRE_NOTEOL) != 0; -match_block.notempty = (options & PCRE_NOTEMPTY) != 0; - -match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ - -match_block.lcc = re->tables + lcc_offset; -match_block.ctypes = re->tables + ctypes_offset; - -/* The ims options can vary during the matching as a result of the presence -of (?ims) items in the pattern. They are kept in a local variable so that -restoring at the exit of a group is easy. */ - -ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL); - -/* If the expression has got more back references than the offsets supplied can -hold, we get a temporary bit of working store to use during the matching. -Otherwise, we can use the vector supplied, rounding down its size to a multiple -of 3. 
*/ - -ocount = offsetcount - (offsetcount % 3); - -if (re->top_backref > 0 && re->top_backref >= ocount/3) - { - ocount = re->top_backref * 3 + 3; - match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); - if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; - using_temporary_offsets = TRUE; - DPRINTF(("Got memory to hold back references\n")); - } -else match_block.offset_vector = offsets; - -match_block.offset_end = ocount; -match_block.offset_max = (2*ocount)/3; -match_block.offset_overflow = FALSE; - -/* Compute the minimum number of offsets that we need to reset each time. Doing -this makes a huge difference to execution time when there aren't many brackets -in the pattern. */ - -resetcount = 2 + re->top_bracket * 2; -if (resetcount > offsetcount) resetcount = ocount; - -/* Reset the working variable associated with each extraction. These should -never be used unless previously set, but they get saved and restored, and so we -initialize them to avoid reading uninitialized locations. */ - -if (match_block.offset_vector != NULL) - { - register int *iptr = match_block.offset_vector + ocount; - register int *iend = iptr - resetcount/2 + 1; - while (--iptr >= iend) *iptr = -1; - } - -/* Set up the first character to match, if available. The first_char value is -never set for an anchored regular expression, but the anchoring may be forced -at run time, so we have to test for anchoring. The first char may be unset for -an unanchored pattern, of course. If there's no first char and the pattern was -studied, there may be a bitmap of possible first characters. 
*/ - -if (!anchored) - { - if ((re->options & PCRE_FIRSTSET) != 0) - { - first_char = re->first_char; - if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char]; - } - else - if (!startline && extra != NULL && - (extra->options & PCRE_STUDY_MAPPED) != 0) - start_bits = extra->start_bits; - } - -/* For anchored or unanchored matches, there may be a "last known required -character" set. If the PCRE_CASELESS is set, implying that the match starts -caselessly, or if there are any changes of this flag within the regex, set up -both cases of the character. Otherwise set the two values the same, which will -avoid duplicate testing (which takes significant time). This covers the vast -majority of cases. It will be suboptimal when the case flag changes in a regex -and the required character in fact is caseful. */ - -if ((re->options & PCRE_REQCHSET) != 0) - { - req_char = re->req_char; - req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)? - (re->tables + fcc_offset)[req_char] : req_char; - } - -/* Loop for handling unanchored repeated matching attempts; for anchored regexs -the loop runs just once. */ - -do - { - int rc; - register int *iptr = match_block.offset_vector; - register int *iend = iptr + resetcount; - - /* Reset the maximum number of extractions we might see. 
*/ - - while (iptr < iend) *iptr++ = -1; - - /* Advance to a unique first char if possible */ - - if (first_char >= 0) - { - if ((ims & PCRE_CASELESS) != 0) - while (start_match < end_subject && - match_block.lcc[*start_match] != first_char) - start_match++; - else - while (start_match < end_subject && *start_match != first_char) - start_match++; - } - - /* Or to just after \n for a multiline match if possible */ - - else if (startline) - { - if (start_match > match_block.start_subject + start_offset) - { - while (start_match < end_subject && start_match[-1] != '\n') - start_match++; - } - } - - /* Or to a non-unique first char after study */ - - else if (start_bits != NULL) - { - while (start_match < end_subject) - { - register int c = *start_match; - if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break; - } - } - -#ifdef DEBUG /* Sigh. Some compilers never learn. */ - printf(">>>> Match against: "); - pchars(start_match, end_subject - start_match, TRUE, &match_block); - printf("\n"); -#endif - - /* If req_char is set, we know that that character must appear in the subject - for the match to succeed. If the first character is set, req_char must be - later in the subject; otherwise the test starts at the match point. This - optimization can save a huge amount of backtracking in patterns with nested - unlimited repeats that aren't going to match. We don't know what the state of - case matching may be when this character is hit, so test for it in both its - cases if necessary. However, the different cased versions will not be set up - unless PCRE_CASELESS was given or the casing state changes within the regex. - Writing separate code makes it go faster, as does using an autoincrement and - backing off on a match. */ - - if (req_char >= 0) - { - register const uschar *p = start_match + ((first_char >= 0)? 1 : 0); - - /* We don't need to repeat the search if we haven't yet reached the - place we found it at last time. 
*/ - - if (p > req_char_ptr) - { - /* Do a single test if no case difference is set up */ - - if (req_char == req_char2) - { - while (p < end_subject) - { - if (*p++ == req_char) { p--; break; } - } - } - - /* Otherwise test for either case */ - - else - { - while (p < end_subject) - { - register int pp = *p++; - if (pp == req_char || pp == req_char2) { p--; break; } - } - } - - /* If we can't find the required character, break the matching loop */ - - if (p >= end_subject) break; - - /* If we have found the required character, save the point where we - found it, so that we don't search again next time round the loop if - the start hasn't passed this character yet. */ - - req_char_ptr = p; - } - } - - /* When a match occurs, substrings will be set for all internal extractions; - we just need to set up the whole thing as substring 0 before returning. If - there were too many extractions, set the return code to zero. In the case - where we had to get some local store to hold offsets for backreferences, copy - those back references that we can. In this case there need not be overflow - if certain parts of the pattern were not used. */ - - match_block.start_match = start_match; - if (!match(start_match, re->code, 2, &match_block, ims, NULL, match_isgroup)) - continue; - - /* Copy the offset information from temporary store if necessary */ - - if (using_temporary_offsets) - { - if (offsetcount >= 4) - { - memcpy(offsets + 2, match_block.offset_vector + 2, - (offsetcount - 2) * sizeof(int)); - DPRINTF(("Copied offsets from temporary memory\n")); - } - if (match_block.end_offset_top > offsetcount) - match_block.offset_overflow = TRUE; - - DPRINTF(("Freeing temporary memory\n")); - (pcre_free)(match_block.offset_vector); - } - - rc = match_block.offset_overflow? 
0 : match_block.end_offset_top/2; - - if (match_block.offset_end < 2) rc = 0; else - { - offsets[0] = start_match - match_block.start_subject; - offsets[1] = match_block.end_match_ptr - match_block.start_subject; - } - - DPRINTF((">>>> returning %d\n", rc)); - return rc; - } - -/* This "while" is the end of the "do" above */ - -while (!anchored && - match_block.errorcode == PCRE_ERROR_NOMATCH && - start_match++ < end_subject); - -if (using_temporary_offsets) - { - DPRINTF(("Freeing temporary memory\n")); - (pcre_free)(match_block.offset_vector); - } - -DPRINTF((">>>> returning %d\n", match_block.errorcode)); - -return match_block.errorcode; -} - -/* End of pcre.c */ diff --git a/pcre/pcre.def b/pcre/pcre.def deleted file mode 100644 index 0e8cf3f4..00000000 --- a/pcre/pcre.def +++ /dev/null @@ -1,19 +0,0 @@ -EXPORTS - -pcre_malloc DATA -pcre_free DATA - -pcre_compile -pcre_copy_substring -pcre_exec -pcre_get_substring -pcre_get_substring_list -pcre_info -pcre_maketables -pcre_study -pcre_version - -regcomp -regexec -regerror -regfree diff --git a/pcre/pcre.h b/pcre/pcre.h deleted file mode 100644 index d27ba859..00000000 --- a/pcre/pcre.h +++ /dev/null @@ -1,110 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* Copyright (c) 1997-2000 University of Cambridge */ - -#ifndef _PCRE_H -#define _PCRE_H - -/* The file pcre.h is build by "configure". Do not edit it; instead -make changes to pcre.in. */ - -#define PCRE_MAJOR 3 -#define PCRE_MINOR 4 -#define PCRE_DATE 22-Aug-2000 - -/* Win32 uses DLL by default */ - -#ifdef _WIN32 -# ifdef STATIC_PCRE -# define PCRE_DL_IMPORT -# else -# define PCRE_DL_IMPORT __declspec(dllimport) -# endif -#else -# define PCRE_DL_IMPORT -#endif - -/* Have to include stdlib.h in order to ensure that size_t is defined; -it is needed here for malloc. 
*/ - -#include - -/* Allow for C++ users */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Options */ - -#define PCRE_CASELESS 0x0001 -#define PCRE_MULTILINE 0x0002 -#define PCRE_DOTALL 0x0004 -#define PCRE_EXTENDED 0x0008 -#define PCRE_ANCHORED 0x0010 -#define PCRE_DOLLAR_ENDONLY 0x0020 -#define PCRE_EXTRA 0x0040 -#define PCRE_NOTBOL 0x0080 -#define PCRE_NOTEOL 0x0100 -#define PCRE_UNGREEDY 0x0200 -#define PCRE_NOTEMPTY 0x0400 -#define PCRE_UTF8 0x0800 - -/* Exec-time and get-time error codes */ - -#define PCRE_ERROR_NOMATCH (-1) -#define PCRE_ERROR_NULL (-2) -#define PCRE_ERROR_BADOPTION (-3) -#define PCRE_ERROR_BADMAGIC (-4) -#define PCRE_ERROR_UNKNOWN_NODE (-5) -#define PCRE_ERROR_NOMEMORY (-6) -#define PCRE_ERROR_NOSUBSTRING (-7) - -/* Request types for pcre_fullinfo() */ - -#define PCRE_INFO_OPTIONS 0 -#define PCRE_INFO_SIZE 1 -#define PCRE_INFO_CAPTURECOUNT 2 -#define PCRE_INFO_BACKREFMAX 3 -#define PCRE_INFO_FIRSTCHAR 4 -#define PCRE_INFO_FIRSTTABLE 5 -#define PCRE_INFO_LASTLITERAL 6 - -/* Types */ - -typedef void pcre; -typedef void pcre_extra; - -/* Store get and free functions. These can be set to alternative malloc/free -functions if required. Some magic is required for Win32 DLL; it is null on -other OS. 
*/ - -PCRE_DL_IMPORT extern void *(*pcre_malloc)(size_t); -PCRE_DL_IMPORT extern void (*pcre_free)(void *); - -#undef PCRE_DL_IMPORT - -/* Functions */ - -extern pcre *pcre_compile(const char *, int, const char **, int *, - const unsigned char *); -extern int pcre_copy_substring(const char *, int *, int, int, char *, int); -extern int pcre_exec(const pcre *, const pcre_extra *, const char *, - int, int, int, int *, int); -extern void pcre_free_substring(const char *); -extern void pcre_free_substring_list(const char **); -extern int pcre_get_substring(const char *, int *, int, int, const char **); -extern int pcre_get_substring_list(const char *, int *, int, const char ***); -extern int pcre_info(const pcre *, int *, int *); -extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *); -extern unsigned const char *pcre_maketables(void); -extern pcre_extra *pcre_study(const pcre *, int, const char **); -extern const char *pcre_version(void); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* End of pcre.h */ diff --git a/pcre/pcre.in b/pcre/pcre.in deleted file mode 100644 index d698f403..00000000 --- a/pcre/pcre.in +++ /dev/null @@ -1,110 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* Copyright (c) 1997-2000 University of Cambridge */ - -#ifndef _PCRE_H -#define _PCRE_H - -/* The file pcre.h is build by "configure". Do not edit it; instead -make changes to pcre.in. */ - -#define PCRE_MAJOR @PCRE_MAJOR@ -#define PCRE_MINOR @PCRE_MINOR@ -#define PCRE_DATE @PCRE_DATE@ - -/* Win32 uses DLL by default */ - -#ifdef _WIN32 -# ifdef STATIC_PCRE -# define PCRE_DL_IMPORT -# else -# define PCRE_DL_IMPORT __declspec(dllimport) -# endif -#else -# define PCRE_DL_IMPORT -#endif - -/* Have to include stdlib.h in order to ensure that size_t is defined; -it is needed here for malloc. 
*/ - -#include - -/* Allow for C++ users */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Options */ - -#define PCRE_CASELESS 0x0001 -#define PCRE_MULTILINE 0x0002 -#define PCRE_DOTALL 0x0004 -#define PCRE_EXTENDED 0x0008 -#define PCRE_ANCHORED 0x0010 -#define PCRE_DOLLAR_ENDONLY 0x0020 -#define PCRE_EXTRA 0x0040 -#define PCRE_NOTBOL 0x0080 -#define PCRE_NOTEOL 0x0100 -#define PCRE_UNGREEDY 0x0200 -#define PCRE_NOTEMPTY 0x0400 -#define PCRE_UTF8 0x0800 - -/* Exec-time and get-time error codes */ - -#define PCRE_ERROR_NOMATCH (-1) -#define PCRE_ERROR_NULL (-2) -#define PCRE_ERROR_BADOPTION (-3) -#define PCRE_ERROR_BADMAGIC (-4) -#define PCRE_ERROR_UNKNOWN_NODE (-5) -#define PCRE_ERROR_NOMEMORY (-6) -#define PCRE_ERROR_NOSUBSTRING (-7) - -/* Request types for pcre_fullinfo() */ - -#define PCRE_INFO_OPTIONS 0 -#define PCRE_INFO_SIZE 1 -#define PCRE_INFO_CAPTURECOUNT 2 -#define PCRE_INFO_BACKREFMAX 3 -#define PCRE_INFO_FIRSTCHAR 4 -#define PCRE_INFO_FIRSTTABLE 5 -#define PCRE_INFO_LASTLITERAL 6 - -/* Types */ - -typedef void pcre; -typedef void pcre_extra; - -/* Store get and free functions. These can be set to alternative malloc/free -functions if required. Some magic is required for Win32 DLL; it is null on -other OS. 
*/ - -PCRE_DL_IMPORT extern void *(*pcre_malloc)(size_t); -PCRE_DL_IMPORT extern void (*pcre_free)(void *); - -#undef PCRE_DL_IMPORT - -/* Functions */ - -extern pcre *pcre_compile(const char *, int, const char **, int *, - const unsigned char *); -extern int pcre_copy_substring(const char *, int *, int, int, char *, int); -extern int pcre_exec(const pcre *, const pcre_extra *, const char *, - int, int, int, int *, int); -extern void pcre_free_substring(const char *); -extern void pcre_free_substring_list(const char **); -extern int pcre_get_substring(const char *, int *, int, int, const char **); -extern int pcre_get_substring_list(const char *, int *, int, const char ***); -extern int pcre_info(const pcre *, int *, int *); -extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *); -extern unsigned const char *pcre_maketables(void); -extern pcre_extra *pcre_study(const pcre *, int, const char **); -extern const char *pcre_version(void); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* End of pcre.h */ diff --git a/pcre/pcregrep.c b/pcre/pcregrep.c deleted file mode 100644 index e8c934ef..00000000 --- a/pcre/pcregrep.c +++ /dev/null @@ -1,228 +0,0 @@ -/************************************************* -* pcregrep program * -*************************************************/ - -/* This is a grep program that uses the PCRE regular expression library to do -its pattern matching. */ - -#include -#include -#include -#include -#include "config.h" -#include "pcre.h" - -#define FALSE 0 -#define TRUE 1 - -typedef int BOOL; - - - -/************************************************* -* Global variables * -*************************************************/ - -static pcre *pattern; -static pcre_extra *hints; - -static BOOL count_only = FALSE; -static BOOL filenames_only = FALSE; -static BOOL invert = FALSE; -static BOOL number = FALSE; -static BOOL silent = FALSE; -static BOOL whole_lines = FALSE; - - - -#if ! 
HAVE_STRERROR -/************************************************* -* Provide strerror() for non-ANSI libraries * -*************************************************/ - -/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() -in their libraries, but can provide the same facility by this simple -alternative function. */ - -extern int sys_nerr; -extern char *sys_errlist[]; - -char * -strerror(int n) -{ -if (n < 0 || n >= sys_nerr) return "unknown error number"; -return sys_errlist[n]; -} -#endif /* HAVE_STRERROR */ - - - -/************************************************* -* Grep an individual file * -*************************************************/ - -static int -pcregrep(FILE *in, char *name) -{ -int rc = 1; -int linenumber = 0; -int count = 0; -int offsets[99]; -char buffer[BUFSIZ]; - -while (fgets(buffer, sizeof(buffer), in) != NULL) - { - BOOL match; - int length = (int)strlen(buffer); - if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0; - linenumber++; - - match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0; - if (match && whole_lines && offsets[1] != length) match = FALSE; - - if (match != invert) - { - if (count_only) count++; - - else if (filenames_only) - { - fprintf(stdout, "%s\n", (name == NULL)? 
"" : name); - return 0; - } - - else if (silent) return 0; - - else - { - if (name != NULL) fprintf(stdout, "%s:", name); - if (number) fprintf(stdout, "%d:", linenumber); - fprintf(stdout, "%s\n", buffer); - } - - rc = 0; - } - } - -if (count_only) - { - if (name != NULL) fprintf(stdout, "%s:", name); - fprintf(stdout, "%d\n", count); - } - -return rc; -} - - - - -/************************************************* -* Usage function * -*************************************************/ - -static int -usage(int rc) -{ -fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n"); -return rc; -} - - - - -/************************************************* -* Main program * -*************************************************/ - -int -main(int argc, char **argv) -{ -int i; -int rc = 1; -int options = 0; -int errptr; -const char *error; -BOOL filenames = TRUE; - -/* Process the options */ - -for (i = 1; i < argc; i++) - { - char *s; - if (argv[i][0] != '-') break; - s = argv[i] + 1; - while (*s != 0) - { - switch (*s++) - { - case 'c': count_only = TRUE; break; - case 'h': filenames = FALSE; break; - case 'i': options |= PCRE_CASELESS; break; - case 'l': filenames_only = TRUE; - case 'n': number = TRUE; break; - case 's': silent = TRUE; break; - case 'v': invert = TRUE; break; - case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break; - - case 'V': - fprintf(stderr, "PCRE version %s\n", pcre_version()); - break; - - default: - fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]); - return usage(2); - } - } - } - -/* There must be at least a regexp argument */ - -if (i >= argc) return usage(0); - -/* Compile the regular expression. 
*/ - -pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL); -if (pattern == NULL) - { - fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error); - return 2; - } - -/* Study the regular expression, as we will be running it may times */ - -hints = pcre_study(pattern, 0, &error); -if (error != NULL) - { - fprintf(stderr, "pcregrep: error while studing regex: %s\n", error); - return 2; - } - -/* If there are no further arguments, do the business on stdin and exit */ - -if (i >= argc) return pcregrep(stdin, NULL); - -/* Otherwise, work through the remaining arguments as files. If there is only -one, don't give its name on the output. */ - -if (i == argc - 1) filenames = FALSE; -if (filenames_only) filenames = TRUE; - -for (; i < argc; i++) - { - FILE *in = fopen(argv[i], "r"); - if (in == NULL) - { - fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno)); - rc = 2; - } - else - { - int frc = pcregrep(in, filenames? argv[i] : NULL); - if (frc == 0 && rc == 1) rc = 0; - fclose(in); - } - } - -return rc; -} - -/* End */ diff --git a/pcre/pcreposix.c b/pcre/pcreposix.c deleted file mode 100644 index 519d2dd5..00000000 --- a/pcre/pcreposix.c +++ /dev/null @@ -1,280 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. - -This module is a wrapper that provides a POSIX API to the underlying PCRE -functions. 
- -Written by: Philip Hazel - - Copyright (c) 1997-2000 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ - -#include "internal.h" -#include "pcreposix.h" -#include "stdlib.h" - - - -/* Corresponding tables of PCRE error messages and POSIX error codes. 
*/ - -static const char *estring[] = { - ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, - ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, - ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30, - ERR31 }; - -static int eint[] = { - REG_EESCAPE, /* "\\ at end of pattern" */ - REG_EESCAPE, /* "\\c at end of pattern" */ - REG_EESCAPE, /* "unrecognized character follows \\" */ - REG_BADBR, /* "numbers out of order in {} quantifier" */ - REG_BADBR, /* "number too big in {} quantifier" */ - REG_EBRACK, /* "missing terminating ] for character class" */ - REG_ECTYPE, /* "invalid escape sequence in character class" */ - REG_ERANGE, /* "range out of order in character class" */ - REG_BADRPT, /* "nothing to repeat" */ - REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */ - REG_ASSERT, /* "internal error: unexpected repeat" */ - REG_BADPAT, /* "unrecognized character after (?" */ - REG_ESIZE, /* "too many capturing parenthesized sub-patterns" */ - REG_EPAREN, /* "missing )" */ - REG_ESUBREG, /* "back reference to non-existent subpattern" */ - REG_INVARG, /* "erroffset passed as NULL" */ - REG_INVARG, /* "unknown option bit(s) set" */ - REG_EPAREN, /* "missing ) after comment" */ - REG_ESIZE, /* "too many sets of parentheses" */ - REG_ESIZE, /* "regular expression too large" */ - REG_ESPACE, /* "failed to get memory" */ - REG_EPAREN, /* "unmatched brackets" */ - REG_ASSERT, /* "internal error: code overflow" */ - REG_BADPAT, /* "unrecognized character after (?<" */ - REG_BADPAT, /* "lookbehind assertion is not fixed length" */ - REG_BADPAT, /* "malformed number after (?(" */ - REG_BADPAT, /* "conditional group containe more than two branches" */ - REG_BADPAT, /* "assertion expected after (?(" */ - REG_BADPAT, /* "(?p must be followed by )" */ - REG_ECTYPE, /* "unknown POSIX class name" */ - REG_BADPAT, /* "POSIX collating elements are not supported" */ - REG_INVARG, /* "this version of PCRE is not 
compiled with PCRE_UTF8 support" */ - REG_BADPAT, /* "characters with values > 255 are not yet supported in classes" */ - REG_BADPAT, /* "character value in \x{...} sequence is too large" */ - REG_BADPAT /* "invalid condition (?(0)" */ -}; - -/* Table of texts corresponding to POSIX error codes */ - -static const char *pstring[] = { - "", /* Dummy for value 0 */ - "internal error", /* REG_ASSERT */ - "invalid repeat counts in {}", /* BADBR */ - "pattern error", /* BADPAT */ - "? * + invalid", /* BADRPT */ - "unbalanced {}", /* EBRACE */ - "unbalanced []", /* EBRACK */ - "collation error - not relevant", /* ECOLLATE */ - "bad class", /* ECTYPE */ - "bad escape sequence", /* EESCAPE */ - "empty expression", /* EMPTY */ - "unbalanced ()", /* EPAREN */ - "bad range inside []", /* ERANGE */ - "expression too big", /* ESIZE */ - "failed to get memory", /* ESPACE */ - "bad back reference", /* ESUBREG */ - "bad argument", /* INVARG */ - "match failed" /* NOMATCH */ -}; - - - - -/************************************************* -* Translate PCRE text code to int * -*************************************************/ - -/* PCRE compile-time errors are given as strings defined as macros. We can just -look them up in a table to turn them into POSIX-style error codes. */ - -static int -pcre_posix_error_code(const char *s) -{ -size_t i; -for (i = 0; i < sizeof(estring)/sizeof(char *); i++) - if (strcmp(s, estring[i]) == 0) return eint[i]; -return REG_ASSERT; -} - - - -/************************************************* -* Translate error code to string * -*************************************************/ - -size_t -regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) -{ -const char *message, *addmessage; -size_t length, addlength; - -message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))? 
- "unknown error code" : pstring[errcode]; -length = strlen(message) + 1; - -addmessage = " at offset "; -addlength = (preg != NULL && (int)preg->re_erroffset != -1)? - strlen(addmessage) + 6 : 0; - -if (errbuf_size > 0) - { - if (addlength > 0 && errbuf_size >= length + addlength) - sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset); - else - { - strncpy(errbuf, message, errbuf_size - 1); - errbuf[errbuf_size-1] = 0; - } - } - -return length + addlength; -} - - - - -/************************************************* -* Free store held by a regex * -*************************************************/ - -void -regfree(regex_t *preg) -{ -(pcre_free)(preg->re_pcre); -} - - - - -/************************************************* -* Compile a regular expression * -*************************************************/ - -/* -Arguments: - preg points to a structure for recording the compiled expression - pattern the pattern to compile - cflags compilation flags - -Returns: 0 on success - various non-zero codes on failure -*/ - -int -regcomp(regex_t *preg, const char *pattern, int cflags) -{ -const char *errorptr; -int erroffset; -int options = 0; - -if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS; -if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE; - -preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL); -preg->re_erroffset = erroffset; - -if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr); - -preg->re_nsub = pcre_info(preg->re_pcre, NULL, NULL); -return 0; -} - - - - -/************************************************* -* Match a regular expression * -*************************************************/ - -/* Unfortunately, PCRE requires 3 ints of working space for each captured -substring, so we have to get and release working store instead of just using -the POSIX structures as was done in earlier releases when PCRE needed only 2 -ints. 
*/ - -int -regexec(regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags) -{ -int rc; -int options = 0; -int *ovector = NULL; - -if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; -if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; - -preg->re_erroffset = (size_t)(-1); /* Only has meaning after compile */ - -if (nmatch > 0) - { - ovector = (int *)malloc(sizeof(int) * nmatch * 3); - if (ovector == NULL) return REG_ESPACE; - } - -rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options, - ovector, nmatch * 3); - -if (rc == 0) rc = nmatch; /* All captured slots were filled in */ - -if (rc >= 0) - { - size_t i; - for (i = 0; i < (size_t)rc; i++) - { - pmatch[i].rm_so = ovector[i*2]; - pmatch[i].rm_eo = ovector[i*2+1]; - } - if (ovector != NULL) free(ovector); - for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; - return 0; - } - -else - { - if (ovector != NULL) free(ovector); - switch(rc) - { - case PCRE_ERROR_NOMATCH: return REG_NOMATCH; - case PCRE_ERROR_NULL: return REG_INVARG; - case PCRE_ERROR_BADOPTION: return REG_INVARG; - case PCRE_ERROR_BADMAGIC: return REG_INVARG; - case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT; - case PCRE_ERROR_NOMEMORY: return REG_ESPACE; - default: return REG_ASSERT; - } - } -} - -/* End of pcreposix.c */ diff --git a/pcre/pcreposix.h b/pcre/pcreposix.h deleted file mode 100644 index 7660acbd..00000000 --- a/pcre/pcreposix.h +++ /dev/null @@ -1,88 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* Copyright (c) 1997-2000 University of Cambridge */ - -#ifndef _PCREPOSIX_H -#define _PCREPOSIX_H - -/* This is the header for the POSIX wrapper interface to the PCRE Perl- -Compatible Regular Expression library. It defines the things POSIX says should -be there. I hope. */ - -/* Have to include stdlib.h in order to ensure that size_t is defined. 
*/ - -#include - -/* Allow for C++ users */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Options defined by POSIX. */ - -#define REG_ICASE 0x01 -#define REG_NEWLINE 0x02 -#define REG_NOTBOL 0x04 -#define REG_NOTEOL 0x08 - -/* These are not used by PCRE, but by defining them we make it easier -to slot PCRE into existing programs that make POSIX calls. */ - -#define REG_EXTENDED 0 -#define REG_NOSUB 0 - -/* Error values. Not all these are relevant or used by the wrapper. */ - -enum { - REG_ASSERT = 1, /* internal error ? */ - REG_BADBR, /* invalid repeat counts in {} */ - REG_BADPAT, /* pattern error */ - REG_BADRPT, /* ? * + invalid */ - REG_EBRACE, /* unbalanced {} */ - REG_EBRACK, /* unbalanced [] */ - REG_ECOLLATE, /* collation error - not relevant */ - REG_ECTYPE, /* bad class */ - REG_EESCAPE, /* bad escape sequence */ - REG_EMPTY, /* empty expression */ - REG_EPAREN, /* unbalanced () */ - REG_ERANGE, /* bad range inside [] */ - REG_ESIZE, /* expression too big */ - REG_ESPACE, /* failed to get memory */ - REG_ESUBREG, /* bad back reference */ - REG_INVARG, /* bad argument */ - REG_NOMATCH /* match failed */ -}; - - -/* The structure representing a compiled regular expression. */ - -typedef struct { - void *re_pcre; - size_t re_nsub; - size_t re_erroffset; -} regex_t; - -/* The structure in which a captured offset is returned. 
*/ - -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} regmatch_t; - -/* The functions */ - -extern int regcomp(regex_t *, const char *, int); -extern int regexec(regex_t *, const char *, size_t, regmatch_t *, int); -extern size_t regerror(int, const regex_t *, char *, size_t); -extern void regfree(regex_t *); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* End of pcreposix.h */ diff --git a/pcre/pcretest.c b/pcre/pcretest.c deleted file mode 100644 index ee5df5f0..00000000 --- a/pcre/pcretest.c +++ /dev/null @@ -1,1225 +0,0 @@ -/************************************************* -* PCRE testing program * -*************************************************/ - -#include -#include -#include -#include -#include -#include - -/* Use the internal info for displaying the results of pcre_study(). */ - -#include "internal.h" - -/* It is possible to compile this test program without including support for -testing the POSIX interface, though this is not available via the standard -Makefile. */ - -#if !defined NOPOSIX -#include "pcreposix.h" -#endif - -#ifndef CLOCKS_PER_SEC -#ifdef CLK_TCK -#define CLOCKS_PER_SEC CLK_TCK -#else -#define CLOCKS_PER_SEC 100 -#endif -#endif - -#define LOOPREPEAT 20000 - - -static FILE *outfile; -static int log_store = 0; -static size_t gotten_store; - - - -static int utf8_table1[] = { - 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff}; - -static int utf8_table2[] = { - 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; - -static int utf8_table3[] = { - 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; - - -/************************************************* -* Convert character value to UTF-8 * -*************************************************/ - -/* This function takes an integer value in the range 0 - 0x7fffffff -and encodes it as a UTF-8 character in 0 to 6 bytes. 
- -Arguments: - cvalue the character value - buffer pointer to buffer for result - at least 6 bytes long - -Returns: number of characters placed in the buffer - -1 if input character is negative - 0 if input character is positive but too big (only when - int is longer than 32 bits) -*/ - -static int -ord2utf8(int cvalue, unsigned char *buffer) -{ -register int i, j; -for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) - if (cvalue <= utf8_table1[i]) break; -if (i >= sizeof(utf8_table1)/sizeof(int)) return 0; -if (cvalue < 0) return -1; -*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]); -cvalue >>= 6 - i; -for (j = 0; j < i; j++) - { - *buffer++ = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } -return i + 1; -} - - -/************************************************* -* Convert UTF-8 string to value * -*************************************************/ - -/* This function takes one or more bytes that represents a UTF-8 character, -and returns the value of the character. - -Argument: - buffer a pointer to the byte vector - vptr a pointer to an int to receive the value - -Returns: > 0 => the number of bytes consumed - -6 to 0 => malformed UTF-8 character at offset = (-return) -*/ - -int -utf82ord(unsigned char *buffer, int *vptr) -{ -int c = *buffer++; -int d = c; -int i, j, s; - -for (i = -1; i < 6; i++) /* i is number of additional bytes */ - { - if ((d & 0x80) == 0) break; - d <<= 1; - } - -if (i == -1) { *vptr = c; return 1; } /* ascii character */ -if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ - -/* i now has a value in the range 1-5 */ - -d = c & utf8_table3[i]; -s = 6 - i; - -for (j = 0; j < i; j++) - { - c = *buffer++; - if ((c & 0xc0) != 0x80) return -(j+1); - d |= (c & 0x3f) << s; - s += 6; - } - -/* Check that encoding was the correct unique one */ - -for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++) - if (d <= utf8_table1[j]) break; -if (j != i) return -(i+1); - -/* Valid value */ - -*vptr = d; -return i+1; -} - - - - - - -/* Debugging 
function to print the internal form of the regex. This is the same -code as contained in pcre.c under the DEBUG macro. */ - -static const char *OP_names[] = { - "End", "\\A", "\\B", "\\b", "\\D", "\\d", - "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z", - "Opt", "^", "$", "Any", "chars", "not", - "*", "*?", "+", "+?", "?", "??", "{", "{", "{", - "*", "*?", "+", "+?", "?", "??", "{", "{", "{", - "*", "*?", "+", "+?", "?", "??", "{", "{", "{", - "*", "*?", "+", "+?", "?", "??", "{", "{", - "class", "Ref", "Recurse", - "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", - "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", - "Brazero", "Braminzero", "Bra" -}; - - -static void print_internals(pcre *re) -{ -unsigned char *code = ((real_pcre *)re)->code; - -fprintf(outfile, "------------------------------------------------------------------\n"); - -for(;;) - { - int c; - int charlength; - - fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code)); - - if (*code >= OP_BRA) - { - fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); - code += 2; - } - - else switch(*code) - { - case OP_END: - fprintf(outfile, " %s\n", OP_names[*code]); - fprintf(outfile, "------------------------------------------------------------------\n"); - return; - - case OP_OPT: - fprintf(outfile, " %.2x %s", code[1], OP_names[*code]); - code++; - break; - - case OP_COND: - fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]); - code += 2; - break; - - case OP_CREF: - fprintf(outfile, " %.2d %s", code[1], OP_names[*code]); - code++; - break; - - case OP_CHARS: - charlength = *(++code); - fprintf(outfile, "%3d ", charlength); - while (charlength-- > 0) - if (isprint(c = *(++code))) fprintf(outfile, "%c", c); - else fprintf(outfile, "\\x%02x", c); - break; - - case OP_KETRMAX: - case OP_KETRMIN: - case OP_ALT: - case OP_KET: - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - case OP_ONCE: - fprintf(outfile, "%3d %s", 
(code[1] << 8) + code[2], OP_names[*code]); - code += 2; - break; - - case OP_REVERSE: - fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]); - code += 2; - break; - - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - if (*code >= OP_TYPESTAR) - fprintf(outfile, " %s", OP_names[code[1]]); - else if (isprint(c = code[1])) fprintf(outfile, " %c", c); - else fprintf(outfile, " \\x%02x", c); - fprintf(outfile, "%s", OP_names[*code++]); - break; - - case OP_EXACT: - case OP_UPTO: - case OP_MINUPTO: - if (isprint(c = code[3])) fprintf(outfile, " %c{", c); - else fprintf(outfile, " \\x%02x{", c); - if (*code != OP_EXACT) fprintf(outfile, ","); - fprintf(outfile, "%d}", (code[1] << 8) + code[2]); - if (*code == OP_MINUPTO) fprintf(outfile, "?"); - code += 3; - break; - - case OP_TYPEEXACT: - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - fprintf(outfile, " %s{", OP_names[code[3]]); - if (*code != OP_TYPEEXACT) fprintf(outfile, "0,"); - fprintf(outfile, "%d}", (code[1] << 8) + code[2]); - if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?"); - code += 3; - break; - - case OP_NOT: - if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c); - else fprintf(outfile, " [^\\x%02x]", c); - break; - - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c); - else fprintf(outfile, " [^\\x%02x]", c); - fprintf(outfile, "%s", OP_names[*code++]); - break; - - case OP_NOTEXACT: - case OP_NOTUPTO: - case OP_NOTMINUPTO: - if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c); - else fprintf(outfile, " [^\\x%02x]{", c); - if (*code != OP_NOTEXACT) fprintf(outfile, ","); - fprintf(outfile, "%d}", (code[1] << 8) + code[2]); - if (*code == OP_NOTMINUPTO) 
fprintf(outfile, "?"); - code += 3; - break; - - case OP_REF: - fprintf(outfile, " \\%d", *(++code)); - code++; - goto CLASS_REF_REPEAT; - - case OP_CLASS: - { - int i, min, max; - code++; - fprintf(outfile, " ["); - - for (i = 0; i < 256; i++) - { - if ((code[i/8] & (1 << (i&7))) != 0) - { - int j; - for (j = i+1; j < 256; j++) - if ((code[j/8] & (1 << (j&7))) == 0) break; - if (i == '-' || i == ']') fprintf(outfile, "\\"); - if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i); - if (--j > i) - { - fprintf(outfile, "-"); - if (j == '-' || j == ']') fprintf(outfile, "\\"); - if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j); - } - i = j; - } - } - fprintf(outfile, "]"); - code += 32; - - CLASS_REF_REPEAT: - - switch(*code) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - fprintf(outfile, "%s", OP_names[*code]); - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - min = (code[1] << 8) + code[2]; - max = (code[3] << 8) + code[4]; - if (max == 0) fprintf(outfile, "{%d,}", min); - else fprintf(outfile, "{%d,%d}", min, max); - if (*code == OP_CRMINRANGE) fprintf(outfile, "?"); - code += 4; - break; - - default: - code--; - } - } - break; - - /* Anything else is just a one-node item */ - - default: - fprintf(outfile, " %s", OP_names[*code]); - break; - } - - code++; - fprintf(outfile, "\n"); - } -} - - - -/* Character string printing function. A "normal" and a UTF-8 version. 
*/ - -static void pchars(unsigned char *p, int length, int utf8) -{ -int c; -while (length-- > 0) - { - if (utf8) - { - int rc = utf82ord(p, &c); - if (rc > 0) - { - length -= rc - 1; - p += rc; - if (c < 256 && isprint(c)) fprintf(outfile, "%c", c); - else fprintf(outfile, "\\x{%02x}", c); - continue; - } - } - - /* Not UTF-8, or malformed UTF-8 */ - - if (isprint(c = *(p++))) fprintf(outfile, "%c", c); - else fprintf(outfile, "\\x%02x", c); - } -} - - - -/* Alternative malloc function, to test functionality and show the size of the -compiled re. */ - -static void *new_malloc(size_t size) -{ -gotten_store = size; -if (log_store) - fprintf(outfile, "Memory allocation (code space): %d\n", - (int)((int)size - offsetof(real_pcre, code[0]))); -return malloc(size); -} - - - - -/* Get one piece of information from the pcre_fullinfo() function */ - -static void new_info(pcre *re, pcre_extra *study, int option, void *ptr) -{ -int rc; -if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0) - fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option); -} - - - - -/* Read lines from named file or stdin and write to named file or stdout; lines -consist of a regular expression, in delimiters and optionally followed by -options, followed by a set of test data, terminated by an empty line. */ - -int main(int argc, char **argv) -{ -FILE *infile = stdin; -int options = 0; -int study_options = 0; -int op = 1; -int timeit = 0; -int showinfo = 0; -int showstore = 0; -int posix = 0; -int debug = 0; -int done = 0; -unsigned char buffer[30000]; -unsigned char dbuffer[1024]; - -/* Static so that new_malloc can use it. 
*/ - -outfile = stdout; - -/* Scan options */ - -while (argc > 1 && argv[op][0] == '-') - { - if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0) - showstore = 1; - else if (strcmp(argv[op], "-t") == 0) timeit = 1; - else if (strcmp(argv[op], "-i") == 0) showinfo = 1; - else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; - else if (strcmp(argv[op], "-p") == 0) posix = 1; - else - { - printf("*** Unknown option %s\n", argv[op]); - printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [ []]\n"); - printf(" -d debug: show compiled code; implies -i\n" - " -i show information about compiled pattern\n" - " -p use POSIX interface\n" - " -s output store information\n" - " -t time compilation and execution\n"); - return 1; - } - op++; - argc--; - } - -/* Sort out the input and output files */ - -if (argc > 1) - { - infile = fopen(argv[op], "r"); - if (infile == NULL) - { - printf("** Failed to open %s\n", argv[op]); - return 1; - } - } - -if (argc > 2) - { - outfile = fopen(argv[op+1], "w"); - if (outfile == NULL) - { - printf("** Failed to open %s\n", argv[op+1]); - return 1; - } - } - -/* Set alternative malloc function */ - -pcre_malloc = new_malloc; - -/* Heading line, then prompt for first regex if stdin */ - -fprintf(outfile, "PCRE version %s\n\n", pcre_version()); - -/* Main loop */ - -while (!done) - { - pcre *re = NULL; - pcre_extra *extra = NULL; - -#if !defined NOPOSIX /* There are still compilers that require no indent */ - regex_t preg; - int do_posix = 0; -#endif - - const char *error; - unsigned char *p, *pp, *ppp; - unsigned const char *tables = NULL; - int do_study = 0; - int do_debug = debug; - int do_G = 0; - int do_g = 0; - int do_showinfo = showinfo; - int do_showrest = 0; - int utf8 = 0; - int erroroffset, len, delimiter; - - if (infile == stdin) printf(" re> "); - if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break; - if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); - - p = buffer; - while (isspace(*p)) p++; - 
if (*p == 0) continue; - - /* Get the delimiter and seek the end of the pattern; if is isn't - complete, read more. */ - - delimiter = *p++; - - if (isalnum(delimiter) || delimiter == '\\') - { - fprintf(outfile, "** Delimiter must not be alphameric or \\\n"); - goto SKIP_DATA; - } - - pp = p; - - for(;;) - { - while (*pp != 0) - { - if (*pp == '\\' && pp[1] != 0) pp++; - else if (*pp == delimiter) break; - pp++; - } - if (*pp != 0) break; - - len = sizeof(buffer) - (pp - buffer); - if (len < 256) - { - fprintf(outfile, "** Expression too long - missing delimiter?\n"); - goto SKIP_DATA; - } - - if (infile == stdin) printf(" > "); - if (fgets((char *)pp, len, infile) == NULL) - { - fprintf(outfile, "** Unexpected EOF\n"); - done = 1; - goto CONTINUE; - } - if (infile != stdin) fprintf(outfile, "%s", (char *)pp); - } - - /* If the first character after the delimiter is backslash, make - the pattern end with backslash. This is purely to provide a way - of testing for the error message when a pattern ends with backslash. 
*/ - - if (pp[1] == '\\') *pp++ = '\\'; - - /* Terminate the pattern at the delimiter */ - - *pp++ = 0; - - /* Look for options after final delimiter */ - - options = 0; - study_options = 0; - log_store = showstore; /* default from command line */ - - while (*pp != 0) - { - switch (*pp++) - { - case 'g': do_g = 1; break; - case 'i': options |= PCRE_CASELESS; break; - case 'm': options |= PCRE_MULTILINE; break; - case 's': options |= PCRE_DOTALL; break; - case 'x': options |= PCRE_EXTENDED; break; - - case '+': do_showrest = 1; break; - case 'A': options |= PCRE_ANCHORED; break; - case 'D': do_debug = do_showinfo = 1; break; - case 'E': options |= PCRE_DOLLAR_ENDONLY; break; - case 'G': do_G = 1; break; - case 'I': do_showinfo = 1; break; - case 'M': log_store = 1; break; - -#if !defined NOPOSIX - case 'P': do_posix = 1; break; -#endif - - case 'S': do_study = 1; break; - case 'U': options |= PCRE_UNGREEDY; break; - case 'X': options |= PCRE_EXTRA; break; - case '8': options |= PCRE_UTF8; utf8 = 1; break; - - case 'L': - ppp = pp; - while (*ppp != '\n' && *ppp != ' ') ppp++; - *ppp = 0; - if (setlocale(LC_CTYPE, (const char *)pp) == NULL) - { - fprintf(outfile, "** Failed to set locale \"%s\"\n", pp); - goto SKIP_DATA; - } - tables = pcre_maketables(); - pp = ppp; - break; - - case '\n': case ' ': break; - default: - fprintf(outfile, "** Unknown option '%c'\n", pp[-1]); - goto SKIP_DATA; - } - } - - /* Handle compiling via the POSIX interface, which doesn't support the - timing, showing, or debugging options, nor the ability to pass over - local character tables. */ - -#if !defined NOPOSIX - if (posix || do_posix) - { - int rc; - int cflags = 0; - if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE; - if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE; - rc = regcomp(&preg, (char *)p, cflags); - - /* Compilation failed; go back for another re, skipping to blank line - if non-interactive. 
*/ - - if (rc != 0) - { - (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer)); - fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer); - goto SKIP_DATA; - } - } - - /* Handle compiling via the native interface */ - - else -#endif /* !defined NOPOSIX */ - - { - if (timeit) - { - register int i; - clock_t time_taken; - clock_t start_time = clock(); - for (i = 0; i < LOOPREPEAT; i++) - { - re = pcre_compile((char *)p, options, &error, &erroroffset, tables); - if (re != NULL) free(re); - } - time_taken = clock() - start_time; - fprintf(outfile, "Compile time %.3f milliseconds\n", - ((double)time_taken * 1000.0) / - ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); - } - - re = pcre_compile((char *)p, options, &error, &erroroffset, tables); - - /* Compilation failed; go back for another re, skipping to blank line - if non-interactive. */ - - if (re == NULL) - { - fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset); - SKIP_DATA: - if (infile != stdin) - { - for (;;) - { - if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) - { - done = 1; - goto CONTINUE; - } - len = (int)strlen((char *)buffer); - while (len > 0 && isspace(buffer[len-1])) len--; - if (len == 0) break; - } - fprintf(outfile, "\n"); - } - goto CONTINUE; - } - - /* Compilation succeeded; print data if required. There are now two - info-returning functions. The old one has a limited interface and - returns only limited data. Check that it agrees with the newer one. 
*/ - - if (do_showinfo) - { - int old_first_char, old_options, old_count; - int count, backrefmax, first_char, need_char; - size_t size; - - if (do_debug) print_internals(re); - - new_info(re, NULL, PCRE_INFO_OPTIONS, &options); - new_info(re, NULL, PCRE_INFO_SIZE, &size); - new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count); - new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax); - new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char); - new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char); - - old_count = pcre_info(re, &old_options, &old_first_char); - if (count < 0) fprintf(outfile, - "Error %d from pcre_info()\n", count); - else - { - if (old_count != count) fprintf(outfile, - "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count, - old_count); - - if (old_first_char != first_char) fprintf(outfile, - "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n", - first_char, old_first_char); - - if (old_options != options) fprintf(outfile, - "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options, - old_options); - } - - if (size != gotten_store) fprintf(outfile, - "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n", - size, gotten_store); - - fprintf(outfile, "Capturing subpattern count = %d\n", count); - if (backrefmax > 0) - fprintf(outfile, "Max back reference = %d\n", backrefmax); - if (options == 0) fprintf(outfile, "No options\n"); - else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n", - ((options & PCRE_ANCHORED) != 0)? " anchored" : "", - ((options & PCRE_CASELESS) != 0)? " caseless" : "", - ((options & PCRE_EXTENDED) != 0)? " extended" : "", - ((options & PCRE_MULTILINE) != 0)? " multiline" : "", - ((options & PCRE_DOTALL) != 0)? " dotall" : "", - ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", - ((options & PCRE_EXTRA) != 0)? " extra" : "", - ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", - ((options & PCRE_UTF8) != 0)? 
" utf8" : ""); - - if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0) - fprintf(outfile, "Case state changes\n"); - - if (first_char == -1) - { - fprintf(outfile, "First char at start or follows \\n\n"); - } - else if (first_char < 0) - { - fprintf(outfile, "No first char\n"); - } - else - { - if (isprint(first_char)) - fprintf(outfile, "First char = \'%c\'\n", first_char); - else - fprintf(outfile, "First char = %d\n", first_char); - } - - if (need_char < 0) - { - fprintf(outfile, "No need char\n"); - } - else - { - if (isprint(need_char)) - fprintf(outfile, "Need char = \'%c\'\n", need_char); - else - fprintf(outfile, "Need char = %d\n", need_char); - } - } - - /* If /S was present, study the regexp to generate additional info to - help with the matching. */ - - if (do_study) - { - if (timeit) - { - register int i; - clock_t time_taken; - clock_t start_time = clock(); - for (i = 0; i < LOOPREPEAT; i++) - extra = pcre_study(re, study_options, &error); - time_taken = clock() - start_time; - if (extra != NULL) free(extra); - fprintf(outfile, " Study time %.3f milliseconds\n", - ((double)time_taken * 1000.0)/ - ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); - } - - extra = pcre_study(re, study_options, &error); - if (error != NULL) - fprintf(outfile, "Failed to study: %s\n", error); - else if (extra == NULL) - fprintf(outfile, "Study returned NULL\n"); - - else if (do_showinfo) - { - uschar *start_bits = NULL; - new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits); - if (start_bits == NULL) - fprintf(outfile, "No starting character set\n"); - else - { - int i; - int c = 24; - fprintf(outfile, "Starting character set: "); - for (i = 0; i < 256; i++) - { - if ((start_bits[i/8] & (1<<(i%8))) != 0) - { - if (c > 75) - { - fprintf(outfile, "\n "); - c = 2; - } - if (isprint(i) && i != ' ') - { - fprintf(outfile, "%c ", i); - c += 2; - } - else - { - fprintf(outfile, "\\x%02x ", i); - c += 5; - } - } - } - fprintf(outfile, "\n"); - } - } - } - } - - /* Read data 
lines and test them */ - - for (;;) - { - unsigned char *q; - unsigned char *bptr = dbuffer; - int count, c; - int copystrings = 0; - int getstrings = 0; - int getlist = 0; - int gmatched = 0; - int start_offset = 0; - int g_notempty = 0; - int offsets[45]; - int size_offsets = sizeof(offsets)/sizeof(int); - - options = 0; - - if (infile == stdin) printf("data> "); - if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) - { - done = 1; - goto CONTINUE; - } - if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); - - len = (int)strlen((char *)buffer); - while (len > 0 && isspace(buffer[len-1])) len--; - buffer[len] = 0; - if (len == 0) break; - - p = buffer; - while (isspace(*p)) p++; - - q = dbuffer; - while ((c = *p++) != 0) - { - int i = 0; - int n = 0; - if (c == '\\') switch ((c = *p++)) - { - case 'a': c = 7; break; - case 'b': c = '\b'; break; - case 'e': c = 27; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - case 'v': c = '\v'; break; - - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - c -= '0'; - while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') - c = c * 8 + *p++ - '0'; - break; - - case 'x': - - /* Handle \x{..} specially - new Perl thing for utf8 */ - - if (*p == '{') - { - unsigned char *pt = p; - c = 0; - while (isxdigit(*(++pt))) - c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W'); - if (*pt == '}') - { - unsigned char buffer[8]; - int ii, utn; - utn = ord2utf8(c, buffer); - for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii]; - c = buffer[ii]; /* Last byte */ - p = pt + 1; - break; - } - /* Not correct form; fall through */ - } - - /* Ordinary \x */ - - c = 0; - while (i++ < 2 && isxdigit(*p)) - { - c = c * 16 + tolower(*p) - ((isdigit(*p))? 
'0' : 'W'); - p++; - } - break; - - case 0: /* Allows for an empty line */ - p--; - continue; - - case 'A': /* Option setting */ - options |= PCRE_ANCHORED; - continue; - - case 'B': - options |= PCRE_NOTBOL; - continue; - - case 'C': - while(isdigit(*p)) n = n * 10 + *p++ - '0'; - copystrings |= 1 << n; - continue; - - case 'G': - while(isdigit(*p)) n = n * 10 + *p++ - '0'; - getstrings |= 1 << n; - continue; - - case 'L': - getlist = 1; - continue; - - case 'N': - options |= PCRE_NOTEMPTY; - continue; - - case 'O': - while(isdigit(*p)) n = n * 10 + *p++ - '0'; - if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n; - continue; - - case 'Z': - options |= PCRE_NOTEOL; - continue; - } - *q++ = c; - } - *q = 0; - len = q - dbuffer; - - /* Handle matching via the POSIX interface, which does not - support timing. */ - -#if !defined NOPOSIX - if (posix || do_posix) - { - int rc; - int eflags = 0; - regmatch_t pmatch[sizeof(offsets)/sizeof(int)]; - if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; - if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; - - rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags); - - if (rc != 0) - { - (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer)); - fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer); - } - else - { - size_t i; - for (i = 0; i < size_offsets; i++) - { - if (pmatch[i].rm_so >= 0) - { - fprintf(outfile, "%2d: ", (int)i); - pchars(dbuffer + pmatch[i].rm_so, - pmatch[i].rm_eo - pmatch[i].rm_so, utf8); - fprintf(outfile, "\n"); - if (i == 0 && do_showrest) - { - fprintf(outfile, " 0+ "); - pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8); - fprintf(outfile, "\n"); - } - } - } - } - } - - /* Handle matching via the native interface - repeats for /g and /G */ - - else -#endif /* !defined NOPOSIX */ - - for (;; gmatched++) /* Loop for /g or /G */ - { - if (timeit) - { - register int i; - clock_t time_taken; - clock_t start_time = clock(); - for (i = 0; i < 
LOOPREPEAT; i++) - count = pcre_exec(re, extra, (char *)bptr, len, - start_offset, options | g_notempty, offsets, size_offsets); - time_taken = clock() - start_time; - fprintf(outfile, "Execute time %.3f milliseconds\n", - ((double)time_taken * 1000.0)/ - ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); - } - - count = pcre_exec(re, extra, (char *)bptr, len, - start_offset, options | g_notempty, offsets, size_offsets); - - if (count == 0) - { - fprintf(outfile, "Matched, but too many substrings\n"); - count = size_offsets/3; - } - - /* Matched */ - - if (count >= 0) - { - int i; - for (i = 0; i < count * 2; i += 2) - { - if (offsets[i] < 0) - fprintf(outfile, "%2d: \n", i/2); - else - { - fprintf(outfile, "%2d: ", i/2); - pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8); - fprintf(outfile, "\n"); - if (i == 0) - { - if (do_showrest) - { - fprintf(outfile, " 0+ "); - pchars(bptr + offsets[i+1], len - offsets[i+1], utf8); - fprintf(outfile, "\n"); - } - } - } - } - - for (i = 0; i < 32; i++) - { - if ((copystrings & (1 << i)) != 0) - { - char copybuffer[16]; - int rc = pcre_copy_substring((char *)bptr, offsets, count, - i, copybuffer, sizeof(copybuffer)); - if (rc < 0) - fprintf(outfile, "copy substring %d failed %d\n", i, rc); - else - fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc); - } - } - - for (i = 0; i < 32; i++) - { - if ((getstrings & (1 << i)) != 0) - { - const char *substring; - int rc = pcre_get_substring((char *)bptr, offsets, count, - i, &substring); - if (rc < 0) - fprintf(outfile, "get substring %d failed %d\n", i, rc); - else - { - fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc); - /* free((void *)substring); */ - pcre_free_substring(substring); - } - } - } - - if (getlist) - { - const char **stringlist; - int rc = pcre_get_substring_list((char *)bptr, offsets, count, - &stringlist); - if (rc < 0) - fprintf(outfile, "get substring list failed %d\n", rc); - else - { - for (i = 0; i < count; i++) - fprintf(outfile, "%2dL %s\n", i, 
stringlist[i]); - if (stringlist[i] != NULL) - fprintf(outfile, "string list not terminated by NULL\n"); - /* free((void *)stringlist); */ - pcre_free_substring_list(stringlist); - } - } - } - - /* Failed to match. If this is a /g or /G loop and we previously set - g_notempty after a null match, this is not necessarily the end. - We want to advance the start offset, and continue. Fudge the offset - values to achieve this. We won't be at the end of the string - that - was checked before setting g_notempty. */ - - else - { - if (g_notempty != 0) - { - offsets[0] = start_offset; - offsets[1] = start_offset + 1; - } - else - { - if (gmatched == 0) /* Error if no previous matches */ - { - if (count == -1) fprintf(outfile, "No match\n"); - else fprintf(outfile, "Error %d\n", count); - } - break; /* Out of the /g loop */ - } - } - - /* If not /g or /G we are done */ - - if (!do_g && !do_G) break; - - /* If we have matched an empty string, first check to see if we are at - the end of the subject. If so, the /g loop is over. Otherwise, mimic - what Perl's /g options does. This turns out to be rather cunning. First - we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the - same point. If this fails (picked up above) we advance to the next - character. 
*/ - - g_notempty = 0; - if (offsets[0] == offsets[1]) - { - if (offsets[0] == len) break; - g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; - } - - /* For /g, update the start offset, leaving the rest alone */ - - if (do_g) start_offset = offsets[1]; - - /* For /G, update the pointer and length */ - - else - { - bptr += offsets[1]; - len -= offsets[1]; - } - } /* End of loop for /g and /G */ - } /* End of loop for data lines */ - - CONTINUE: - -#if !defined NOPOSIX - if (posix || do_posix) regfree(&preg); -#endif - - if (re != NULL) free(re); - if (extra != NULL) free(extra); - if (tables != NULL) - { - free((void *)tables); - setlocale(LC_CTYPE, "C"); - } - } - -fprintf(outfile, "\n"); -return 0; -} - -/* End */ diff --git a/pcre/study.c b/pcre/study.c deleted file mode 100644 index 676db946..00000000 --- a/pcre/study.c +++ /dev/null @@ -1,397 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. - -Written by: Philip Hazel - - Copyright (c) 1997-2000 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. 
If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ - - -/* Include the internals header, which itself includes Standard C headers plus -the external pcre header. */ - -#include "internal.h" - - - -/************************************************* -* Set a bit and maybe its alternate case * -*************************************************/ - -/* Given a character, set its bit in the table, and also the bit for the other -version of a letter if we are caseless. - -Arguments: - start_bits points to the bit map - c is the character - caseless the caseless flag - cd the block with char table pointers - -Returns: nothing -*/ - -static void -set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd) -{ -start_bits[c/8] |= (1 << (c&7)); -if (caseless && (cd->ctypes[c] & ctype_letter) != 0) - start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7)); -} - - - -/************************************************* -* Create bitmap of starting chars * -*************************************************/ - -/* This function scans a compiled unanchored expression and attempts to build a -bitmap of the set of initial characters. If it can't, it returns FALSE. As time -goes by, we may be able to get more clever at doing this. - -Arguments: - code points to an expression - start_bits points to a 32-byte table, initialized to 0 - caseless the current state of the caseless flag - cd the block with char table pointers - -Returns: TRUE if table built, FALSE otherwise -*/ - -static BOOL -set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless, - compile_data *cd) -{ -register int c; - -/* This next statement and the later reference to dummy are here in order to -trick the optimizer of the IBM C compiler for OS/2 into generating correct -code. 
Apparently IBM isn't going to fix the problem, and we would rather not -disable optimization (in this module it actually makes a big difference, and -the pcre module can use all the optimization it can get). */ - -volatile int dummy; - -do - { - const uschar *tcode = code + 3; - BOOL try_next = TRUE; - - while (try_next) - { - try_next = FALSE; - - /* If a branch starts with a bracket or a positive lookahead assertion, - recurse to set bits from within them. That's all for this branch. */ - - if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT) - { - if (!set_start_bits(tcode, start_bits, caseless, cd)) - return FALSE; - } - - else switch(*tcode) - { - default: - return FALSE; - - /* Skip over lookbehind and negative lookahead assertions */ - - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - try_next = TRUE; - do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT); - tcode += 3; - break; - - /* Skip over an option setting, changing the caseless flag */ - - case OP_OPT: - caseless = (tcode[1] & PCRE_CASELESS) != 0; - tcode += 2; - try_next = TRUE; - break; - - /* BRAZERO does the bracket, but carries on. */ - - case OP_BRAZERO: - case OP_BRAMINZERO: - if (!set_start_bits(++tcode, start_bits, caseless, cd)) - return FALSE; - dummy = 1; - do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT); - tcode += 3; - try_next = TRUE; - break; - - /* Single-char * or ? 
sets the bit and tries the next item */ - - case OP_STAR: - case OP_MINSTAR: - case OP_QUERY: - case OP_MINQUERY: - set_bit(start_bits, tcode[1], caseless, cd); - tcode += 2; - try_next = TRUE; - break; - - /* Single-char upto sets the bit and tries the next */ - - case OP_UPTO: - case OP_MINUPTO: - set_bit(start_bits, tcode[3], caseless, cd); - tcode += 4; - try_next = TRUE; - break; - - /* At least one single char sets the bit and stops */ - - case OP_EXACT: /* Fall through */ - tcode++; - - case OP_CHARS: /* Fall through */ - tcode++; - - case OP_PLUS: - case OP_MINPLUS: - set_bit(start_bits, tcode[1], caseless, cd); - break; - - /* Single character type sets the bits and stops */ - - case OP_NOT_DIGIT: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_digit]; - break; - - case OP_DIGIT: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_digit]; - break; - - case OP_NOT_WHITESPACE: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_space]; - break; - - case OP_WHITESPACE: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_space]; - break; - - case OP_NOT_WORDCHAR: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_word]; - break; - - case OP_WORDCHAR: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_word]; - break; - - /* One or more character type fudges the pointer and restarts, knowing - it will hit a single character type and stop there. */ - - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - tcode++; - try_next = TRUE; - break; - - case OP_TYPEEXACT: - tcode += 3; - try_next = TRUE; - break; - - /* Zero or more repeats of character types set the bits and then - try again. 
*/ - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - tcode += 2; /* Fall through */ - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - switch(tcode[1]) - { - case OP_NOT_DIGIT: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_digit]; - break; - - case OP_DIGIT: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_digit]; - break; - - case OP_NOT_WHITESPACE: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_space]; - break; - - case OP_WHITESPACE: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_space]; - break; - - case OP_NOT_WORDCHAR: - for (c = 0; c < 32; c++) - start_bits[c] |= ~cd->cbits[c+cbit_word]; - break; - - case OP_WORDCHAR: - for (c = 0; c < 32; c++) - start_bits[c] |= cd->cbits[c+cbit_word]; - break; - } - - tcode += 2; - try_next = TRUE; - break; - - /* Character class: set the bits and either carry on or not, - according to the repeat count. */ - - case OP_CLASS: - { - tcode++; - for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; - tcode += 32; - switch (*tcode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - tcode++; - try_next = TRUE; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if (((tcode[1] << 8) + tcode[2]) == 0) - { - tcode += 5; - try_next = TRUE; - } - break; - } - } - break; /* End of class handling */ - - } /* End of switch */ - } /* End of try_next loop */ - - code += (code[1] << 8) + code[2]; /* Advance to next branch */ - } -while (*code == OP_ALT); -return TRUE; -} - - - -/************************************************* -* Study a compiled expression * -*************************************************/ - -/* This function is handed a compiled expression that it must study to produce -information that will speed up the matching. It returns a pcre_extra block -which then gets handed back to pcre_exec(). 
- -Arguments: - re points to the compiled expression - options contains option bits - errorptr points to where to place error messages; - set NULL unless error - -Returns: pointer to a pcre_extra block, - NULL on error or if no optimization possible -*/ - -pcre_extra * -pcre_study(const pcre *external_re, int options, const char **errorptr) -{ -uschar start_bits[32]; -real_pcre_extra *extra; -const real_pcre *re = (const real_pcre *)external_re; -compile_data compile_block; - -*errorptr = NULL; - -if (re == NULL || re->magic_number != MAGIC_NUMBER) - { - *errorptr = "argument is not a compiled regular expression"; - return NULL; - } - -if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) - { - *errorptr = "unknown or incorrect option bit(s) set"; - return NULL; - } - -/* For an anchored pattern, or an unchored pattern that has a first char, or a -multiline pattern that matches only at "line starts", no further processing at -present. */ - -if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0) - return NULL; - -/* Set the character tables in the block which is passed around */ - -compile_block.lcc = re->tables + lcc_offset; -compile_block.fcc = re->tables + fcc_offset; -compile_block.cbits = re->tables + cbits_offset; -compile_block.ctypes = re->tables + ctypes_offset; - -/* See if we can find a fixed set of initial characters for the pattern. */ - -memset(start_bits, 0, 32 * sizeof(uschar)); -if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0, - &compile_block)) return NULL; - -/* Get an "extra" block and put the information therein. 
*/ - -extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra)); - -if (extra == NULL) - { - *errorptr = "failed to get memory"; - return NULL; - } - -extra->options = PCRE_STUDY_MAPPED; -memcpy(extra->start_bits, start_bits, sizeof(start_bits)); - -return (pcre_extra *)extra; -} - -/* End of study.c */ diff --git a/pcre/vc_dftables.dsp b/pcre/vc_dftables.dsp deleted file mode 100755 index 60404f8c..00000000 --- a/pcre/vc_dftables.dsp +++ /dev/null @@ -1,296 +0,0 @@ -# Microsoft Developer Studio Project File - Name="vc_dftables" - Package Owner=<4> -# Microsoft Developer Studio Generated Build File, Format Version 5.00 -# ** DO NOT EDIT ** - -# TARGTYPE "Win32 (x86) Console Application" 0x0103 - -CFG=vc_dftables - Win32 Debug with Win32 threads -!MESSAGE This is not a valid makefile. To build this project using NMAKE, -!MESSAGE use the Export Makefile command and run -!MESSAGE -!MESSAGE NMAKE /f "vc_dftables.mak". -!MESSAGE -!MESSAGE You can specify a configuration when running NMAKE -!MESSAGE by defining the macro CFG on the command line. 
For example: -!MESSAGE -!MESSAGE NMAKE /f "vc_dftables.mak"\ - CFG="vc_dftables - Win32 Debug with Win32 threads" -!MESSAGE -!MESSAGE Possible choices for configuration are: -!MESSAGE -!MESSAGE "vc_dftables - Win32 Release" (based on\ - "Win32 (x86) Console Application") -!MESSAGE "vc_dftables - Win32 Debug" (based on\ - "Win32 (x86) Console Application") -!MESSAGE "vc_dftables - Win32 Debug with Win32 threads" (based on\ - "Win32 (x86) Console Application") -!MESSAGE "vc_dftables - Win32 Release with Win32 threads" (based on\ - "Win32 (x86) Console Application") -!MESSAGE - -# Begin Project -# PROP Scc_ProjName "" -# PROP Scc_LocalPath "" -CPP=cl.exe -RSC=rc.exe - -!IF "$(CFG)" == "vc_dftables - Win32 Release" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "Release" -# PROP BASE Intermediate_Dir "Release" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "vc_dftables" -# PROP Intermediate_Dir "vc_dftables" -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD BASE RSC /l 0x809 /d "NDEBUG" -# ADD RSC /l 0x809 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 -# Begin Special Build Tool -OutDir=.\vc_dftables -SOURCE=$(InputPath) -PostBuild_Desc=Running program to generate chartables.c -PostBuild_Cmds=$(OutDir)\vc_dftables.exe >$(OutDir)\..\chartables.c -# End Special Build Tool - -!ELSEIF 
"$(CFG)" == "vc_dftables - Win32 Debug" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "Debug" -# PROP BASE Intermediate_Dir "Debug" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "vc_dftables_dbg" -# PROP Intermediate_Dir "vc_dftables_dbg" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD BASE RSC /l 0x809 /d "_DEBUG" -# ADD RSC /l 0x809 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept -# Begin Special Build Tool -OutDir=.\vc_dftables_dbg -SOURCE=$(InputPath) -PostBuild_Desc=Running program to generate chartables.c -PostBuild_Cmds=$(OutDir)\vc_dftables.exe >$(OutDir)\..\chartables.c -# End Special Build Tool - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Debug with Win32 threads" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 1 -# PROP BASE Output_Dir "vc_dftab" -# PROP BASE Intermediate_Dir "vc_dftab" -# PROP BASE Ignore_Export_Lib 0 -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 1 -# PROP Output_Dir "vc_dftables_dbg" -# PROP Intermediate_Dir "vc_dftables_dbg" -# PROP Ignore_Export_Lib 0 -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D 
"_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD BASE RSC /l 0x809 /d "_DEBUG" -# ADD RSC /l 0x809 /d "_DEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept -# Begin Special Build Tool -OutDir=.\vc_dftables_dbg -SOURCE=$(InputPath) -PostBuild_Desc=Running program to generate chartables.c -PostBuild_Cmds=$(OutDir)\vc_dftables.exe >$(OutDir)\..\chartables.c -# End Special Build Tool - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Release with Win32 threads" - -# PROP BASE Use_MFC 0 -# PROP BASE Use_Debug_Libraries 0 -# PROP BASE Output_Dir "vc_dfta0" -# PROP BASE Intermediate_Dir "vc_dfta0" -# PROP BASE Target_Dir "" -# PROP Use_MFC 0 -# PROP Use_Debug_Libraries 0 -# PROP Output_Dir "vc_dftables" -# PROP Intermediate_Dir "vc_dftables" -# PROP Target_Dir "" -# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD BASE RSC /l 0x809 /d "NDEBUG" -# ADD RSC /l 0x809 /d "NDEBUG" -BSC32=bscmake.exe -# ADD BASE BSC32 /nologo -# ADD BSC32 /nologo -LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 -# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 -# Begin Special Build 
Tool -OutDir=.\vc_dftables -SOURCE=$(InputPath) -PostBuild_Desc=Running program to generate chartables.c -PostBuild_Cmds=$(OutDir)\vc_dftables.exe >$(OutDir)\..\chartables.c -# End Special Build Tool - -!ENDIF - -# Begin Target - -# Name "vc_dftables - Win32 Release" -# Name "vc_dftables - Win32 Debug" -# Name "vc_dftables - Win32 Debug with Win32 threads" -# Name "vc_dftables - Win32 Release with Win32 threads" -# Begin Group "File Copy" - -# PROP Default_Filter "" -# Begin Source File - -SOURCE=..\vc_config_pthreads.h - -!IF "$(CFG)" == "vc_dftables - Win32 Release" - -# PROP Ignore_Default_Tool 1 -# Begin Custom Build - Copying vc_config_pthreads.h -WkspDir=. -InputPath=..\vc_config_pthreads.h - -"$(WkspDir)\..\config.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy "$(InputPath)" "$(WkspDir)\..\config.h" - -# End Custom Build - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Debug" - -# PROP Ignore_Default_Tool 1 -# Begin Custom Build - Copying vc_config_pthreads.h -WkspDir=. -InputPath=..\vc_config_pthreads.h - -"$(WkspDir)\..\config.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy "$(InputPath)" "$(WkspDir)\..\config.h" - -# End Custom Build - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Debug with Win32 threads" - -# PROP Exclude_From_Build 1 -# PROP Ignore_Default_Tool 1 - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Release with Win32 threads" - -# PROP Exclude_From_Build 1 -# PROP Ignore_Default_Tool 1 - -!ENDIF - -# End Source File -# Begin Source File - -SOURCE=..\vc_config_winthreads.h - -!IF "$(CFG)" == "vc_dftables - Win32 Release" - -# PROP Exclude_From_Build 1 -# PROP Ignore_Default_Tool 1 - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Debug" - -# PROP Exclude_From_Build 1 -# PROP Ignore_Default_Tool 1 - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Debug with Win32 threads" - -# PROP Ignore_Default_Tool 1 -# Begin Custom Build - Copying vc_config_winthreads.h -WkspDir=. 
-InputPath=..\vc_config_winthreads.h - -"$(WkspDir)\..\config.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy "$(InputPath)" "$(WkspDir)\..\config.h" - -# End Custom Build - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Release with Win32 threads" - -# PROP Ignore_Default_Tool 1 -# Begin Custom Build - Copying vc_config_winthreads.h -WkspDir=. -InputPath=..\vc_config_winthreads.h - -"$(WkspDir)\..\config.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy "$(InputPath)" "$(WkspDir)\..\config.h" - -# End Custom Build - -!ENDIF - -# End Source File -# End Group -# Begin Source File - -SOURCE=..\config.h -# End Source File -# Begin Source File - -SOURCE=.\config.h -# End Source File -# Begin Source File - -SOURCE=.\dftables.c -# End Source File -# Begin Source File - -SOURCE=.\internal.h -# End Source File -# Begin Source File - -SOURCE=.\maketables.c - -!IF "$(CFG)" == "vc_dftables - Win32 Release" - -# PROP Exclude_From_Build 1 - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Debug" - -# PROP Exclude_From_Build 1 - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Debug with Win32 threads" - -# PROP BASE Exclude_From_Build 1 -# PROP Exclude_From_Build 1 - -!ELSEIF "$(CFG)" == "vc_dftables - Win32 Release with Win32 threads" - -# PROP BASE Exclude_From_Build 1 -# PROP Exclude_From_Build 1 - -!ENDIF - -# End Source File -# Begin Source File - -SOURCE=.\pcre.h -# End Source File -# End Target -# End Project