irix-657m-src/eoe/cmd/man/manwhat.c

/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1989, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright law.  They  may  not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 **************************************************************************/

/* Build the man whatis database the same way that man -W does, but
 * *much* faster (about 1/4 - 1/6 the time), because it only runs
 * children on unformatted or compressed (rather than packed) man
 * pages.  Heavily hacked from man.c, 3/95.  Dave Olson
*/

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <dirent.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <ctype.h>
#include <errno.h>
#include <malloc.h>
#include <libgen.h>
#include <locale.h>
#include <setjmp.h>
#include "path.h"

/*
//  Routines
*/
/* Forward declarations for the routines below.  ungzip() and uncompress()
 * are declared extern and are not defined in the portion of the file shown
 * here; launch() calls them with (src, srccnt, dest, destcnt) and treats a
 * result > 0 as the number of decoded bytes placed in dest. */
extern int	main(int argc, char * argv[]);
static int	parsePaths(char * pathstr, char * patharray[]);
static void	lookdirs(char * manpaths[]);
static int	findman(char * rootpath);
static void	launch(char * name);
static int filetype(char * file);
static int unpack(char *src, int srccnt, char *dest, int destcnt);
extern int ungzip(char *src, int srccnt, char *dest, int destcnt);
extern int uncompress(char *src, int srccnt, char *dest, int destcnt);
static void getcatNAME(char *buf, int len);
static void gethtmlNAME(char *buf, int len);
/* colon-separated search path used when MANPATH / -M give nothing usable */
#define	DEFAULTMANPATHS "/usr/share/catman:/usr/share/man:/usr/catman:/usr/man"


/* formatter used when the NROFF environment variable is not set */
#define DEFAULTNROFFCMD "nroff"

/* maximum number of bytes of any man page that is ever examined */
#define	FBSZ		(4096)

int fsize;	/* current size of filebuf */
unsigned fmem;	/* current direct io mem requirement; can be
	different for different filesystems */
int pagesize;	/* system page size; that's our minimum alignment */
char *filebuf; /* have to allocate dynamically because xfs block
		sizes can be large */

static char *tbuf;	/* FBSZ-byte scratch buffer that receives decompressed data */
static int fbsz;	/* number of valid bytes in filebuf, set by filetype() */
static struct stat statbuf;	/* filled by findman(), reused by filetype() */

/* cursor state shared by unpack() and its helpers: usrc/usend bound the
 * packed input read by ugetch(), udest/udend bound the output written by
 * putch() and decode(); packjmp is the longjmp target set up in unpack()
 * and taken when either side runs out of room */
static char *usrc, *usend, *udest, *udend;
static jmp_buf packjmp;
static int inleft;	/* input bytes remaining, maintained by getdict()/decode() */

/* variables associated with unpack */
#define US	037	/* first magic byte of a packed file */
#define RS	036	/* second magic byte of the current pack format */
static char	*uninp;	/* read pointer used by getdict() and decode() */
/* the dictionary */
static long	origsize;	/* unpacked size recorded in the file header */
static short	maxlev;		/* number of levels in the decoding tree */
static short	intnodes[25];	/* per-level counts; see getdict() */
static char	*tree[25];	/* tree[i] points at the leaves of level i */
static char	characters[256];	/* leaf characters, stored level by level */
static char	*eof;		/* one past the last stored leaf; marks end of data */
static int	Tree[1024];	/* decoding table for old-style packed files */
static void getdict(void);
static unsigned char ugetch(void);
static unsigned short getword(void);
static void expand(void);
static void decode(void);


static char * macropackage = "/usr/lib/tmac/tmac.an";
static char * filtercmd;
static char * nroffcmd;
static char * manfmtcmd;
static char * awfcmd = "awf -man";

#define	GUESS	0
#define	CAT	1
#define	MAN	2

#define	BUFSIZE 4096
#define MANDIRCNT 200	/* 1/2 this is max dirs in manpath */

#define	USAGE "Usage:  manwhatis [-W] [-M manpath]\n"

#define	PACKMAGIC	017436
#define	COMPRESSMAGIC	0x1f9d
#define	GZIPMAGIC	0x1f8b

#define NOTREGULARFILE  0
#define	PACKFILE	1
#define	COMPRESSFILE	2
#define	NROFFFILE	3
#define	CATFILE		4
#define	GZIPFILE	5
#define	GZIPHTML	6

#define GZIPFILEOFFSET	10

/*
 * Entry point.  Works out the list of man directories (built-in default,
 * then $MANPATH, then any -M option), picks up the formatter settings from
 * the environment, allocates the scratch buffer used for decompressed data
 * and finally walks the directories, writing whatis lines to stdout.
 */
extern int
main(int argc, char * argv[])
{
    extern char *	optarg;
    char *		manpaths[MANDIRCNT];
    char *		mpenv;
    int			opt;

    (void)setlocale(LC_ALL, "");

    /* Start from the built-in search path; MANPATH replaces it unless
     * it turns out to contain no directories at all. */
    (void)parsePaths(DEFAULTMANPATHS, manpaths);
    mpenv = getenv("MANPATH");
    if (mpenv != NULL && parsePaths(mpenv, manpaths) == 0)
	(void)parsePaths(DEFAULTMANPATHS, manpaths);

    nroffcmd = getenv("NROFF");
    if (!nroffcmd)
	nroffcmd = DEFAULTNROFFCMD;

    /* When set, MANFMTCMD is the *complete* command used to format an
     * unformatted man page; otherwise launch() assembles a tbl | neqn |
     * nroff pipeline itself.  This makes it simple to use a formatter
     * such as Henry Spencer's "awf". */
    manfmtcmd = getenv("MANFMTCMD");

    /* only used for compressed and unformatted man pages */
    filtercmd = "/usr/lib/getcatNAME";

    while ((opt = getopt(argc, argv, "M:W")) != -1) {
	if (opt == 'M') {
	    /* explicit man path; fall back to the default if it is empty */
	    if (parsePaths(optarg, manpaths) == 0)
		(void)parsePaths(DEFAULTMANPATHS, manpaths);
	} else if (opt == '?') {
	    (void) fprintf(stderr, USAGE);
	    exit(2);
	}
	/* 'W' is the man -W flag: accepted and ignored */
    }

    /* Page-align the scratch buffer for the last bit of i/o performance,
     * but allocate only FBSZ bytes whatever the page size is: we never
     * need more than that to find the useful information.  O_DIRECT may
     * demand more than a page (xfs block sizes can be large); that is
     * handled separately for filebuf in filetype(). */
    pagesize = getpagesize();
    tbuf = (char *)memalign(pagesize, FBSZ);
    if (tbuf == NULL) {
	perror("manwhat: Can't allocate memory for buffers");
	exit(1);
    }

    lookdirs(manpaths);

    fflush(stdout);

    return(0);	/* exit */
}

/*
 * Scan every directory in the NULL-terminated manpaths list and hand each
 * sub-directory that looks like a man tree to findman().  The names
 * accepted are u_man, a_man, p_man, g_man, local, and the four-character
 * names manX and catX.  Directories that cannot be opened are skipped
 * silently.
 */
static void
lookdirs(char * manpaths[])
{
    struct dirent *	direntptr;
    DIR *	dirptr;
    char **	mp;
    char	dname[PATH_MAX];

    for (mp = manpaths; *mp; mp++) {
	if ((dirptr = opendir(*mp)) == NULL)
	    continue;
	while ((direntptr = readdir(dirptr)) != NULL) {
	    char *nm = direntptr->d_name;
	    int n;

	    switch (*nm) {
	    case 'u':
	    case 'a':
	    case 'p':
	    case 'g':
		if (strcmp(nm+1, "_man"))
		    continue;
		break;
	    case 'l':
		if (strcmp(nm+1, "ocal"))
		    continue;
		break;
	    case 'm':
		/* manX, but not manXy*.  nm[3] is tested first so that
		 * we never look past the terminating NUL of a bare "man". */
		if (strncmp(nm+1, "an", 2) || nm[3] == '\0' || nm[4] != '\0')
		    continue;
		break;
	    case 'c':
		/* catX, but not catXy*; same care as for manX */
		if (strncmp(nm+1, "at", 2) || nm[3] == '\0' || nm[4] != '\0')
		    continue;
		break;
	    default:
		continue;
	    }

	    /* skip, rather than overflow on, paths that do not fit */
	    n = snprintf(dname, sizeof dname, "%s/%s", *mp, nm);
	    if (n < 0 || (size_t)n >= sizeof dname)
		continue;
	    (void)findman(dname);
	}
	closedir(dirptr);
    }
}

/*
 * Split the colon-separated list pathstr into patharray and return the
 * number of entries stored; patharray is always NULL-terminated on return.
 *
 * When the LC_MESSAGES locale is something other than "C", each directory
 * is preceded by a "<dir>/<locale>" entry, so a directory then takes two
 * slots.  Parsing stops once MANDIRCNT/2 entries have been stored (one
 * more slot may be used when the last directory needed two); patharray
 * must therefore have room for MANDIRCNT/2 + 2 pointers.
 *
 * pathstr itself is never modified: it is tokenized in a private copy, so
 * string literals and getenv() results are safe to pass.  The copy and the
 * locale entries stay allocated because patharray points into them.
 */
static int
parsePaths(char * pathstr, char * patharray[])
{
    int		count = 0;
    int		addpath;
    size_t	langlen = 0;
    char *	copy;
    char *	dir;
    char *	langname = setlocale(LC_MESSAGES, NULL);

    addpath = (langname != NULL && strcmp(langname, "C") != 0);
    if (addpath)
	langlen = strlen(langname);

    copy = malloc(strlen(pathstr) + 1);
    if (copy == NULL) {
	perror("manwhat: Can't allocate memory for man path");
	exit(1);
    }
    strcpy(copy, pathstr);

    for (dir = strtok(copy, ":"); dir != NULL; dir = strtok(NULL, ":")) {
	if (count >= MANDIRCNT/2) {
	    fprintf(stderr, "only %d dirs handled in MANPATH, rest ignored\n",
		MANDIRCNT/2);
	    break;
	}
	if (addpath) {
	    /* localized variant is searched before the plain directory */
	    char *localized = malloc(strlen(dir) + langlen + 2);

	    if (localized == NULL) {
		perror("manwhat: Can't allocate memory for man path");
		exit(1);
	    }
	    sprintf(localized, "%s/%s", dir, langname);
	    patharray[count++] = localized;
	}
	patharray[count++] = dir;
    }
    patharray[count] = NULL;	/* lookdirs() relies on the terminator */

    if (count == 0)
	free(copy);		/* nothing points into it */
    return(count);
}

/*
 * Walk the directory rootpath recursively and call launch() on every
 * regular file found.  Skipped: any entry whose name starts with '.',
 * RCS files (names ending in ",v") and directories named "RCS".
 *
 * Returns -1 if rootpath cannot be opened or is too long to extend,
 * 0 otherwise.  The file-scope statbuf is left describing the file passed
 * to launch(), which filetype() relies on.
 */
static int
findman(char * rootpath)
{
    DIR *	dirptr;
    struct dirent *	direntptr;
    char	name[PATH_MAX+NAME_MAX+2], *namep;
    size_t	rootlen = strlen(rootpath);
    size_t	room;

    /* need space for at least the '/' and a terminating NUL; this also
     * bounds the recursion on very deep (or looping) trees */
    if (rootlen + 2 > sizeof name)
	return (-1);

    if ((dirptr = opendir(rootpath)) == NULL)
	return (-1);

    memcpy(name, rootpath, rootlen);
    namep = name + rootlen;
    *namep++ = '/';
    room = sizeof name - (rootlen + 1);	/* bytes left at namep, NUL included */

    while ((direntptr = readdir(dirptr)) != NULL) {
	size_t dlen = strlen(direntptr->d_name);
	char *rcs;

	if (dlen >= room)
	    continue;			/* would not fit; ignore the entry */
	memcpy(namep, direntptr->d_name, dlen + 1);

	if (stat(name, &statbuf) != 0)
	    continue;

	if (S_ISREG(statbuf.st_mode)) {
	    /* Ignore RCS files (...,v) */
	    rcs = strrchr(namep, ',');
	    if (*namep != '.' && !(rcs != NULL && strcmp(rcs, ",v") == 0))
		launch(name);
	}
	else if (S_ISDIR(statbuf.st_mode) &&
		 *namep != '.' && strcmp(namep, "RCS")) {
	    /* Avoid RCS directories to speed searches against man page
	       source trees.  Also skip all . dirs, not just . and .. */
	    (void)findman(name);
	}
    }
    closedir(dirptr);
    return 0;
}


/*
 * Fork and exec cmd to find out whether it exists in the search path.
 * Designed for commands that work as filters and run without arguments:
 * the child closes fds 0, 1 and 2 before the exec so the command neither
 * waits for input nor produces output or usage messages.
 *
 * cmd is split on spaces into at most 10 words (a private copy is
 * tokenized, so cmd itself is never modified and may be a string literal).
 * With more than 10 words we assume the user knows what they are doing and
 * report success without running anything.
 *
 * It would be nice to use popen to handle strings with args, but then it
 * is too hard to tell whether the exec failed; and we can't just run the
 * first word, because commands like psroff will happily generate empty
 * print jobs.
 *
 * Returns  1 if the command could be exec'ed (any exit status but 0xff),
 *         -1 if the exec failed (child exited with 0xff),
 *          0 if we could not tell (fork/wait failure, child killed by a
 *            signal); callers then fall back to the old behaviour.
 */
static int
forkandexeclp(char *cmd)
{
    pid_t	pid, wpid;
    int		status, i;
    char *	args[11];
    char *	copy;

    /* copy before forking so that allocation failure is reported as
     * "could not tell" rather than as a bogus child status */
    copy = malloc(strlen(cmd) + 1);
    if (copy == NULL)
	return 0;
    strcpy(copy, cmd);

    if ((pid = fork()) == -1) {
	free(copy);
	return 0;
    }
    if (pid) {
	free(copy);
	do {
	    wpid = waitpid(pid, &status, 0);
	} while (wpid == -1 && errno == EINTR);
	if (wpid != pid)
	    return 0;	/* something's weird, act old way */
	if (!WIFEXITED(status))
	    return 0;	/* strange; possibly resources; act old way */
	if (WEXITSTATUS(status) == 0xff)
	    return -1;	/* it isn't there */
	/* any non-ff code take as success */
	return 1;
    }

    /* Child.  Always leave with _exit(): exit() would flush a copy of the
     * parent's buffered stdout and so duplicate whatis output. */
    args[0] = strtok(copy, " ");
    for (i = 1; i < 11 && (args[i] = strtok(NULL, " ")); i++)
	;
    if (i == 11)
	_exit(0);
    if (args[0] == NULL)
	_exit(0xff);	/* empty command: nothing to exec */

    close(0);
    close(1);
    close(2);
    execvp(args[0], args);
    _exit(0xff);
	/*NOTREACHED*/
}


/* Check whether the formatting command roffcmd exists, by trying to run
 * it with forkandexeclp().  Too many customers just won't read the man
 * page...  I'm tired of seeing and answering this question, as are most
 * of support, I suspect.
 *
 * Returns 1 on success, -1 if the command is not there, and 0 if the
 * check itself failed (e.g. fork failed), so that the caller tries again
 * on later man pages, if any; in that case the older style formatter
 * "not found" message will be printed, which is better than bogusly not
 * trying and printing an incorrect "no DWB" message.  Normally called
 * only once per roff command, i.e. once for nroff per invocation.
*/
static int
chkexist(char *roffcmd)
{
    int ret = forkandexeclp(roffcmd);

    return (ret <= 0) ? ret : 1;
}


/*
 * Return a malloc'ed copy of src wrapped in single quotes so that the
 * shell passes it through as one literal word; embedded single quotes are
 * written as '\''.  Returns NULL if memory cannot be allocated.
 */
static char *
shellquote(const char *src)
{
    /* worst case every character is a quote (4 bytes each), plus the
     * two enclosing quotes and the NUL */
    char *out = malloc(4 * strlen(src) + 3);
    char *p = out;

    if (out == NULL)
	return NULL;
    *p++ = '\'';
    for (; *src != '\0'; src++) {
	if (*src == '\'') {
	    memcpy(p, "'\\''", 4);
	    p += 4;
	} else {
	    *p++ = *src;
	}
    }
    *p++ = '\'';
    *p = '\0';
    return out;
}

/*
 * Produce the whatis entry for the man page file `name`.
 *
 * Formatted, packed, compress'ed and gzip'ed pages are handled in-process
 * from the first FBSZ bytes that filetype() read.  Unformatted (nroff
 * source) pages, and compressed pages the in-process decoders could not
 * handle, are run through a shell pipeline ending in filtercmd.  The file
 * name is shell-quoted and the command is length-checked before it is
 * handed to system(); manfmtcmd and nroffcmd are inserted as given, since
 * they are commands chosen by the user.
 */
static void
launch(char * name)
{
    /* result of probing for a formatter: 0 = not known yet, 1 = found,
     * -1 = not found */
    static int	chknfmtexist;
    static int	nodwb = 0;	/* set once the "install dwb" hint was printed */
    char	cmd[ARG_MAX];
    char *	qname;
    int		ft, usize, n;

    if ((ft = filetype(name)) == -1) return;

    switch (ft) {
    case CATFILE:
	getcatNAME(filebuf, fbsz);
	return;
    case PACKFILE:
	usize = unpack(filebuf, fbsz, tbuf, FBSZ);
	getcatNAME(tbuf, usize);
	return;
    case COMPRESSFILE:
	usize = uncompress(filebuf, fbsz, tbuf, FBSZ);
	if (usize > 0) {
	    getcatNAME(tbuf, usize);
	    return;
	}
	/* unknown compress method, corrupt file, etc.: use zcat below */
	break;
    case GZIPFILE:
    case GZIPHTML:
	usize = ungzip(filebuf, fbsz, tbuf, FBSZ);
	if (usize > 0) {
	    if (ft == GZIPHTML)
		gethtmlNAME(tbuf, usize);
	    else
		getcatNAME(tbuf, usize);
	    return;
	}
	/* unknown gzip method, corrupt file, etc.: use gzip -dc below */
	break;
    case NROFFFILE:
	if (!manfmtcmd && !chknfmtexist) {
	    chknfmtexist = chkexist(nroffcmd);
	    if (chknfmtexist == -1) {
		/* if no nroff, look for awf, and if it's there, use it
		 * for this and the other unformatted man pages.  awf is
		 * Henry Spencer's awk based formatter */
		chknfmtexist = forkandexeclp(awfcmd);
		if (chknfmtexist != -1) {
		    fprintf(stderr,"Standard text formatter package not found,"
		     " using the \"awf\" formatter\n");
		    manfmtcmd = awfcmd;
		}
	    }
	}
	if (!manfmtcmd && chknfmtexist == -1) {
	    if (!nodwb) {
		nodwb = 1;
		fprintf(stderr,
		  "The text formatters necessary to format this man page\n"
		  "do not appear to be installed or in your PATH.  You must\n"
		  "install the \"Documenter's Workbench\" (dwb) product, or the\n"
		  "equivalent in order to display unformatted man pages\n");
	    }
	    fprintf(stderr, "Skipping %s.\n", name);
	    return;
	}
	break;
    case NOTREGULARFILE:
    default:
	fprintf(stderr, "File %s is not a printable man page.\n", name);
	return;	 /* No reason to launch if we can't handle it */
    }

    /* Only the cases that need an external pipeline get here. */
    qname = shellquote(name);
    if (qname == NULL) {
	perror("manwhat: Can't allocate memory for command");
	return;
    }

    if (ft == COMPRESSFILE)
	n = snprintf(cmd, sizeof cmd, "zcat %s | %s", qname, filtercmd);
    else if (ft == NROFFFILE && manfmtcmd)
	n = snprintf(cmd, sizeof cmd, "%s %s| %s", manfmtcmd, qname, filtercmd);
    else if (ft == NROFFFILE)
	n = snprintf(cmd, sizeof cmd,
	    "tbl -TX %s | neqn /usr/pub/eqnchar - | %s -i %s | %s",
	    qname, nroffcmd, macropackage, filtercmd);
    else
	n = snprintf(cmd, sizeof cmd, "gzip -dc %s | %s", qname, filtercmd);
    free(qname);

    if (n < 0 || (size_t)n >= sizeof cmd) {
	fprintf(stderr, "Skipping %s.\n", name);	/* command too long */
	return;
    }

    fflush(stdout);	/* flush our buffer before running pipeline */
    (void) system(cmd);
}


/*
 * Read the start of `file` into filebuf (at most FBSZ bytes are kept, the
 * count is left in fbsz) and classify it.
 *
 * Returns -1 if the file cannot be opened (after printing a message),
 * otherwise one of PACKFILE, COMPRESSFILE, GZIPFILE, GZIPHTML, NROFFFILE,
 * CATFILE or NOTREGULARFILE.  Relies on the file-scope statbuf having
 * just been filled in for `file` by findman().
 */
static int
filetype(char * file)
{
    int	ifd;
    unsigned int ft = NOTREGULARFILE;
    int	i;
    struct dioattr attr;

    /* use O_DIRECT, since we won't ever look at the data more than
     * once, and it saves the data copy by the kernel; if we get EINVAL,
     * try again without O_DIRECT, to handle nfs mounted man pages, before
     * complaining. */
    ifd = open(file, O_RDONLY|O_DIRECT);
    if (ifd == -1) {
	if (errno == EINVAL)
	    ifd = open(file, O_RDONLY);
	if (ifd == -1) {
	    perror(file);
	    return(-1);
	}
    }
    else if(fcntl(ifd, F_DIOINFO, &attr) == 0) {
	/* for most systems, we'll do this only once... */
	if(attr.d_miniosz > fsize || attr.d_mem > fmem) {
	    free(filebuf);
	    fsize = attr.d_miniosz > FBSZ ? attr.d_miniosz : FBSZ;
	    /* page align, at least */
	    fmem = pagesize > attr.d_mem ? pagesize : attr.d_mem;
	    if(!(filebuf = memalign(fmem, fsize))) {
		perror("manwhat: Can't allocate memory for file buffer");
		exit(1);
	    }
	}
    }

    /* No buffer yet: either direct i/o is not supported for the first man
     * page (probably over nfs), or F_DIOINFO failed.  Fall back to a
     * page-aligned FBSZ buffer so we never read into a null pointer. */
    if (!filebuf) {
	fsize = FBSZ;
	if(!(filebuf = memalign(pagesize, fsize))) {
	    perror("manwhat: Can't allocate memory for file buffer");
	    exit(1);
	}
    }

    /* we stat'ed file in findman() just before calling launch(),
     * which immediately calls filetype; saves a stat followed by
     * fstat */
    if (S_ISREG(statbuf.st_mode)) {
	fbsz = read(ifd, filebuf, fsize);
	if (fbsz > 3) {
	    const unsigned char *ub = (const unsigned char *)filebuf;
	    /* magic numbers are stored most significant byte first; build
	     * the value by hand so host byte order does not matter */
	    unsigned int magic = ((unsigned int)ub[0] << 8) | ub[1];

	    if(fbsz > FBSZ)
		fbsz = FBSZ; /* never more than first FBSZ bytes */

	    if (magic == PACKMAGIC) {
		ft = PACKFILE;
	    } else if (magic == COMPRESSMAGIC) {
		ft = COMPRESSFILE;
	    } else if (magic == GZIPMAGIC) {
		/* look for ".html" in the NUL-terminated text that starts
		 * at GZIPFILEOFFSET, without running past the data read */
		ft = GZIPFILE;
		for (i = GZIPFILEOFFSET;
		     i + 5 <= fbsz && filebuf[i] != '\0'; i++) {
		    if (memcmp(filebuf + i, ".html", 5) == 0) {
			ft = GZIPHTML;
			break;
		    }
		}
	    } else {
		/*
		** NROFF filetyping heuristic -- look for a line beginning
		** with what may be an NROFF command (regexp "^[.'][A-Za-z'\\]")
		*/
		for (i = 0; i + 1 < fbsz; i++) {
		    if ((i == 0 || filebuf[i-1] == '\n')
			&& (filebuf[i] == '.' || filebuf[i] == '\'')
			&& (isalpha(ub[i+1]) ||
			    filebuf[i+1] == '\\' ||
			    filebuf[i+1] == '\'')) {
			ft = NROFFFILE;
			break;
		    }
		}
	    }
	    if (ft == NOTREGULARFILE) {
		/*
		** If we haven't validated any other file type, try validating
		** an ASCII formatted CATFILE by making sure all of the
		** characters are printable.  This will reduce the chance that
		** things like executables get printed.
		*/
		int printable = 1;
		for (i = 0; i < fbsz && printable; i++) {
		    printable = isprint(ub[i])
				|| isspace(ub[i])
				|| filebuf[i] == '\b';
		}
		if (printable)
		    ft = CATFILE;
	    }
	}
    }
    (void) close(ifd);
    return(ft);
}

/* characters given special treatment when man page text is normalized */
#define	BACKSPACE	0x08
#define TAB	'\t'
#define SPACE	' '
#define NEWLINE	'\n'

/* column that the description part of a whatis line is padded out to */
#define FMTWIDTH 23

/*
 * Prefix test: return 1 if str1 is a prefix of str2 (an empty str1 always
 * matches), 0 otherwise.
 */
static int
match(const char *str1, const char *str2)
{
    while (*str1 != '\0' && *str1 == *str2) {
	str1++;
	str2++;
    }
    return (*str1 == '\0');
}

/* this is essentially the guts of the getcatNAME program, but
 * avoids recompiling regexp's, and already has the data in a buffer
 * (and of course, avoids the fork and exec!)
 *
 * buf holds the first len bytes of a formatted man page.  The text is
 * processed line by line (buf is modified: newlines become NULs and tabs
 * become spaces); the section number from the page header and the text of
 * the NAME section are written to stdout as one whatis line.  The last
 * byte of buf is always overwritten with a NUL and so is never examined.
*/
static void
getcatNAME(char *buf, int len)
{
    char in[4096];			/* current line after normalization */
    char sectionNumberStr[256];		/* e.g. "(3X)", empty until found */
    static char *sectionNumberPattern = 0;	/* compiled once, on first call */
    char *strp, *inp, *endp;
    char last;
    unsigned int spaces;
    unsigned int havesection = 0;
    unsigned int gotname = 0;
    unsigned int pastfirstlineofname = 0;
    unsigned int printedsec = 0;

    sectionNumberStr[0] = '\0';

	if(!sectionNumberPattern)
		/* section number is '(' [0-9lD][0-9a-zA-Z+]* ')' */
		/* NOTE(review): the regcmp() result is not checked for failure */
		sectionNumberPattern = regcmp("(\\([0-9lD][+0-9a-zA-Z]*\\))$0", (char*)0);

	endp = &buf[len-1];

	while (buf < endp) {
		/* cut the next line out of buf, in place */
		strp = buf;
		while (buf < endp && *buf != '\n')
			buf++;
		*buf = '\0';
		buf++;

		/*
		**  Copy the line into "in".  Change tabs and multiple spaces
		**  to single character spaces.  Leading spaces are dropped
		**  until the first line of the NAME text has been printed.
		**  Backspaces act as real backspaces in the "in" buffer.
		*/
		for (inp=in, last=NEWLINE, spaces=0; *strp; strp++) {
			if (*strp == TAB) *strp = SPACE;
			/* last stays NEWLINE until a character has been stored */
			if ((*strp == SPACE) && (last == NEWLINE)) {
			if (pastfirstlineofname) {
				*strp = SPACE;
			} else {
				continue;
			}
			}
			if (*strp == SPACE) spaces++; else spaces = 0;
			if (spaces > 1) continue;

			if (*strp == BACKSPACE && inp != in) {
			inp--;
			continue;
			}

			*inp++ = *strp;
			last = *strp;
		}
		*inp = '\0';


		/*
		**  Parse section number from header.  Looking for:
		**
		** 	'(' [0-9lD][0-9a-zA-Z+]* ')'
		**
		**  This pattern handles diverse section numbers such as:
		**
		**	(l)
		**	(3X11)
		**	(1EXP)
		**	(3C++)
		**
		**  If have already gotten the NAME section, don't mistakenly
		**  extract bogus section number information.
		**
		**  Some manual page headers are completely busted and look like:
		**
		**	foo(Silicon Graphics foo(3X)
		**
		**  or
		**	foo(3X(tentative)(3X)
		**
		**  Since there is little chance we can get all of these fixed, and
		**  since this is a common formatting error, we'll look for section
		**  numbers which have a more specific format than just '(' .* ')'.
		*/
		if (!gotname && !havesection) {
			/* NOTE(review): relies on regex() leaving sectionNumberStr
			** untouched when nothing matches -- confirm against the
			** libgen documentation; the match length is also not
			** bounded by sizeof sectionNumberStr. */
			(void) regex(sectionNumberPattern, in, sectionNumberStr);
			if (sectionNumberStr[0] != '\0') {
			havesection = 1;	/* section number is valid */
			continue;		/* done parsing, get next line */
			}
			/* Valid section not gotten so try matching other patterns */
		}


		/*
		**  Begin parsing NAME section of manual page.
		**  Clearcase manual pages use SUBCOMMAND NAME instead.
		*/
		if (match("NAME", in) || match("SUBCOMMAND NAME", in)) {
			gotname = 1;
			continue;	/* done parsing, get next line */
		}


		/*
		**  Terminate parsing of NAME section.
		**
		**  The name section may be multiple lines of text. We stop parsing
		**  NAME section text when we hit a blank line after some of it has
		**  been printed, or if we run into any commonly known section
		**  names which include:
		**
		**  	SYNTAX, SYNOPSIS, DESCRIPTION, C SYNOPSIS, FORTRAN SYNOPSIS,
		**	C SPECIFICATION, FORTRAN SPECIFICATION
		*/
		if (in[0] == '\0') {
			if (pastfirstlineofname) {
			break;
			} else {
			continue;  /* blank line before any NAME text, get next line */
			}
		}
		if (match("SYNTAX", in)) break;
		if (match("SYNOPSIS", in)) break;
		if (match("DESCRIPTION", in)) break;
		/* "<word> SYNOPSIS" / "<word> SPECIFICATION" headings */
		strp = strchr(in, ' ');
		if (strp != NULL && match("SYNOPSIS", strp+1)) break;
		if (strp != NULL && match("SPECIFICATION", strp+1)) break;


		/*
		**  Output the NAME section of the manual in whatis format:
		**  the text before the first " - " separator, then a space and
		**  the section number, padding up to FMTWIDTH columns (on the
		**  first line only), then the rest of the line.
		*/
		if (gotname) {
			unsigned int i;
			unsigned int fmtcount = 0;

			for (i = 0; in[i] != '\0'; i++) {
			if (in[i] == ' ' && in[i+1] == '-' &&
				(in[i+2] == ' ' || in[i+2] == '\0')) {
				putc(' ', stdout);
				fputs(sectionNumberStr, stdout);
				printedsec = 1;
				if (!pastfirstlineofname) {
				fmtcount += 1 + strlen(sectionNumberStr);
				for (; fmtcount < FMTWIDTH; fmtcount++) {
					putc(' ', stdout);
				}
				}
				break;
			}

			putc(in[i], stdout);
			fmtcount++;
			}
			/* whatever was not copied above (empty if no separator) */
			fputs(&(in[i]), stdout);
			pastfirstlineofname = 1;
		}
	}

    /* no " - " separator was seen: still emit the section number */
    if (!printedsec) {
	putc(' ', stdout);
	fputs(sectionNumberStr, stdout);
    }
    putc('\n', stdout);
}


/*
 * Output a HTML TITLE string in apropos format: str is written to stdout
 * followed by a newline, and every '-' found at an index of FMTWIDTH or
 * less is preceded by FMTWIDTH - index + 1 spaces.
 *
 * Still open:
 *  - Should enforce a space before the (1M)?
 *  - HTML entity translation
 */
void outputAproposFmt(char* str)
{
    int i;

    for (i = 0; str[i] != '\0'; i++) {
	if (str[i] == '-' && i <= FMTWIDTH) {
	    int pad = FMTWIDTH - i + 1;

	    while (pad-- > 0)
		putchar(SPACE);
	}
	putchar(str[i]);
    }
    putchar(NEWLINE);
}

/*
 * This is the equivalent of getcatNAME, but is used specifically with
 * HTML man pages.  All the information we need is in the TITLE tag.
 *
 * buf holds the first len bytes of the page and is modified while it is
 * split into lines.  Once the closing title tag has been seen the title
 * text is written with outputAproposFmt(); nothing is written if no
 * complete title is found in buf, or if the patterns cannot be compiled.
 */
static void
gethtmlNAME(char *buf, int len)
{
    char in[4096];		/* current line after normalization */
    char matchTag[32];		/* receives the tag text when a pattern matches */
    char matchStr[4096];	/* text following the opening tag on its line */
    char titleStr[4096];	/* title accumulated over several lines */
    char titleLine[4096];	/* text preceding the closing tag on its line */
    static char *bTitlePattern = 0;
    static char *eTitlePattern = 0;
    char *strp, *inp, *endp;
    char last;
    unsigned int inhtmltitle = 0;	/* opening tag seen, closing tag not yet */
    unsigned int spaces;

    if(!bTitlePattern)
	bTitlePattern = regcmp("(<[Tt][Ii][Tt][Ll][Ee]>)$0([^\n]*)$1", (char*)0);

    if(!eTitlePattern)
	eTitlePattern = regcmp("([^<]*)$0(</[Tt][Ii][Tt][Ll][Ee]>)$1", (char*)0);

    if (!bTitlePattern || !eTitlePattern)
	return;			/* could not compile the patterns */

    matchStr[0]	    = '\0';
    titleStr[0]	    = '\0';
    titleLine[0]    = '\0';

    endp = &buf[len-1];

    while (buf < endp) {
	/* cut the next line out of buf, in place */
	strp = buf;
	while (buf < endp && *buf != '\n')
	    buf++;
	*buf = '\0';
	buf++;

	/*
	**  Copy the line into "in".  Change tabs and multiple spaces to
	**  single character spaces and drop leading spaces.  Backspaces
	**  act as real backspaces in the "in" buffer.
	*/
	for (inp=in, last=NEWLINE, spaces=0; *strp; strp++) {
	    if (*strp == TAB) *strp = SPACE;
	    /* last stays NEWLINE until a character has been stored */
	    if (*strp == SPACE && last == NEWLINE)
		continue;
	    if (*strp == SPACE) spaces++;
	    else spaces = 0;
	    if (spaces > 1) continue;

	    if (*strp == BACKSPACE && inp != in) {
		inp--;
		continue;
	    }

	    *inp++ = *strp;
	    last = *strp;
	}
	*inp = '\0';


	/* Looking for all the possible cases:
	 *
	 *	<TITLE>A one line Title</TITLE> (One line)
	 *
	 *	<Title>
	 *	A one line Title
	 *	</Title>
	 *
	 *	<Title>This is a
	 *	two line title</Title>
	 *
	 * matchTag is cleared before each regex() call and checked
	 * afterwards to find out whether the pattern matched.
	 */
	if (!inhtmltitle) {
	    matchTag[0] = '\0';
	    (void) regex(bTitlePattern, in, matchTag, matchStr);
	    if (matchTag[0] == '\0')
		continue;		/* no opening tag on this line */

	    matchTag[0] = '\0';
	    (void) regex(eTitlePattern, matchStr, titleLine, matchTag);
	    if (matchTag[0] != '\0') {
		/* opening and closing tag on the same line */
		outputAproposFmt(titleLine);
		return;
	    }
	    inhtmltitle = 1;
	    strcat(titleStr, matchStr);
	} else {
	    matchTag[0] = '\0';
	    (void) regex(eTitlePattern, in, titleLine, matchTag);
	    if (matchTag[0] != '\0') {
		if (titleLine[0] != '\0') {
		    strcat(titleStr, " ");
		    strcat(titleStr, titleLine);
		}
		outputAproposFmt(titleStr);
		return;
	    }
	    /* still inside the title: take the whole line */
	    strcat(titleStr, " ");
	    strcat(titleStr, in);
	}
    }
}


/* Fetch the next byte of packed input; once usrc has reached usend, bail
 * out to the setjmp point in unpack() instead of returning. */
static unsigned char
ugetch(void)
{
	if (usrc >= usend)
		longjmp(packjmp, 1);
	return *usrc++;
}

/* Read a 16-bit value from the packed input, low byte first.  Going
 * through ugetch() and the file-scope cursors keeps the code ported from
 * the unpack command close to its original shape. */
static unsigned short
getword(void)
{
	unsigned char lo = ugetch ();
	unsigned char hi = ugetch ();

	return (unsigned short)((hi << 8) | lo);
}

/* Store one unpacked byte; once udest has reached udend, bail out to the
 * setjmp point in unpack() instead. */
static void
putch(char c)
{
	if (udest >= udend)
		longjmp(packjmp, 1);
	*udest++ = c;
}

/* In-memory version of the unpack command: the packed data is already in
 * a buffer, which saves the fork and exec.  Unpacks as much of the srccnt
 * bytes at src as fits into the destcnt bytes at dest, NUL-terminates the
 * result and returns the number of bytes produced.  Running out of input
 * or output space is not an error; whatever was decoded so far is kept.
*/
static int
unpack(char *src, int srccnt, char *dest, int destcnt)
{
	usrc = src;
	usend = src + srccnt -1;
	udest = dest;
	udend = dest + destcnt -1;

	/* the helpers longjmp back here when either buffer is exhausted;
	 * both ways out of getdict() finish the same way */
	if (setjmp(packjmp) == 0)
		getdict();

	*udest = '\0';
	return udest - dest;
}


/* Read the dictionary portion of a packed file and build the decoding
 * structures, then decode the data with decode() (or expand() for the
 * old-style format).  Nothing is returned: on a bad or truncated header
 * the function simply returns without producing output. */
static void
getdict(void)
{
	register int c, i, nchildren;

	/*
	 * check two-byte header
	 * get size of original file,
	 * get number of levels in maxlev,
	 * get number of leaves on level i in intnodes[i],
	 * set tree[i] to point to leaves for level i
	 */
	eof = &characters[0];

	inleft = usend - usrc;
	if (inleft < 2 || usrc[0] != US)
		return;

	if (usrc[1] == US) {		/* oldstyle packing */
		expand ();
		return;
	}
	/* magic (2) + original size (4) + level count (1) must be present */
	if (usrc[1] != RS || inleft < 7)
		return;

	uninp = &usrc[2];
	origsize = 0;
	for (i=0; i<4; i++)
		origsize = origsize*256 + ((*uninp++) & 0377);
	maxlev = *uninp++ & 0377;
	/* a tree without levels would leave decode() with stale tables */
	if (maxlev < 1 || maxlev > 24)
		return;
	if (inleft < 7 + maxlev)
		return;
	for (i=1; i<=maxlev; i++)
		intnodes[i] = *uninp++ & 0377;
	for (i=1; i<=maxlev; i++) {
		tree[i] = eof;
		for (c=intnodes[i]; c>0; c--) {
			if (eof >= &characters[255] || uninp >= usend)
				return;
			*eof++ = *uninp++;
		}
	}
	/* the last level holds one more character than its count says,
	 * and the slot after it stands for end-of-file */
	if (uninp >= usend)
		return;
	*eof++ = *uninp++;
	intnodes[maxlev] += 2;
	inleft -= uninp - &usrc[0];
	if (inleft < 0)
		return;

	/*
	 * convert intnodes[i] to be number of
	 * internal nodes possessed by level i
	 */

	nchildren = 0;
	for (i=maxlev; i>=1; i--) {
		c = intnodes[i];
		intnodes[i] = nchildren /= 2;
		nchildren += c;
	}
	decode();
}


/* unpack the file */
static void
decode(void)
{
	register int bitsleft, c, i;
	int j, lev;
	char *p;
	char *orig = udest;

	lev = 1;
	i = 0;
	for(;;) {
		if(--inleft < 0) {
uggh:
			return;
		}
		c = *uninp++;
		bitsleft = 8;
		while (--bitsleft >= 0) {
			i *= 2;
			if (c & 0200)
				i++;
			c <<= 1;
			if ((j = i - intnodes[lev]) >= 0) {
				p = &tree[lev][j];
				if (p == eof) {
					c = udest - orig;
					origsize -= c;
					if (origsize != 0)
						goto uggh;
					return;
				}
				*udest++ = *p;
				if (udest == udend)
					longjmp(packjmp, 1);
				lev = 1;
				i = 0;
			} else
				lev++;
		}
	}
}


/*
 * This code is for unpacking files that
 * were packed using the old-style algorithm.
 *
 * Called by getdict() with usrc still pointing at the two magic bytes.
 * All input is read through ugetch()/getword() and all output is written
 * through putch(), which longjmp to the setjmp point in unpack() when the
 * input runs out or the output buffer is full.
 */

static void
expand(void)
{
	enum { TREESZ = sizeof Tree / sizeof Tree[0] };
	int tp, bit;
	unsigned short word = 0;
	int keysize, i, *t;

	/* ugetch() reads from usrc, so the magic bytes have to be skipped
	 * there; otherwise they would be decoded as part of the size */
	usrc += 2;
	uninp = usrc;
	inleft -= 2;
	origsize = ((long) (unsigned) getword ())*256*256;
	origsize += (unsigned) getword ();

	keysize = getword ();
	if (keysize > TREESZ)
		return;		/* table would not fit: corrupt file */
	t = Tree;
	while (keysize--) {
		if ((i = ugetch ()) == 0377)
			*t++ = getword ();
		else
			*t++ = i & 0377;
	}

	bit = tp = 0;
	for (;;) {
		if (bit <= 0) {
			word = getword ();
			bit = 16;
		}
		/* both Tree[tp] and Tree[tp+1] are looked at below */
		if (tp < 0 || tp + 1 >= TREESZ)
			return;
		tp += Tree[tp + ((word & 0x8000) != 0)];
		word <<= 1;
		bit--;
		if (tp < 0 || tp + 1 >= TREESZ)
			return;
		if (Tree[tp] == 0) {
			putch (Tree[tp+1]);
			tp = 0;
			if ((origsize -= 1) == 0) {
				return;
			}
		}
	}
}