/************************************************************************** * * * Copyright (C) 1991, Silicon Graphics, Inc. * * All Rights Reserved. * * * * This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.; * * the contents of this file may not be disclosed to third parties, * * copied or duplicated in any form, in whole or in part, without the * * prior written permission of Silicon Graphics, Inc. * * * * RESTRICTED RIGHTS LEGEND: * * Use, duplication or disclosure by the Government is subject to * * restrictions as set forth in subdivision (c)(1)(ii) of the Rights * * in Technical Data and Computer Software clause at DFARS * * 252.227-7013, and/or in similar or successor clauses in the FAR, * * DOD or NASA FAR Supplement. Unpublished - rights reserved under * * the Copyright Laws of the United States. * ************************************************************************** * * File: Formatter.c * * Description: Routines for formatting and reading ASCII and nroff * text into a program buffer. * **************************************************************************/ #ident "$Revision: 1.3 $" #include #include #include #include #include #include #include #include #include "Formatter.h" #define MAXLINESIZE BUFSIZ /* BUFSIZ defined in stdio.h */ #define TABSIZE 8 /* Number of spaces in a tab */ /* Line buffer allocation constants */ #define CHARS_PER_LINE 40 /* Guess as to # chars per line */ #define ADD_MORE_LINES 100 /* Add in 100 line chunks */ #define ADD_MORE_CHARS 1024 /* Amount to grow the text buffer */ /* Characters */ #define CAR_RET '\r' #define TAB '\t' #define ESCAPE '\033' #define BACKSPACE '\010' /* Formatting commands */ #define NEQN_CMD "/usr/bin/neqn /usr/pub/eqnchar" #define TBL_CMD "/usr/bin/tbl" #define NROFF_CMD "/usr/bin/nroff -i" #define COL_CMD "/usr/bin/col -f" #define ZCAT_CMD "/usr/bsd/zcat" #define PCAT_CMD "/usr/bin/pcat" #define CAT_CMD "/bin/cat" #define GZCAT_CMD "/usr/sbin/gzcat -c" #define HTML_CMD "/usr/sbin/html2term" /* Font change test macro */ /* Tests whether the current font is the same as the specified font. If the fonts are different, a new segment is added to the current line in the current font and the buffer is reset. */ #define FONT_CHANGE_TEST(font) if (font_flag != UNKNOWN_STYLE && \ font_flag != (font)) { \ *tbptr = '\0'; \ skip_under = 0; \ add_text(line, line_chars, temp_buf, \ font_flag); \ *temp_buf = '\0'; \ tbptr = temp_buf; \ } static char *process_line(vwr_text_line_t*, char*, char**); static char *classify(char*, char*, int*); static void add_text(vwr_text_line_t*, char*, char*, int); static vwr_text_style_t *text_styles; /* Formatting commands */ static char *format_cmds[] = { {""}, /* FMT_NULL */ {CAT_CMD" %s > %s"}, /* FMT_GENERIC */ {NEQN_CMD" %s|"TBL_CMD"|"NROFF_CMD" -man|"COL_CMD">%s"}, /* FMT_NROFF_MAN */ {NEQN_CMD" %s|"TBL_CMD"|"NROFF_CMD" -mm|"COL_CMD">%s"}, /* FMT_NROFF_MM */ {ZCAT_CMD" %s|"COL_CMD">%s"}, /* FMT_ZCAT */ {PCAT_CMD" %s|"COL_CMD">%s"}, /* FMT_PCAT */ {GZCAT_CMD" %s|"COL_CMD">%s"}, /* FMT_GZCAT */ {GZCAT_CMD" %s|"HTML_CMD"|"COL_CMD">%s"}, /* FMT_GZCAT_HTML*/ }; /************************************************************************** * * Function: VwrFormatText * * Description: Takes a file in a number of formats and converts it to * nroff output format. The resulting file can then be read by * VwrReadText into a program buffer. * * Parameters: * source_name (I) - source text file pathname * dest_name (I) - formatted output file pathname * source_format (I) - format of source file. One of: * * FMT_NROFF_MAN - Unformatted nroff man page * FMT_NROFF_MM - Unformatted nroff mm doc * FMT_ZCAT - Compressed formatted nroff * FMT_PCAT - Packed formatted nroff * FMT_GENERIC - A copy operation * FMT_UNKNOWN - Unknown, take best guess * * Return: 0 if successfull at formatting. If an error has occurred -1 * will be returned and errno will be set. * **************************************************************************/ int VwrFormatText(char *source_name, char *dest_name, int source_format) { char cmd[BUFSIZ], buffer[BUFSIZ], new_name[BUFSIZ]; char *suffix; int exit_status; /* * Determine the file suffix */ suffix = strrchr(source_name, '.'); /* * If format unknown try to determine it by * looking at the file suffix */ if (source_format == FMT_UNKNOWN) { if (!suffix) source_format = FMT_NROFF_MM; else if (strcmp(suffix, ".z") == 0) source_format = FMT_PCAT; else if (strcmp(suffix, ".Z") == 0) source_format = FMT_ZCAT; else source_format = FMT_NROFF_MAN; } if (strcmp(suffix, ".gz") == 0) { if (strstr(source_name, "/ch")) source_format = FMT_GZCAT; else source_format = FMT_GZCAT_HTML; } /* * pcat and zcat are very finnicky. They insist on the files they work * with ending in .z and .Z repsectively. We now ensure that this is the * case for those formatting situations. * * We finally issue the formatting command. */ if (source_format == FMT_PCAT) { if (strcmp(source_name, ".z") != 0) { sprintf(new_name, "%s.z", dest_name); sprintf(buffer, "ln -s %s %s; %s; rm -f %s", source_name, new_name, format_cmds[source_format], new_name); sprintf(cmd, buffer, new_name, dest_name); } else sprintf(cmd, format_cmds[source_format], source_name, dest_name); } else if (source_format == FMT_ZCAT) { if (strcmp(source_name, ".Z") != 0) { sprintf(new_name, "%s.Z", dest_name); sprintf(buffer, "ln -s %s %s; %s; rm -f %s", source_name, new_name, format_cmds[source_format], new_name); sprintf(cmd, buffer, new_name, dest_name); } else sprintf(cmd, format_cmds[source_format], source_name, dest_name); } else { sprintf(cmd, format_cmds[source_format], source_name, dest_name); } exit_status = system(cmd); return(WEXITSTATUS(exit_status)); } /************************************************************************** * * Function: VwrReadText * * Description: Reads the nroff output format text from the file specified. * Each line of text in the file is processed such that the text and * attributes are separated. The text goes into one large text buffer, * while the attributes for each line are formed into a linked list * of text attribute segment for each line. A text attribute segment * is a continous piece of the text line in the same font. So a line * all in normal font text would consist of one segment whereas a line * of text with the first word bolded and all subsequent text in normal * font would consist of two segments. Each segment contains the number * of characters in the segment and the line structure contains a * pointer to the \n terminated string of characters in the text buffer * that comprises the line. * * The motivation for one large text buffer is to allow regular * expression text searching using the off-the-shelf search routines. * * Parameters: * filename (I) - name of text file to read * text (O) - text structure to fill with newly read text * styles (I) - array of styles for the text. The array of styles must * be organised as follows: * styles[0] - style for normal text * styles[1] - style for underlined text * styles[2] - style for bold text * styles[3] - style for bold underlined text * blank_compress (I) - Blank line compression * * Return: If successful, 0 is returned. If an error occurs, a message * is printed to stderr, errno is set and -1 is returned. * **************************************************************************/ int VwrReadText(char *filename, vwr_text_t *text, vwr_text_style_t *styles, int blank_compress) { FILE *fp; char buf[MAXLINESIZE], *new_buf, *bptr; char *processed_line; vwr_text_line_t *tlptr; int nlines_guess, len; int buf_amount; register int char_buf_off; register int buf_used; register int i, line_num, blank_count, base; struct stat fileinfo; /* * Save the list of styles for future reference */ text_styles = styles; /* * Open the file. */ if ((fp = fopen(filename, "rb")) == NULL) { (void)fprintf(stderr, "Unable to open %s\n", filename); return(-1); } /* * Based on the size of the file and a guess as to the number of * characters per line we will take a guess as to the number of lines * of text in the file. We will allocate the line buffers based on * this guess. If it proves to be insufficient we will grow the * buffer dynamically based on ADD_MORE_LINES */ if (fstat(fileno(fp), &fileinfo)) { (void)fprintf(stderr, "Unable to stat %s\n", filename); return(-1); } VwrFreeText(text); buf_amount = fileinfo.st_size; nlines_guess = buf_amount / CHARS_PER_LINE; text->lines = (vwr_text_line_t*)malloc(nlines_guess * sizeof(vwr_text_line_t)); /* * Based on the size of the unprocessed text file we will allocate * the text buffer. This is ussually sufficient, but in uncommon cases * where the file contains a large number of tab characters we will have * to grow this buffer. */ text->char_buf = (char*)malloc(buf_amount); /* * Read each line of the file into the buffer, process the line * to interpret bolds, underlines and tabs. Some lines * (mainly tbl output) contain multiple lines. These are broken * into separate lines and added to the text structure. */ char_buf_off = 0; /* Start at beginning of char buf */ line_num = blank_count = 0; /* First line, no blanks */ buf_used = 0; /* Haven't used any of the char buf */ while ((bptr = fgets(buf, MAXLINESIZE, fp)) != NULL) { do { /* * Blank compression */ if (blank_compress && (*bptr == '\n')) { if (++blank_count / blank_compress) continue; } else blank_count = 0; /* * Line buffer allocation heuristic */ if ((line_num + 1) > nlines_guess) { nlines_guess += ADD_MORE_LINES; text->lines = (vwr_text_line_t*)realloc((char*)text->lines, nlines_guess * sizeof(vwr_text_line_t)); } /* * Process the line into attributes and plain text */ tlptr = &text->lines[line_num]; processed_line = process_line(tlptr, bptr, &new_buf); /* * Add a \n to the end of the plain text line and * append this line to the character buffer. The buffer * is grown if necessary */ len = tlptr->line_nchars; processed_line[len++] = '\n'; processed_line[len] = '\0'; buf_used += len + 2; if (buf_used >= buf_amount) { buf_amount += ADD_MORE_CHARS; text->char_buf = (char*)realloc(text->char_buf, buf_amount); } (void)memcpy(text->char_buf + char_buf_off, processed_line, len+1); tlptr->line_chars = (char*)char_buf_off; char_buf_off += len; /* * Check max line length stat */ if (tlptr->line_len > text->max_line_len) text->max_line_len = tlptr->line_len; /* * Next line */ line_num++; /* * Multiple lines on a single line processing */ if (new_buf) bptr = new_buf; } while (new_buf); /* Do while multiple lines on single line */ } /* * Close the file. */ (void)fclose(fp); /* * Save the number of lines */ text->nlines = line_num; /* * Convert the text buffer offsets into text buffer addresses. * This is done by adding the text buffer base address to the * offsets for each line */ base = (int)(text->char_buf); for (i = 0, tlptr = text->lines; i < line_num; i++, tlptr++) tlptr->line_chars += base; return(0); } /************************************************************************** * * Function: VwrInitText * * Description: Initializes a text buffer * * Parameters: * text (I) - newly instanciated text buffer * * Return: none * **************************************************************************/ void VwrInitText(vwr_text_t *text) { text->nlines = 0; text->lines = (vwr_text_line_t*)NULL; text->char_buf = NULL; text->max_line_len = 0; } /************************************************************************** * * Function: VwrFreeText * * Description: Frees the text buffer if one has been allocated. * * Parameters: none * * Return: none * **************************************************************************/ void VwrFreeText(vwr_text_t *tbuf) { register int i; register vwr_text_line_t *tptr; register vwr_text_seg_t *tsptr, *temp; /* * Free each string segment, then free each lines segment array * and finally free the array of lines. Make sure that we have a * non-NULL pointer and a non-zero line count. */ if (tbuf->lines && tbuf->nlines) { for (i = 0, tptr = tbuf->lines; i < tbuf->nlines; i++, tptr++) { if (tptr->segs) { tsptr = tptr->segs; while (tsptr) { temp = tsptr->next; free((char*)tsptr); tsptr = temp; } } tptr->line_len = 0; tptr->segs = tptr->last_seg = NULL; } free((char*)tbuf->lines); } if (tbuf->char_buf) free((char*)tbuf->char_buf); VwrInitText(tbuf); } /* ========================================================================== LOCAL FUNCTIONS ========================================================================== */ /************************************************************************** * * Function: process_line * * Description: Takes the specified, null terminated line and constructs * a string segment array based on the text in that string. The * function interprets normal text, boldifying and underlining and * converts these to different fonts. The function also expands * tabs. The function also handles the occurence of multiple lines * contained on a single input line. This occurs with tbl output * since it contains forward half-newlines followed by the \r * character. * * Parameters: * line (O) - Pointer to the text line structure * buf (I) - Null terminated line of text * new_bufp (O) - A new line of text extracted from the original * line and detected by the presence of a ^M. If * the original line does not contain another line, * this variable will be set to NULL. * * Return: Pointer to processed plain text line buffer. * **************************************************************************/ static char *process_line(vwr_text_line_t *line, char *buf, char **new_bufp) { int skip_under = 0; int len, again; int fill, cpos; int font_flag, style; register int i, j; char temp_buf[MAXLINESIZE]; char line_chars[MAXLINESIZE]; char base_char, next_base; register char *tbptr, *bptr; /* * Get length of string and strip trailing \n if any */ len = strlen(buf); if (buf[len-1] == '\n') buf[len-1] = '\0'; /* * Interpret the line * * buf - original string * temp_buf - processed string segment. Added to string segment array * with each font change and at end of line. */ line->segs = NULL; /* Initially empty list */ line->last_seg = NULL; /* Nothing in list */ line->line_len = 0; /* And no line length */ line->line_nchars = 0; /* And no characters */ line->end_select = False; /* End of line not selected */ line->line_chars = NULL; /* Start with empty line */ *line_chars = '\0'; /* Start with empty accum buffer */ cpos = 0; /* Processed line character position */ bptr = buf; /* Line buffer pointer */ tbptr = temp_buf; /* String building buffer ptr */ font_flag = UNKNOWN_STYLE; /* Font is not known at start */ *new_bufp = (char*)NULL; /* Assume no addtional lines */ again = TRUE; /* Continue flag */ for (i = 0; i <= len && again; i++) { switch (*bptr) { /* * A NULL indicates the end of the line. Just tack it * onto the accumulation buffer and signal to stop processing. */ case '\0': *tbptr = *bptr; /* Pass character from buf to temp */ again = FALSE; /* End of the line */ break; /* * Tabs will be expanded with enough ' ' characters to get * to the next tab stop. The number of ' ' characters between * tab stops is specified by TABSIZE. */ case TAB: fill = (cpos / TABSIZE + 1) * TABSIZE - cpos; cpos += fill; for (j = 0; j < fill; j++) *tbptr++ = ' '; bptr++; break; /* * tbl output processed by col -f has forward half linefeeds * and can follow these with a \r character for table lines. * We break this kind of thing into multiple lines and use the * new_buf pointer to indicate the condition. */ case CAR_RET: *bptr = '\0'; *new_bufp = bptr + 1; break; /* * Escape characters will be ignored. We must also ignore * the '9' charcter that follows an escape since an ESC-9 * is the forward half-linefeed indication output by col -f. */ case ESCAPE: bptr++; if (*bptr == '9') bptr++; break; /* * Handle printable characters. This part of the switch * supervises the classification of the unprocessed * character and the addition of text to the output * text buffer. */ default: bptr = classify(bptr, &base_char, &style); /* * For bold, underline and bold underlined text */ if (style != NORMAL_STYLE) { FONT_CHANGE_TEST(style); font_flag = style; *tbptr = base_char; /* * If the current character is caps then decide * if '_' should be skipped for this segment. We * skip '_' if this segment has any words that start * with cap letters. */ if (isupper((int)base_char)) { if (tbptr == temp_buf) { skip_under = 1; } else if (tbptr[-1] == ' ') { skip_under = 1; } } tbptr++; } /* * For normal text */ else { /* * Note that when in underline or bold underline font * words are separated with '_' instead of ' '. This * posess a problem for variables in these fonts that * use underscores as part of the variable. We use a * heuristic to determine whether to print the '_'. * The heuristic is: * * if (current font is underline or bold underline) * If (no upper case chars have started words * in this text segment and we have not * skipped any '_' in this segment) * If (char after '_' is lower case or * is a digit or segment started * with '|' [tbl]) * print '_' * else * print ' ' * skip all rest of '_' in this segment */ if (base_char == '_' && (font_flag == UNDERLINE_STYLE || font_flag == BOLDUNDER_STYLE)) { (void)classify(bptr, &next_base, &style); if (skip_under || (!islower((int)next_base) && !isdigit((int)next_base)) && *temp_buf != '|') { *tbptr++ = ' '; skip_under = 1; } else { *tbptr++ = base_char; } } /* * Purely normal text */ else { FONT_CHANGE_TEST(style); font_flag = style; *tbptr++ = base_char; } } cpos++; break; } } /* * End last part of line, if any */ if (tbptr != temp_buf && font_flag != UNKNOWN_STYLE) add_text(line, line_chars, temp_buf, font_flag); return line_chars; } /************************************************************************** * * Function: classify * * Description: Classifies the font style of the specified unprocessed * character. * * The font styles are identified in the text as follows: * * abcd Normal font * _^Ha_^Hb_^Hc_^Hd Underline font * a^Ha^Hab^Hb^Hb Bold font * _^Ha^Ha^Ha_^Hb^Hb^Hb Bold underline font * * For underline, bold and bold underline fonts the base character * (ie. the character that is actually printed on the screen) is * the character following the first backspace. * * Parameters: * bptr (I) - Pointer to location in unprocessed character buffer * where character starts, * basep (O) - Actual character represented * stylep (O) - Character font style. One of NORMAL_STYLE, BOLD_STYLE, * UNDERLINE_STYLE or BOLDUNDER_STYLE. * * Return: Pointer to start of next unprocessed character * **************************************************************************/ static char *classify(char *bptr, char *basep, int *stylep) { /* * Non-normal font with have a BACKSPACE immediately after * the current character. */ if (bptr[1] == BACKSPACE) { *basep = bptr[2]; /* Bold font */ if (*basep == *bptr) { *stylep = BOLD_STYLE; while (bptr[1] == BACKSPACE) /* Skip repeats */ bptr += 2; bptr++; } /* Underline or bold underline font */ else if (*bptr == '_') { /* Bold underline font */ if (bptr[3] == BACKSPACE) { *stylep = BOLDUNDER_STYLE; while (bptr[1] == BACKSPACE) /* Skip repeats */ bptr += 2; bptr++; } /* Underline font */ else { *stylep = UNDERLINE_STYLE; bptr += 3; } } /* Normal font - bullet character a^Hb */ else { *stylep = NORMAL_STYLE; bptr += 3; } } /* * Normal font. Also need to handle the '_' character * if in underline or bold underline font */ else { *basep = *bptr; *stylep = NORMAL_STYLE; bptr++; } /* * Return pointer to next unprocessed character */ return bptr; } /************************************************************************** * * Function: add_text * * Description: Adds the new string segment to the lines strings segment * array using the specified GC. * * Parameters: * line (I) - current text line structure * line_buf (I,O) - text line accumaltion buffer * new_text (I) - new text to be added to the end of the original * string segment array. * font_flag (I) - font style to render new string. One of * NORMAL_STYLE, UNDERLINE_STYLE, BOLD_STYLE, * or BOLDUNDER_STYLE. * * Return: none * **************************************************************************/ static void add_text(vwr_text_line_t *line, char *line_buf, char *new_text, int font_flag) { vwr_text_seg_t *sptr; int nchars; static char *buf_ptr; /* * Get the segment length */ nchars = strlen(new_text); /* * Copy new text to the accumulation text buffer */ if (!(*line_buf)) buf_ptr = line_buf; (void)memcpy(buf_ptr, new_text, nchars + 1); buf_ptr += nchars; /* * Allocate a new string segment and fill it with the * relevant info */ sptr = (vwr_text_seg_t*)malloc(sizeof(vwr_text_seg_t)); sptr->next = NULL; sptr->seg_type = font_flag; sptr->seg_nchars = nchars; sptr->seg_len = XTextWidth(text_styles[font_flag].font, new_text, sptr->seg_nchars); /* * Update the line length and char count fields */ line->line_len += sptr->seg_len; line->line_nchars += nchars; /* * Link this new segment onto segment list and update segment * count */ if (line->segs) { line->last_seg->next = sptr; line->last_seg = sptr; } else { line->segs = line->last_seg = sptr; } }