--- source.gnu.orig/analyze.c Sun Jan 6 15:05:11 1991 +++ analyze.c Fri Jul 31 23:08:48 1992 @@ -35,6 +35,7 @@ void setup_output (); extern int no_discards; +extern int files_are_identical; static int *xvec, *yvec; /* Vectors being compared. */ static int *fdiag; /* Vector, indexed by diagonal, containing @@ -711,7 +712,112 @@ return script; } - + +#include +/* Return size of main memory in bytes */ +memsize(){ + inventory_t *getinvent(void); + inventory_t *sp; + static int sz = 0; + + if (sz == 0) + { + while ((sp = getinvent()) != 0) + { + if (sp->inv_class == INV_MEMORY && sp->inv_type == INV_MAIN) + { + sz = sp->inv_state; + break; + } + } + } + + if (sz == 0) + sz = 8*1024*1024; /* 8 MByte default main memory size */ + + return sz; +} + +#include +#include +#include + +int +tryodiff (char *cmd, int argc, char **argv) +{ + int status, pid, w, sig; + struct sigaction oldistat, oldqstat, oldcstat, newiqstat, newcstat, newustat; + + /* + * ignore INT and QUIT, saving previous handlers and masks + */ + if (sigaction(SIGINT,NULL,&oldistat) == -1) + return(-1); + + newiqstat.sa_handler = SIG_IGN; + newiqstat.sa_flags = 0; + newcstat.sa_handler = SIG_DFL; + newcstat.sa_flags = 0; + + if ( (sigaction(SIGINT,&newiqstat,&oldistat) == -1) || + (sigaction(SIGQUIT,&newiqstat,&oldqstat) == -1) || + (sigaction(SIGCLD,&newcstat,&oldcstat) == -1) ) { + return(-1); + } + + if ((pid = fork()) == 0) { + char *emsg = ": sigaction internal botch\n"; + /* Child process */ + /* reinstall old handlers and masks. */ + (void) sigaction(SIGINT,&oldistat,NULL); + (void) sigaction(SIGQUIT,&oldqstat,NULL); + (void) sigaction(SIGCLD,&oldcstat,NULL); + argv[0] = cmd; + (void) execvp(argv[0], argv); + + /* oops - exec failed - tell parent that we didn't + * do anything useful by committing SIGUSR2 harikari. + */ + newustat.sa_handler = SIG_DFL; + newustat.sa_flags = 0; + sigaction(SIGUSR2,&newustat,NULL); + kill(getpid(), SIGUSR2); + /* should be dead by now -- but just in case ... 
*/ + /* Avoid stdio: parent process's buffers are inherited. */ + write (fileno (stderr), program, strlen (program)); + write (fileno (stderr), emsg, strlen(emsg)); + _exit(127); /* don't double fflush stdio */ + /* NOTREACHED */ + } + + if (pid < 0) { + w = -1; /* fork failed */ + } else { + w = waitpid(pid,&status,0); + } + + /* reinstall old handlers and masks. */ + (void) sigaction(SIGINT,&oldistat,NULL); + (void) sigaction(SIGQUIT,&oldqstat,NULL); + (void) sigaction(SIGCLD,&oldcstat,NULL); + + sig = WTERMSIG(status); + + if (w == -1) + return -1; /* fork failed */ + else if (WIFSIGNALED(status) && sig == SIGUSR2) + return -1; /* exec failed */ + else if (WIFEXITED(status)) + return WEXITSTATUS(status); /* normal odiff exit */ + else if (WIFSIGNALED(status) && (sig == SIGINT || sig == SIGQUIT)) + kill(getpid(), sig); /* reissue held off signal to self */ + return 2; /* problems */ +} + +extern int xargc; +extern char **xargv; +extern int gnudiffopts; + /* Report the differences of two files. DEPTH is the current directory depth. */ int @@ -734,8 +840,42 @@ && filevec[0].stat.st_dev == filevec[1].stat.st_dev) return 0; + /* See if the two named files are too big to fit in memory + comfortably. Since GNU diff reads/mmaps both files into its + virtual addr space, it pages heavily on huge files. The old BSD + derived "odiff" is much more space (swap and main memory) + efficient. But if the files fit in main memory, then the GNU + diff is quite a bit faster - as much as 2x to 3x faster. Arbitrarily + consider 1/4 Main Memory size as too big. User can force choice: + specify -H always gets GNU diff, use odiff always gets the old BSD diff. + Return of -1 from tryodiff means that odiff wasn't even run - so we + should try further here instead. 
*/ + + if (gnudiffopts == 0 && + (filevec[0].stat.st_size + filevec[1].stat.st_size) > memsize()/4 ) + { + int ret; + if ((ret = tryodiff("odiff",xargc,xargv)) != -1) + return ret; + } + + files_are_identical = 0; + binary = read_files (filevec); + if (files_are_identical) + { + for (i = 0; i < 2; ++i) + if (filevec[i].buffer) + { + if (filevec[i].buffer_was_mmapped) + munmap (filevec[i].buffer, filevec[i].bufsize + 2); + else + free (filevec[i].buffer); + } + return 0; + } + /* If we have detected that file 0 is a binary file, compare the two files as binary. This can happen only when the first chunk is read. @@ -753,7 +893,12 @@ for (i = 0; i < 2; ++i) if (filevec[i].buffer) + { + if (filevec[i].buffer_was_mmapped) + munmap (filevec[i].buffer, filevec[i].bufsize + 2); + else free (filevec[i].buffer); + } return differs; } @@ -900,7 +1045,12 @@ for (i = 0; i < 2; ++i) { if (filevec[i].buffer != 0) + { + if (filevec[i].buffer_was_mmapped) + munmap (filevec[i].buffer, filevec[i].bufsize + 2); + else free (filevec[i].buffer); + } free (filevec[i].linbuf); } --- source.gnu.orig/diff.c Mon Mar 9 19:58:10 1992 +++ diff.c Fri Jul 31 23:08:49 1992 @@ -23,7 +23,7 @@ #define GDIFF_MAIN #include "regex.h" #include "diff.h" -#include "getopt.h" +#include /* Nonzero for -r: if comparing two directories, @@ -66,45 +66,12 @@ return result; } - -/* The numbers 129 and 130 that appear in the fourth element - for the context and unidiff entries are used as a way of - telling the big switch in `main' how to process those options. 
*/ -static struct option longopts[] = -{ - {"ignore-blank-lines", 0, 0, 'B'}, - {"context", 2, 0, 129}, - {"ifdef", 1, 0, 'D'}, - {"show-function-line", 1, 0, 'F'}, - {"speed-large-files", 0, 0, 'H'}, - {"ignore-matching-lines", 1, 0, 'I'}, - {"file-label", 1, 0, 'L'}, - {"entire-new-files", 0, 0, 'N'}, - {"new-files", 0, 0, 'N'}, - {"starting-file", 1, 0, 'S'}, - {"initial-tab", 0, 0, 'T'}, - {"text", 0, 0, 'a'}, - {"all-text", 0, 0, 'a'}, - {"ascii", 0, 0, 'a'}, - {"ignore-space-change", 0, 0, 'b'}, - {"minimal", 0, 0, 'd'}, - {"ed", 0, 0, 'e'}, - {"reversed-ed", 0, 0, 'f'}, - {"ignore-case", 0, 0, 'i'}, - {"print", 0, 0, 'l'}, - {"rcs", 0, 0, 'n'}, - {"show-c-function", 0, 0, 'p'}, - {"binary", 0, 0, 'q'}, - {"brief", 0, 0, 'q'}, - {"recursive", 0, 0, 'r'}, - {"report-identical-files", 0, 0, 's'}, - {"expand-tabs", 0, 0, 't'}, - {"ignore-all-space", 0, 0, 'w'}, - {"unified", 2, 0, 130}, - {"version", 0, 0, 'v'}, - {0, 0, 0, 0} -}; +int xargc; +char **xargv; +int gnudiffopts; +int sgidiffopts; + main (argc, argv) int argc; @@ -113,8 +80,10 @@ int val; int c; int prev = -1; - int longind; extern char *version_string; + extern char *optarg; + extern int optind; + extern void setxpat(char *); program = argv[0]; @@ -143,28 +112,30 @@ msg_chain = NULL; msg_chain_end = NULL; no_discards = 0; + gnudiffopts = 0; + sgidiffopts = 0; + xargc = argc; + xargv = argv; /* Decode the options. */ - while ((c = getopt_long (argc, argv, - "0123456789abBcC:dD:efF:hHiI:lL:nNpqrsS:tTuvw", - longopts, &longind)) != EOF) + while ((c = getopt (argc, argv, + "0123456789abBcC:dD:efF:hHiI:lL:nNpqrsS:tTuvwx:" + )) != EOF) { - if (c == 0) /* Long option. */ - c = longopts[longind].val; switch (c) { /* All digits combine in decimal to specify the context-size. 
*/ - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '0': + case '1': gnudiffopts++; + case '2': gnudiffopts++; + case '3': gnudiffopts++; + case '4': gnudiffopts++; + case '5': gnudiffopts++; + case '6': gnudiffopts++; + case '7': gnudiffopts++; + case '8': gnudiffopts++; + case '9': gnudiffopts++; + case '0': gnudiffopts++; if (context == -1) context = 0; /* If a context length has already been specified, @@ -176,25 +147,23 @@ context = context * 10 + c - '0'; break; - case 'a': + case 'a': gnudiffopts++; /* Treat all files as text files; never treat as binary. */ always_text_flag = 1; break; - case 'b': + case 'b': sgidiffopts++; /* Ignore changes in amount of whitespace. */ ignore_space_change_flag = 1; length_varies = 1; break; - case 'B': + case 'B': gnudiffopts++; /* Ignore changes affecting only blank lines. */ ignore_blank_lines_flag = 1; break; - case 'C': - case 129: /* +context[=lines] */ - case 130: /* +unified[=lines] */ + case 'C': gnudiffopts++; if (optarg) { if (context >= 0) @@ -209,35 +178,35 @@ } /* Falls through. */ - case 'c': + case 'c': sgidiffopts++; /* Make context-style output. */ specify_style (c == 130 ? OUTPUT_UNIFIED : OUTPUT_CONTEXT); break; - case 'd': + case 'd': gnudiffopts++; /* Don't discard lines. This makes things slower (sometimes much slower) but will find a guaranteed minimal set of changes. */ no_discards = 1; break; - case 'D': + case 'D': sgidiffopts++; /* Make merged #ifdef output. */ specify_style (OUTPUT_IFDEF); ifdef_string = optarg; break; - case 'e': + case 'e': sgidiffopts++; /* Make output that is a valid `ed' script. */ specify_style (OUTPUT_ED); break; - case 'f': + case 'f': sgidiffopts++; /* Make output that looks vaguely like an `ed' script but has changes in the order they appear in the file. 
*/ specify_style (OUTPUT_FORWARD_ED); break; - case 'F': + case 'F': gnudiffopts++; /* Show, for each set of changes, the previous line that matches the specified regexp. Currently affects only context-style output. */ @@ -244,7 +213,7 @@ function_regexp = optarg; break; - case 'h': + case 'h': sgidiffopts++; /* Split the files into chunks of around 1500 lines for faster processing. Usually does not change the result. @@ -251,29 +220,29 @@ This currently has no effect. */ break; - case 'H': + case 'H': gnudiffopts++; /* Turn on heuristics that speed processing of large files with a small density of changes. */ heuristic = 1; break; - case 'i': + case 'i': sgidiffopts++; /* Ignore changes in case. */ ignore_case_flag = 1; break; - case 'I': + case 'I': gnudiffopts++; /* Ignore changes affecting only lines that match the specified regexp. */ ignore_regexp = optarg; break; - case 'l': + case 'l': sgidiffopts++; /* Pass the output through `pr' to paginate it. */ paginate_flag = 1; break; - case 'L': + case 'L': gnudiffopts++; /* Specify file labels for `-c' output headers. */ if (!file_label[0]) file_label[0] = optarg; @@ -283,52 +252,52 @@ fatal ("too many file label options"); break; - case 'n': + case 'n': sgidiffopts++; /* Output RCS-style diffs, like `-f' except that each command specifies the number of lines affected. */ specify_style (OUTPUT_RCS); break; - case 'N': + case 'N': gnudiffopts++; /* When comparing directories, if a file appears only in one directory, treat it as present but empty in the other. */ entire_new_file_flag = 1; break; - case 'p': + case 'p': gnudiffopts++; /* Make context-style output and show name of last C function. */ specify_style (OUTPUT_CONTEXT); function_regexp = "^[_a-zA-Z]"; break; - case 'q': + case 'q': gnudiffopts++; no_details_flag = 1; break; - case 'r': + case 'r': sgidiffopts++; /* When comparing directories, recursively compare any subdirectories found. 
*/ recursive = 1; break; - case 's': + case 's': sgidiffopts++; /* Print a message if the files are the same. */ print_file_same_flag = 1; break; - case 'S': + case 'S': sgidiffopts++; /* When comparing directories, start with the specified file name. This is used for resuming an aborted comparison. */ dir_start_file = optarg; break; - case 't': + case 't': sgidiffopts++; /* Expand tabs to spaces in the output so that it preserves the alignment of the input files. */ tab_expand_flag = 1; break; - case 'T': + case 'T': gnudiffopts++; /* Use a tab in the output, rather than a space, before the text of an input line, so as to keep the proper alignment in the input line without changing the characters in it. */ @@ -335,21 +304,25 @@ tab_align_flag = 1; break; - case 'v': + case 'v': gnudiffopts++; printf ("GNU diff version %s\n", version_string); break; - case 'u': + case 'u': gnudiffopts++; /* Output the context diff in unidiff format. */ specify_style (OUTPUT_UNIFIED); break; - case 'w': + case 'w': sgidiffopts++; /* Ignore horizontal whitespace when comparing lines. 
*/ ignore_all_space_flag = 1; length_varies = 1; break; + case 'x': sgidiffopts++; + setxpat(optarg); + break; + default: usage (); } @@ -401,19 +374,8 @@ usage () { - fprintf (stderr, "\ -Usage: diff [-#] [-abBcdefhHilnNprstTuvw] [-C lines] [-F regexp] [-I regexp]\n\ - [-L label [-L label]] [-S file] [-D symbol] [+ignore-blank-lines]\n\ - [+context[=lines]] [+unified[=lines]] [+ifdef=symbol]\n\ - [+show-function-line=regexp]\n"); - fprintf (stderr, "\ - [+speed-large-files] [+ignore-matching-lines=regexp] [+new-file]\n\ - [+initial-tab] [+starting-file=file] [+text] [+all-text] [+ascii]\n\ - [+minimal] [+ignore-space-change] [+ed] [+reversed-ed] [+ignore-case]\n"); - fprintf (stderr, "\ - [+print] [+rcs] [+show-c-function] [+binary] [+brief] [+recursive]\n\ - [+report-identical-files] [+expand-tabs] [+ignore-all-space]\n\ - [+file-label=label [+file-label=label]] [+version] path1 path2\n"); + fprintf (stderr, "Usage: diff [-bcefhilnrstw -Dstring -Sname -xpattern ] file1 file2\n"); + fprintf (stderr, "\tGNU diff options: [-# -aBdHNpqTuv -Cnum -Fregexp -Iregexp -Llabel]\n"); exit (2); } --- source.gnu.orig/diff.h Mon Dec 9 21:50:39 1991 +++ diff.h Fri Jul 31 23:08:50 1992 @@ -90,7 +90,7 @@ #endif /* Support old-fashioned C compilers. */ -#if !defined (__STDC__) && !defined (__GNUC__) +#if !defined (__STDC__) && !defined (__GNUC__) && !defined(_MODERN_C) #define const #endif @@ -314,6 +314,8 @@ The allocated size is always linbufsize and the number of valid elements is buffered_lines. */ int *ltran; + /* 1 if buffer was mmapped - need to munmap instead of free */ + int buffer_was_mmapped; }; /* Describe the two files currently being compared. */ --- source.gnu.orig/dir.c Mon Mar 9 19:57:40 1992 +++ dir.c Fri Jul 31 23:08:50 1992 @@ -17,8 +17,41 @@ along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#include "regex.h" #include "diff.h" +static struct re_pattern_buffer **revec = NULL; +static struct re_pattern_buffer **revec_end; + +/* add a directory name to the list to exclude + */ +void +setxpat(char *pat) +{ + char *val; + int old_re_syntax = re_set_syntax (RE_SYNTAX_EGREP); + + if (revec == NULL) + revec_end = revec = (struct re_pattern_buffer **) xmalloc (sizeof *revec); + else + { + int oldsz = revec_end - revec; + revec = (struct re_pattern_buffer **) xrealloc (revec, (oldsz+1) * (sizeof *revec)); + revec_end = revec + oldsz; + } + *revec_end = (struct re_pattern_buffer *) xmalloc (sizeof (struct re_pattern_buffer)); + bzero (*revec_end, sizeof (struct re_pattern_buffer)); + if ((val = re_compile_pattern (pat, strlen(pat), *revec_end)) != NULL) + { + error ("%s: %s", pat, val); + fatal ("use egrep(1) style patterns"); + /* NOTREACHED */ + } + (*revec_end)->fastmap = (char *) xmalloc (256); + revec_end++; + re_set_syntax (old_re_syntax); +} + static int compare_names (); /* Read the directory named DIRNAME and return a sorted vector @@ -76,6 +109,21 @@ while (next = readdir (reading)) { + if (revec) + { + struct re_pattern_buffer **rep; + int old_re_syntax = re_set_syntax (RE_SYNTAX_EGREP); + char *s = next->d_name; + int len = strlen (s); + + for (rep = revec; rep < revec_end; rep++) + if (re_search (*rep, s, len, 0, len, 0) >= 0) + break; + re_set_syntax (old_re_syntax); + if (rep != revec_end) + continue; + } + /* Ignore the files `.' and `..' */ if (next->d_name[0] == '.' && (next->d_name[1] == 0 --- source.gnu.orig/io.c Thu Nov 29 13:15:43 1990 +++ io.c Fri Jul 31 23:08:51 1992 @@ -18,6 +18,7 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include "diff.h" +#include /* Rotate a value n bits to the left. */ #define UINT_BIT (sizeof (unsigned) * CHAR_BIT) @@ -28,10 +29,11 @@ /* Current file under consideration. 
*/ struct file_data *current; +int files_are_identical; /* Check for binary files and compare them for exact identity. */ -/* Return 1 if BUF contains a non text character. +/* Return 1 if BUF contains a nul character. SIZE is the number of characters in BUF. */ static int @@ -39,48 +41,13 @@ char *buf; int size; { - static const char textchar[] = { - /* ISO 8859 */ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - }; while (--size >= 0) - if (!textchar[*buf++ & 0377]) + if (*buf++ == '\0') return 1; return 0; } -int binary_file_threshold = 512; +int binary_file_threshold = BUFSIZ; /* Slurp the current file completely into core. Return nonzero if it appears to be a binary file. 
*/ @@ -104,6 +71,17 @@ else if ((current->stat.st_mode & S_IFMT) == S_IFREG) { current->bufsize = current->stat.st_size; + current->buffer = (char *)(-1); + current->buffer_was_mmapped = 0; + if (current->bufsize > 0) { + current->buffer + = (char *)mmap((void *)0, current->bufsize + 2, + PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_AUTOGROW, current->desc, 0); + } + if ((int)(current->buffer) != -1) { + current->buffered_chars = current->bufsize; + current->buffer_was_mmapped = 1; + } else { current->buffer = (char *) xmalloc (current->bufsize + 2); current->buffered_chars = read (current->desc, current->buffer, current->bufsize); @@ -110,6 +88,7 @@ if (current->buffered_chars < 0) pfatal_with_name (current->name); } + } else { int cc; @@ -334,6 +313,43 @@ && current->suffix_begin == current->buffer + current->buffered_chars) --current->linbuf[current->buffered_lines - 1].length; } + +/* For same size files, look for mismatch w/o counting lines, in case identical */ +loop_for_mismatch (pp0, pp1) + char **pp0, **pp1; +{ + char *p0 = *pp0; + char *p1 = *pp1; + + { + register int *ip0 = (int *)p0; + register int *ip1 = (int *)p1; + while (*ip0++ == *ip1++) + ; + --ip0; --ip1; + p0 = (char *)(ip0); + p1 = (char *)(ip1); + } + while (*p0++ == *p1++) + ; + *pp0 = p0; + *pp1 = p1; +} + +/* Don't count lines until failed files_are_identical test */ +loop_for_line_count (pbase, pend) + char *pbase, *pend; +{ + int i = 0; + int j = pend - pbase; + int lines = 0; + + while (i < j) + if (pbase[i++] == '\n') + ++lines; + + return lines; +} /* Given a vector of two file_data objects, find the identical prefixes and suffixes of each object. */ @@ -370,7 +386,28 @@ else p1[filevec[1].buffered_chars] = ~p0[filevec[1].buffered_chars]; + if (filevec[0].buffered_chars == filevec[1].buffered_chars) + { /* Loop until first mismatch, or to the sentinel characters. */ + loop_for_mismatch (&p0, &p1); + + /* If the sentinel was passed, and lengths are equal, the + files are identical. 
*/ + if (p0 - filevec[0].buffer > filevec[0].buffered_chars) + { + filevec[0].prefix_end = p0 - 1; + filevec[1].prefix_end = p1 - 1; + filevec[0].prefix_lines = filevec[1].prefix_lines = 0; + filevec[0].suffix_begin = filevec[0].buffer; + filevec[1].suffix_begin = filevec[1].buffer; + filevec[0].suffix_lines = filevec[1].suffix_lines = 0; + files_are_identical = 1; + return; + } + + lines = loop_for_line_count (filevec[0].buffer, p0 - 1); + } + else while (1) { char c = *p0++; @@ -389,20 +426,6 @@ && p1 - filevec[1].buffer > filevec[1].buffered_chars))) --p0, --p1, --lines; - /* If the sentinel was passed, and lengths are equal, the - files are identical. */ - if (p0 - filevec[0].buffer > filevec[0].buffered_chars - && filevec[0].buffered_chars == filevec[1].buffered_chars) - { - filevec[0].prefix_end = p0 - 1; - filevec[1].prefix_end = p1 - 1; - filevec[0].prefix_lines = filevec[1].prefix_lines = lines; - filevec[0].suffix_begin = filevec[0].buffer; - filevec[1].suffix_begin = filevec[1].buffer; - filevec[0].suffix_lines = filevec[1].suffix_lines = lines; - return; - } - /* Point at first nonmatching characters. */ --p0, --p1; @@ -592,7 +615,12 @@ if (binary || this_binary) return 1; + files_are_identical = 0; + find_identical_ends (filevec); + + if (files_are_identical) + return 0; for (i = 0; i < 2; ++i) { --- source.gnu.orig/util.c Sun Jan 6 15:14:14 1991 +++ util.c Fri Jul 31 23:08:53 1992 @@ -203,33 +203,24 @@ { register char *t1, *t2; register char end_char = line_end_char; - int savechar; /* Check first for exact identity. If that is true, return 0 immediately. This detects the common case of exact identity - faster than complete comparison would. */ + faster than complete comparison would. + The check for equal lengths is almost always not needed. 
+ Only if the last line of two files both hash to the same + value, and the last line in the first file is a prefix + of the last line in the second file, and the first file + lacks a trailing newline, and length_varies is set, + and a ROBUST output style is chosen (which doesn't include + the artificial final newline in the length of the final line) + THEN does the length compare become critical to avoid falsely + concluding that the two lines are the same. Many thanks + to Paul Eggert for noticing this bug. */ - t1 = s1->text; - t2 = s2->text; - - /* Alter the character following line 2 so it doesn't - match that following line 1. - (We used to alter the character after line 1, - but that caused trouble if line 2 directly follows line 1.) */ - savechar = s2->text[s2->length]; - s2->text[s2->length] = s1->text[s1->length] + 1; - - /* Now find the first mismatch; this won't go past the - character we just changed. */ - while (*t1++ == *t2++); - - /* Undo the alteration. */ - s2->text[s2->length] = savechar; - - /* If the comparison stopped at the alteration, - the two lines are identical. */ - if (t2 == s2->text + s2->length + 1) + if (s1->length == s2->length + && memcmp (s1->text, s2->text, s1->length) == 0) return 0; /* Not exactly identical, but perhaps they match anyway --- source.gnu.orig/diff3.c Fri Sep 30 22:14:15 1994 +++ diff3.c Tue Apr 15 17:16:51 1997 @@ -20,7 +20,7 @@ #include "system.h" #include #include -#include "getopt.h" +#include extern char const version_string[]; @@ -202,6 +202,7 @@ static char const diff_program[] = DIFF_PROGRAM; +#if 0 static struct option const longopts[] = { {"text", 0, 0, 'a'}, @@ -217,6 +218,7 @@ {"help", 0, 0, 129}, {0, 0, 0, 0} }; +#endif /* * Main program. 
Calls diff twice on two pairs of input files, @@ -243,7 +245,7 @@ initialize_main (&argc, &argv); program_name = argv[0]; - while ((c = getopt_long (argc, argv, "aeimvx3AEL:TX", longopts, 0)) != EOF) + while ((c = getopt (argc, argv, "aeimvx3AEL:TX")) != EOF) { switch (c) { @@ -281,9 +283,11 @@ case 'T': tab_align_flag = 1; break; +#if 0 case 'v': printf ("diff3 - GNU diffutils version %s\n", version_string); exit (0); +#endif case 129: usage (); check_stdout (); @@ -561,9 +565,9 @@ *tmpblock, **result_end; - struct diff3_block const *last_diff3; + struct diff3_block *last_diff3; - static struct diff3_block const zero_diff3; + static struct diff3_block zero_diff3; /* Initialization */ result = 0; @@ -1152,9 +1156,11 @@ *ap++ = diff_program; if (always_text) *ap++ = "-a"; +#if 0 sprintf (horizon_arg, "--horizon-lines=%d", horizon_lines); *ap++ = horizon_arg; *ap++ = "--"; +#endif *ap++ = filea; *ap++ = fileb; *ap = 0;