1
0
Files
irix-657m-src/eoe/cmd/egrep/egrep.y
2022-09-29 17:59:04 +03:00

1258 lines
24 KiB
Plaintext

/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/* All Rights Reserved */
/* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF */
/* UNIX System Laboratories, Inc. */
/* The copyright notice above does not evidence any */
/* actual or intended publication of such source code. */
%{
#ident "@(#)egrep:egrep.y 1.10.2.3"
%}
/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */
/* This Module contains Proprietary Information of Microsoft */
/* Corporation and should be treated as Confidential. */
/*
* egrep -- print lines containing (or not containing) a regular expression
*
* status returns:
* 0 - ok, and some matches
* 1 - ok, but no matches
* 2 - some error; matches irrelevant
*/
/*
 * Token codes.  yylex() returns CHAR, MCHAR, DOT, CCL, NCCL, MCCL, NMCCL,
 * OR, STAR, PLUS and QUEST; the same codes are also stored in name[] as
 * syntax-tree node tags.  CAT and MDOT are never returned by yylex() here:
 * CAT is used via %prec and as a node tag, MDOT only as a node tag.
 */
%token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST
/* Precedence, lowest first: alternation, operands, concatenation, postfix operators. */
%left OR
%left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '('
%left CAT
%left STAR PLUS QUEST
%{
#include <stdio.h>
#include <ctype.h>
#include <memory.h>
#include <libw.h>
#include <stdlib.h>
#include <limits.h>
#include <locale.h>
#include <pfmt.h>
#include <errno.h>
#include <string.h>
#define BLKSIZE 512 /* size of reported disk blocks */
#define MAXLIN 2048 /* initial size and growth step of the tree/class arrays */
#define NCHARS 256 /* number of input byte values (columns of gotofn) */
#define MAXPOS 20000 /* initial size and growth step of positions[] */
#define NSTATES 256 /* number of cached automaton states */
#define FINAL -1 /* node tag of the end-of-expression marker */
#define RIGHT '\n' /* serves as record separator and as $ */
#define LEFT '\n' /* beginning of line */
#define Isalpha(c) (c <= 0177 || !multibyte ? isalpha(c) : 0)
#define MASK 0x8080
#define MASK1 0x8080
#define MASK2 0x0080
#define MASK3 0x8000
/*
 * Transition cache: gotofn[s][c] is the state reached from state s on
 * byte c, or 0 when not yet computed.  It is unsigned char because the
 * entries are state numbers up to NSTATES-1 (255); in a plain (possibly
 * signed) char, states 128..255 would read back negative and then be
 * used as array indices.
 */
unsigned char gotofn[NSTATES][NCHARS];
int state[NSTATES]; /* per state: index of its position list in positions[] */
char out[NSTATES]; /* per state: non-zero when the state is accepting */
int line = 1; /* next free syntax-tree node index */
/* Syntax tree, indexed by node number. */
int *name;
int *left;
int *right;
int *parent;
int *foll; /* per leaf: index of its follow list in positions[] */
int *positions; /* pool of position lists: a count followed by node numbers */
char *chars; /* storage for character-class bodies (length byte + members) */
wchar_t *lower; /* range lower bounds built by genrange() */
wchar_t *upper; /* range upper bounds built by genrange() */
int maxlin, maxclin, maxwclin, maxpos; /* current capacities of the arrays above */
int nxtpos = 0; /* next free slot in positions[] */
int inxtpos; /* value of nxtpos saved by cgotofn() for later resets */
int nxtchar = 0; /* next free slot in chars[] */
int *tmpstat; /* working marks (1 = node is in the set being built) */
int *initstat; /* copy of tmpstat for the initial position set */
int istat; /* state in which matching of each line starts */
int nstate = 1; /* highest state number in use */
int xstate; /* set by notin() to the matching existing state */
int count; /* shared counter (set size / current class index) */
int icount; /* count for the initial position set */
char *input; /* pattern text when it is not read from a file */
eucwidth_t wp;
#define multibyte wp._multibyte
#define eucw1 wp._eucw1
#define eucw2 wp._eucw2
#define eucw3 wp._eucw3
wchar_t lyylval; /* value of the last MCHAR token */
wchar_t nextch();
wchar_t maxmin();
int compare();
void overflo();
void clearg(void);
void execute(char *);
void follow(int);
void add(int *, int);
void synerror(void);
char reinit = 0; /* set by nxtst() when the state cache overflowed */
long lnum; /* current input line number */
/* Command-line option flags, one per option letter. */
int bflag;
int cflag;
int eflag;
int fflag;
int hflag;
int iflag;
int lflag;
int nflag;
int sflag;
int vflag;
int nfile; /* number of file operands */
long blkno; /* bytes read so far from the current file */
long tln; /* count of selected lines (for -c) */
int nsucc; /* non-zero once any line has been selected */
int badbotch; /* non-zero once a file could not be opened */
extern char *optarg;
extern int optind;
int f;
FILE *expfile; /* pattern file given with -f */
%}
%%
/*
 * s: the complete expression.  The tree is topped with a FINAL node;
 * unary() post-increments `line`, so the decrement leaves `line` equal to
 * the index of that FINAL node.
 */
s: t
{
unary(FINAL, $1);
line--;
}
;
/*
 * t: the implicit prefix b concatenated with the expression r.  OR tokens
 * at the very start or end (yylex() also returns OR for a newline in the
 * pattern) are accepted and discarded.
 */
t: b r
{ $$ = node(CAT, $1, $2); }
| OR b r OR
{ $$ = node(CAT, $2, $3); }
| OR b r
{ $$ = node(CAT, $2, $3); }
| b r OR
{ $$ = node(CAT, $1, $2); }
;
/*
 * b: matches no tokens; builds STAR(DOT), i.e. ".*", in front of the
 * expression (presumably so a match may begin anywhere in the line).
 */
b:
{ /* if(multibyte)
$$ = mdotenter();
else */
$$ = enter(DOT);
$$ = unary(STAR, $$);
}
;
/* r: a single operand (leaf or character-class subtree). */
r: CHAR
{ $$ = iflag && isalpha($1) ? node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); }
| MCHAR
{ $$ = mchar(lyylval); }
| DOT
{ if(multibyte)
$$ = mdotenter();
else
$$ = enter(DOT);
}
| CCL
{ $$ = cclenter(CCL); }
| NCCL
{ $$ = cclenter(NCCL); }
| MCCL
{ $$ = ccl(CCL); }
| NMCCL
{ $$ = ccl(NCCL); }
;
/* r: alternation, concatenation, postfix operators and grouping. */
r: r OR r
{ $$ = node(OR, $1, $3); }
| r r %prec CAT
{ $$ = node(CAT, $1, $2); }
| r STAR
{ $$ = unary(STAR, $1); }
| r PLUS
{ $$ = unary(PLUS, $1); }
| r QUEST
{ $$ = unary(QUEST, $1); }
| '(' r ')'
{ $$ = $2; }
| error
;
%%
/*
 * yyerror -- parser error hook: print the parser's message through pfmt()
 * and terminate with exit status 2.
 */
void
yyerror(const char *s)
{
	(void)pfmt(stderr, MM_NOGET | MM_ERROR, "%s\n", s);
	exit(2);
}
/*
 * yylex -- lexical analyser for the pattern.
 *
 * Reads characters with nextch() and returns a token code.  CHAR tokens
 * carry their character in yylval, MCHAR tokens in lyylval.  For a
 * bracket expression the member bytes are appended to chars[], preceded
 * by a length byte at chars[count]; the token is CCL/NCCL, or MCCL/NMCCL
 * when the locale is multibyte.  Returns 0 at the end of the pattern.
 */
yylex() {
extern int yylval;
int cclcnt, x, ccount, oldccount;
register wchar_t c, lc;
c = nextch();
switch(c) {
case '^':
/* beginning of line is represented by the LEFT character */
yylval = LEFT;
return(CHAR);
case '$':
/* end of line: handled as an ordinary CHAR whose value is RIGHT */
c = RIGHT;
goto defchar;
case '|': return (OR);
case '*': return (STAR);
case '+': return (PLUS);
case '?': return (QUEST);
case '(': return (c);
case ')': return (c);
case '.': return(DOT);
case '\0': return (0);
/* a newline inside the pattern separates alternatives */
case RIGHT: return (OR);
case '[':
x = (multibyte ? MCCL : CCL);
cclcnt = 0;
/* reserve chars[count] for the length byte written after the loop */
count = nxtchar++;
if ((c = nextch()) == '^') {
x = (multibyte ? NMCCL : NCCL);
c = nextch();
}
/* lc is the previously stored member; 0 means "none yet" */
lc = 0;
do {
if(iflag && Isalpha(c))
c = tolower(c);
if (c == '\0') synerror();
if (c == '-' && cclcnt > 0 && lc != 0) {
if ((c = nextch()) != 0) {
if(c == ']') {
/* '-' immediately before ']' is a literal member */
/* NOTE(review): this store is not preceded by a capacity check against maxclin */
chars[nxtchar++] = '-';
cclcnt++;
break;
}
if(iflag && Isalpha(c))
c = tolower(c);
/*
 * Keep the '-' as a range marker when the locale is single-byte, or when
 * both ends have the same (c & MASK) bits, are ascending and are not
 * control characters.  Otherwise the '-' is dropped and c is stored
 * below as an ordinary member.
 */
if(!multibyte || (c & MASK) == (lc & MASK) && lc < c && (c > 0377 || !iscntrl(c)) && (lc > 0377 || !iscntrl(lc))) {
if(nxtchar >= maxclin)
if(allocchars() == 0)
overflo();
chars[nxtchar++] = '-';
cclcnt++;
}
}
}
/* append the multibyte encoding of c; cclcnt counts bytes, not characters */
ccount = oldccount = nxtchar;
if(ccount + MB_LEN_MAX >= maxclin)
if(allocchars() == 0)
overflo();
ccount += wctomb(&chars[ccount], c);
cclcnt += ccount - oldccount;
nxtchar += ccount - oldccount;
lc = c;
} while ((c = nextch()) != ']');
/* NOTE(review): the byte count is stored in a single char element */
chars[count] = cclcnt;
return(x);
case '\\':
/* backslash: take the next character literally (falls into default) */
if ((c = nextch()) == '\0') synerror();
defchar:
default: if(c <= 0177 || !multibyte) {
yylval = c; return (CHAR);
}
lyylval = c;
return(MCHAR);
}
}
int mgetc();

/*
 * nextch -- return the next pattern character as a wide character.
 *
 * With -f the character is read from the pattern file via mbftowc() and
 * mgetc(); otherwise it is decoded from the string `input`, which is
 * advanced past it.  Returns 0 at the end of the pattern.  A conversion
 * error is reported through synerror().
 */
wchar_t nextch() {
	wchar_t wc;
	char mbbuf[MB_LEN_MAX];
	int nbytes, peeked;

	if (!fflag) {
		nbytes = mbtowc(&wc, input, MB_LEN_MAX);
		if (nbytes == -1)
			synerror();
		if (nbytes == 0)
			return(0);
		input += nbytes;
		return(wc);
	}

	nbytes = mbftowc(mbbuf, &wc, mgetc, &peeked);
	if (nbytes < 0)
		synerror();
	if (nbytes == 0)
		wc = '\0';
	return(wc);
}
/*
 * mgetc -- byte source handed to mbftowc(): the next byte of the pattern
 * file, or whatever getc() returns at end of file.
 */
int mgetc()
{
	int byte = getc(expfile);

	return(byte);
}
void
synerror() {
pfmt(stderr, MM_ERROR, ":184:Syntax error\n");
exit(2);
}
/*
 * enter -- create a leaf node tagged x and return its index.
 * The tree arrays are grown first when they are full.
 */
enter(x) int x; {
	int slot;

	if (line >= maxlin && alloctree() == 0)
		overflo();
	slot = line++;
	name[slot] = x;
	left[slot] = right[slot] = 0;
	return(slot);
}
/*
 * cclenter -- create a character-class leaf tagged x.  Its right[] slot
 * records `count`, the index in chars[] of the class's length byte.
 */
cclenter(x) int x; {
	int leaf = enter(x);

	right[leaf] = count;
	return (leaf);
}
/*
 * node -- create a binary node tagged x with children l and r, record it
 * as the parent of both, and return its index.
 */
node(x, l, r) {
	int slot;

	if (line >= maxlin && alloctree() == 0)
		overflo();
	slot = line++;
	name[slot] = x;
	left[slot] = l;
	right[slot] = r;
	parent[l] = slot;
	parent[r] = slot;
	return(slot);
}
/*
 * unary -- create a one-child node tagged x above d (right[] is 0),
 * record it as d's parent, and return its index.
 */
unary(x, d) {
	int slot;

	if (line >= maxlin && alloctree() == 0)
		overflo();
	slot = line++;
	name[slot] = x;
	left[slot] = d;
	right[slot] = 0;
	parent[d] = slot;
	return(slot);
}
/*
 * allocchars -- enlarge chars[] by MAXLIN bytes.
 *
 * Returns 1 on success and 0 when memory is exhausted.  On failure
 * `chars` and `maxclin` keep their previous, still valid, values: the
 * realloc() result goes through a temporary rather than straight into
 * `chars`.
 */
int
allocchars()
{
	char *bigger;
	int newmax = maxclin + MAXLIN;

	bigger = (char *)realloc(chars, newmax);
	if (bigger == (char *)0)
		return 0;
	chars = bigger;
	maxclin = newmax;
	return 1;
}
/* growints -- resize *arrp to nelem ints; *arrp is left untouched on failure. */
static int
growints(int **arrp, int nelem)
{
	int *bigger = (int *)realloc(*arrp, nelem * sizeof(int));

	if (bigger == (int *)0)
		return 0;
	*arrp = bigger;
	return 1;
}

/*
 * alloctree -- enlarge every per-node array (name, left, right, parent,
 * foll, tmpstat, initstat) by MAXLIN entries.
 *
 * Returns 1 on success and 0 when memory is exhausted.  `maxlin` is only
 * advanced once all seven arrays have grown, and a failed realloc() no
 * longer overwrites the array pointer with a null pointer.
 */
int
alloctree()
{
	int newmax = maxlin + MAXLIN;

	if (!growints(&name, newmax) ||
	    !growints(&left, newmax) ||
	    !growints(&right, newmax) ||
	    !growints(&parent, newmax) ||
	    !growints(&foll, newmax) ||
	    !growints(&tmpstat, newmax) ||
	    !growints(&initstat, newmax))
		return 0;
	maxlin = newmax;
	return 1;
}
/*
 * overflo -- report that the expression is too long (used when memory
 * for it cannot be obtained) and exit with status 2.
 */
void overflo()
{
	(void)pfmt(stderr, MM_ERROR, ":185:Regular expression too long\n");
	exit(2);
}
/*
 * cfoll -- walk the subtree rooted at v and, for every leaf, compute its
 * follow set with follow() and store it through add(foll, leaf).
 */
void
cfoll(v) {
	int i;

	if (left[v] != 0) {
		/* interior node: visit the left child, then the right one if any */
		cfoll(left[v]);
		if (right[v] != 0)
			cfoll(right[v]);
		return;
	}

	/* leaf: start from an empty set */
	count = 0;
	i = 1;
	while (i <= line)
		tmpstat[i++] = 0;
	follow(v);
	add(foll, v);
}
/*
 * cgotofn -- build the initial position set and the first two states.
 *
 * The set computed by cstate() over the whole expression is saved in
 * initstat[]/icount, stored as state 1, and istat becomes the state
 * reached from state 1 on the LEFT (start-of-line) character.  nxtpos is
 * remembered in inxtpos so positions[] can later be rewound.
 */
void
cgotofn() {
	int idx;

	inxtpos = nxtpos;
	count = 0;
	for (idx = 3; idx <= line; idx++)
		tmpstat[idx] = 0;

	if (cstate(line-1) == 0) {
		/* cstate() returned 0 for the whole expression: state 1 accepts */
		tmpstat[line] = 1;
		count++;
		out[1] = 1;
	}

	idx = 3;
	while (idx <= line) {
		initstat[idx] = tmpstat[idx];
		idx++;
	}
	icount = --count; /*leave out position 1 */
	tmpstat[1] = 0;
	add(state, 1);
	istat = nxtst(1, LEFT);
}
/*
 * nxtst -- compute the state reached from state s on input byte c.
 *
 * The new position set starts as the saved initial set
 * (initstat[]/icount).  For every position of s whose node matches c,
 * the positions in that node's follow list are added.  If an existing
 * state already holds the same set it is reused; otherwise a new state
 * is appended.  The transition is cached in gotofn[s][c], except when
 * the state table is full and gets reset.
 *
 * Returns the number of the resulting state.
 */
nxtst(s, c)
{
register i, num, k;
int pos, curpos, number, newpos;
/* positions[state[s]] is the size of s's list; its members follow it */
num = positions[state[s]];
count = icount;
for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
pos = state[s] + 1;
for (i=0; i<num; i++) {
curpos = positions[pos];
k = name[curpos];
/* negative names (FINAL) never consume input */
if (k >= 0)
if (
(k == c)
|| (k == DOT && dot(c))
|| (k == MDOT && mdot(c))
|| (k == CCL && dot(c) && member(c, right[curpos], 1))
|| (k == NCCL && dot(c) && member(c, right[curpos], 0))
|| (k == MCCL && mdot(c) && member(c, right[curpos], 1))
) {
/* merge the follow list of curpos into the set being built */
number = positions[foll[curpos]];
newpos = foll[curpos] + 1;
for (k=0; k<number; k++) {
if (tmpstat[positions[newpos]] != 1) {
tmpstat[positions[newpos]] = 1;
count++;
}
newpos++;
}
}
pos++;
}
if (notin(nstate)) {
if (++nstate >= NSTATES) {
/*
 * State table full: clear the caches, rewind positions[] to the point
 * saved in inxtpos, store the new set as state 1 and set reinit so the
 * callers run clearg().
 */
for (i=1; i<NSTATES; i++)
out[i] = 0;
for (i=1; i<NSTATES; i++)
for (k=0; k<NCHARS; k++)
gotofn[i][k] = 0;
nstate = 1;
nxtpos = inxtpos;
reinit = 1;
add(state, nstate);
if (tmpstat[line] == 1) out[nstate] = 1;
return nstate;
}
add(state, nstate);
/* the set contains the FINAL node: the state is accepting */
if (tmpstat[line] == 1) out[nstate] = 1;
gotofn[s][c] = nstate;
return nstate;
}
else {
/* notin() left the number of the identical existing state in xstate */
gotofn[s][c] = xstate;
return xstate;
}
}
cstate(v)
{
register b;
if (left[v] == 0) {
if (tmpstat[v] != 1) {
tmpstat[v] = 1;
count++;
}
return(1);
}
else if (right[v] == 0) {
if (cstate(left[v]) == 0) return (0);
else if (name[v] == PLUS) return (1);
else return (0);
}
else if (name[v] == CAT) {
if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
else return (1);
}
else { /* name[v] == OR */
b = cstate(right[v]);
if (cstate(left[v]) == 0 || b == 0) return (0);
else return (1);
}
}
/*
 * dot -- decide whether byte c may be matched by a single-byte "any
 * character" position.  Returns 1 if so, 0 otherwise.
 *
 * The line delimiters never match.  In a multibyte locale a byte >= 0200
 * is also rejected when it is not a control character, or when it is SS2
 * or SS3 and the corresponding code-set width is non-zero.
 */
dot(c)
int c;
{
	if (c == LEFT || c == RIGHT)
		return(0);
	if (!multibyte || c < 0200)
		return(1);
	if (!iscntrl(c))
		return(0);
	if (c == SS2 && eucw2)
		return(0);
	if (c == SS3 && eucw3)
		return(0);
	return(1);
}
/*
 * mdot -- decide whether byte c can be matched by an MDOT position.
 *
 * Returns 1 when c is 0200 or above and is not a control character in
 * the current locale, 0 otherwise.  The return type is now spelled out;
 * the implicit-int form is not valid C99 or later.
 */
int
mdot(int c)
{
	if (c >= 0200 && !iscntrl(c))
		return(1);
	return(0);
}
/*
 * member -- test byte symb against the character class stored at
 * chars[set].
 *
 * chars[set] holds the number of bytes in the class and the bytes follow
 * it.  A '-' that has a previous member and is followed by another byte
 * denotes a range; a '-' in the last position is a literal member.
 *
 * symb  byte to test (folded with tolower() when -i is in effect)
 * set   index in chars[] of the class's length byte
 * torf  value to return on a hit; !torf is returned on a miss
 *
 * The line delimiters (LEFT/RIGHT) always yield 0.
 *
 * Fixes relative to the previous version: the range test used
 * `i++ < num`, which is still true for a '-' in the last position, so
 * the byte after the class was read as the range's upper bound and the
 * literal '-' was never compared.  The length byte is also read as
 * unsigned so that classes longer than 127 bytes work where plain char
 * is signed.
 */
int
member(int symb, int set, int torf)
{
	int i, num, pos, c, lc;

	if (symb == RIGHT || symb == LEFT)
		return(0);
	num = (unsigned char)chars[set];
	pos = set + 1;
	lc = 0;
	if (iflag)
		symb = tolower(symb);
	for (i = 0; i < num; i++) {
		c = (unsigned char)chars[pos++];
		/* ++i consumes the upper bound; it must still lie inside the class */
		if (c == '-' && lc != 0 && ++i < num) {
			c = (unsigned char)chars[pos++];
			if (lc <= symb && symb <= c)
				return(torf);
		}
		if (symb == c)
			return (torf);
		lc = c;
	}
	return(!torf);
}
/*
 * notin -- look among states 1..n for one whose position list equals the
 * set currently marked in tmpstat[] (of size `count`).
 *
 * Returns 0 and leaves the state's number in xstate when one is found;
 * returns 1 when the set is new.
 */
notin(n)
int n;
{
	int st, k, base;

	for (st = 1; st <= n; st++) {
		if (positions[state[st]] != count)
			continue;
		base = state[st] + 1;
		k = 0;
		while (k < count && tmpstat[positions[base + k]] == 1)
			k++;
		if (k >= count) {
			xstate = st;
			return (0);
		}
	}
	return (1);
}
/*
 * add -- append the set currently marked in tmpstat[] to positions[] and
 * record its starting index in array[n].
 *
 * The stored list is `count` followed by every node index from 3 to
 * `line` whose tmpstat[] entry is 1.  positions[] is enlarged first if
 * the list might not fit.
 */
void
add(array, n)
int *array;
{
	int nodeidx;

	if (nxtpos + count >= maxpos) {
		maxpos += MAXPOS + count;
		positions = (int *)realloc(positions, maxpos *sizeof(int));
		if (positions == (int *)0)
			overflo();
	}

	array[n] = nxtpos;
	positions[nxtpos] = count;
	nxtpos++;
	for (nodeidx = 3; nodeidx <= line; nodeidx++)
		if (tmpstat[nodeidx] == 1)
			positions[nxtpos++] = nodeidx;
}
/*
 * follow -- add to tmpstat[] the positions that can come after node v,
 * climbing towards the root through parent[].
 *
 * The climb stops at the FINAL node (index `line`), which is itself
 * marked when reached from its child.
 */
void
follow(v)
int v;
{
	int up;

	if (v == line)
		return;

	up = parent[v];
	switch (name[up]) {
	case STAR:
	case PLUS:
		/* repetition: v may be followed by its own first positions */
		cstate(v);
		follow(up);
		break;
	case OR:
	case QUEST:
		follow(up);
		break;
	case CAT:
		/*
		 * From the left operand, add the right operand's first positions
		 * and keep climbing only if cstate() returned 0 for it; from the
		 * right operand, always keep climbing.
		 */
		if (v != left[up] || cstate(right[up]) == 0)
			follow(up);
		break;
	case FINAL:
		if (tmpstat[line] != 1) {
			tmpstat[line] = 1;
			count++;
		}
		break;
	}
}
static char badopen[] = ":92:Cannot open %s: %s\n";

/*
 * main -- parse the options, compile the expression and search each file.
 *
 * The expression comes from -e, from the file named by -f, or from the
 * first operand.  The remaining operands are files; with none, standard
 * input is searched.
 *
 * Exit status: 2 if a file could not be opened (or on a usage error),
 * otherwise 0 if some line was selected and 1 if none was.
 */
int
main(int argc, char **argv)
{
	int c;
	char *usage, *usageid;
	int errflag = 0;

	(void)setlocale(LC_ALL, "");
	(void)setcat("uxcore");
	(void)setlabel("UX:egrep");
	usage = "[ -bchilnsv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ...";
	usageid = ":186";
	getwidth(&wp);

	while((c = getopt(argc, argv, "bcie:f:hlnsv")) != -1)
		switch(c) {
		case 'b':
			bflag++;
			break;
		case 'c':
			cflag++;
			break;
		case 'e':
			eflag++;
			input = optarg;
			break;
		case 'f':
			fflag++;
			expfile = fopen(optarg, "r");
			if(expfile == NULL) {
				pfmt(stderr, MM_ERROR, badopen, optarg,
				    strerror(errno));
				exit(2);
			}
			break;
		case 'h':
			hflag++;
			break;
		case 'i':
			iflag++;
			break;
		case 'l':
			lflag++;
			break;
		case 'n':
			nflag++;
			break;
		case 's':
			sflag++;
			break;
		case 'v':
			vflag++;
			break;
		case '?':
			errflag++;
			break;
		}

	argc -= optind;
	if (errflag || ((argc <= 0) && !fflag && !eflag)) {
		if (!errflag)
			pfmt(stderr, MM_ERROR, ":1:Incorrect usage\n");
		pfmt(stderr, MM_ACTION, ":187:Usage:\n\tegrep %s\n",
		    gettxt(usageid, usage));
		exit(2);
	}
	if(!eflag && !fflag) {
		/* no -e/-f: the first operand is the expression */
		input = argv[optind];
		optind++;
		argc--;
	}
	argv = &argv[optind];

	/* allocate initial space for arrays */
	if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
		overflo();
	if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
		overflo();
	if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
		overflo();
	if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
		overflo();
	if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
		overflo();
	if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
		overflo();
	if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
		overflo();
	if((chars = (char *)malloc(MAXLIN)) == (char *)0)
		overflo();
	if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
		overflo();
	if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
		overflo();
	if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0)
		overflo();
	maxlin = MAXLIN;
	maxclin = MAXLIN;
	maxwclin = MAXLIN;
	maxpos = MAXPOS;

	/* compile: parse into a tree, compute follow sets, build the first states */
	yyparse();
	cfoll(line-1);
	cgotofn();

	nfile = argc;
	if (argc<=0) {
		if (lflag && (argc < 0)) exit(1);
		execute(0);
	}
	else while (--argc >= 0) {
		/* the state cache overflowed during the previous file: rebuild it */
		if (reinit == 1) clearg();
		execute(*argv++);
	}
	exit(badbotch ? 2 : nsucc==0);
}
/*
 * execute -- search one file (standard input when file is a null
 * pointer) and print the selected lines, or the count/name according to
 * the -c, -l and -s options.
 *
 * The file is read with read() in BUFSIZ pieces into a 2*BUFSIZ buffer.
 * Each line is run through the cached automaton; states not yet cached
 * are built on demand by nxtst().  A line longer than BUFSIZ that is
 * still unterminated has its buffered part discarded.
 *
 * Fix relative to the previous version: the -b prefix is printed with a
 * "%ld" format, so the block number is now passed as a long rather than
 * an int.
 */
void
execute(file)
char *file;
{
	register char *p;
	register int cstat;
	register wchar_t c;
	register int t;
	register int count;	/* bytes held in buf; hides the global `count` */
	int count1;
	int nchars;
	int succ;
	char *ptr, *ptrend, *lastptr;
	char buf[2*BUFSIZ];
	FILE *f;
	int nlflag;
	static const char *pref_s, *pref_ld;
	char *stdinp;

	if (file) {
		if ((f = fopen(file, "r")) == NULL) {
			if (!sflag)
				pfmt(stderr, MM_ERROR, badopen, file, strerror(errno));
			badbotch=1;
			return;
		}
	}
	else {
		f = stdin;
		stdinp = "(standard input)";
	}
	lnum = 1;
	tln = 0;
	if((count = read(fileno(f), buf, BUFSIZ)) <= 0) {
		/* empty or unreadable: -c still reports a zero count */
		fclose(f);
		if (cflag) {
			if (nfile>1 && !hflag)
				fprintf(stdout, pref_s ? pref_s :
				    (pref_s = gettxt(":188", "%s:")),
				    file);
			fprintf(stdout, "%ld\n", tln);
		}
		return;
	}
	blkno = count;
	ptr = buf;
	for(;;) {
		/* locate the end of the current line, refilling the buffer if needed */
		if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) {
			count = buf + count - ptr;
			if(count > BUFSIZ) {
				/* over-long partial line: drop what has been buffered */
				ptr = p = buf;
				count = 0;
			} else {
				/* move the partial line to the front and read after it */
				if(ptr != buf)
					(void)memmove(buf, ptr, count);
				p = buf + count;
				ptr = buf;
			}
			if((count1 = read(fileno(f), p, BUFSIZ)) > 0) {
				count += count1;
				blkno += count1;
				continue;
			}
			/* end of file without a trailing newline */
			ptrend = ptr + count;
			nlflag = 0;
		} else
			nlflag = 1;
		*ptrend = '\n';
		p = ptr;
		lastptr = ptr;
		cstat = istat;
		succ = 0;
		for(;;) {
			if(out[cstat]) {
				if(multibyte && p > ptr) {
					/*
					 * Accepting state reached: re-scan from lastptr
					 * character by character to see whether the
					 * match ends on a character boundary.
					 */
					wchar_t wchar;
					int length;
					char *endptr = p;
					p = lastptr;
					while(p < endptr) {
						length = mbtowc(&wchar, p, MB_LEN_MAX);
						if(length <= 1)
							p++;
						else
							p += length;
					}
					if(p == endptr) {
						succ = !vflag;
						break;
					}
					/* not on a boundary: restart one character later */
					cstat = 1;
					length = mbtowc(&wchar, lastptr, MB_LEN_MAX);
					if(length <= 1)
						lastptr++;
					else
						lastptr += length;
					p = lastptr;
					continue;
				}
				succ = !vflag;
				break;
			}
			c = (unsigned char)*p++;
			/* 0 in the cache means "transition not computed yet" */
			if ((t = gotofn[cstat][c]) == 0)
				cstat = nxtst(cstat, c);
			else
				cstat = t;
			if(c == RIGHT) {
				/* end of line: the line is selected according to -v */
				if(out[cstat]) {
					succ = !vflag;
					break;
				}
				succ = vflag;
				break;
			}
		}
		if(succ) {
			nsucc = 1;
			if (cflag) tln++;
			else if (sflag)
				; /* ugh */
			else if (lflag) {
				printf("%s\n", (file ? file : stdinp));
				fclose(f);
				return;
			}
			else {
				if (nfile > 1 && !hflag)
					printf(pref_s ? pref_s :
					    (pref_s = gettxt(":188", "%s:")),
					    file);
				if (bflag) {
					nchars = blkno - (buf + count - ptrend) - 2;
					if(nlflag)
						nchars++;
					/* the format is "%ld:", so pass a long */
					printf(pref_ld ? pref_ld :
					    (pref_ld = gettxt(":189", "%ld:")),
					    (long)(nchars/BLKSIZE));
				}
				if (nflag)
					printf(pref_ld ? pref_ld :
					    (pref_ld = gettxt(":189", "%ld:")), lnum);
				if(nlflag)
					nchars = ptrend - ptr + 1;
				else
					nchars = ptrend - ptr;
				fwrite(ptr, 1, nchars, stdout);
			}
		}
		if(!nlflag)
			break;
		ptr = ptrend + 1;
		if(ptr >= buf + count) {
			ptr = buf;
			if((count = read(fileno(f), buf, BUFSIZ)) <= 0)
				break;
			blkno += count;
		}
		lnum++;
		/* nxtst() overflowed the state cache on this line: rebuild it */
		if (reinit == 1)
			clearg();
	}
	fclose(f);
	if (cflag) {
		if (nfile > 1 && !hflag)
			printf(pref_s ? pref_s :
			    (pref_s = gettxt(":188", "%s:")),
			    file);
		printf("%ld\n", tln);
	}
}
/*
 * clearg -- discard every cached state and transition, then rebuild the
 * initial position set, state 1 and istat exactly as cgotofn() does.
 * Also rewinds positions[] to inxtpos and clears the reinit flag.
 */
void
clearg(void) {
	int st;

	for (st = 1; st <= nstate; st++) {
		out[st] = 0;
		memset(gotofn[st], 0, NCHARS);
	}
	nstate = 1;
	nxtpos = inxtpos;
	reinit = 0;

	count = 0;
	for (st = 3; st <= line; st++)
		tmpstat[st] = 0;
	if (cstate(line-1) == 0) {
		tmpstat[line] = 1;
		count++;
		out[1] = 1;
	}
	st = 3;
	while (st <= line) {
		initstat[st] = tmpstat[st];
		st++;
	}
	icount = --count; /*leave out position 1 */
	tmpstat[1] = 0;
	add(state, 1);
	istat = nxtst(1, LEFT);
}
/*
 * mdotenter -- build the subtree that matches any one character in a
 * multibyte locale and return its root.
 *
 * The alternatives are: a single-byte DOT; eucw1 MDOT bytes; when eucw2
 * is non-zero, byte 0216 followed by eucw2 MDOT bytes; when eucw3 is
 * non-zero, byte 0217 followed by eucw3 MDOT bytes.
 *
 * The lead bytes are written as the integers 0216 and 0217 (presumably
 * the SS2/SS3 single-shift bytes -- confirm against <euc.h>) instead of
 * the character constants '\216' and '\217': where plain char is signed
 * those constants are negative, and nxtst() only matches node names that
 * are >= 0.
 */
int
mdotenter()
{
	int i, x1, x2;

	x1 = enter(DOT);
	x2 = enter(MDOT);
	for (i = 1; i < eucw1; i++)
		x2 = node(CAT, x2, enter(MDOT));
	x1 = node(OR, x1, x2);
	if (eucw2) {
		x2 = enter(0216);
		for (i = 1; i <= eucw2; i++)
			x2 = node(CAT, x2, enter(MDOT));
		x1 = node(OR, x1, x2);
	}
	if (eucw3) {
		x2 = enter(0217);
		for (i = 1; i <= eucw3; i++)
			x2 = node(CAT, x2, enter(MDOT));
		x1 = node(OR, x1, x2);
	}
	return(x1);
}
/*
 * mchar -- build the subtree that matches the multibyte character c: a
 * chain of CAT nodes over one leaf per byte of its encoding.  Returns
 * the root of the chain.
 *
 * Every byte, including the first, is entered as an unsigned value;
 * before, the first byte went through plain char and became a negative
 * node name where char is signed.  A character that wctomb() cannot
 * convert is reported through synerror() instead of writing the
 * terminator at multichar[-1].
 */
int
mchar(c)
wchar_t c;
{
	char multichar[MB_LEN_MAX+1];
	unsigned char *p;
	int x1, lc, length;

	length = wctomb(multichar, c);
	if (length < 1)
		synerror();
	multichar[length] = '\0';
	p = (unsigned char *)multichar;
	x1 = enter(*p++);
	while ((lc = *p++) != 0)
		x1 = node(CAT, x1, enter(lc));
	return(x1);
}
/*
 * ccl -- build the subtree for a bracket expression in a multibyte
 * locale.  type is CCL, or NCCL for a negated class.
 *
 * genrange() first turns the class stored at chars[count] into sorted
 * ranges in lower[]/upper[].  The leading ranges whose lower bound is
 * ASCII or a control byte are written back into chars[] as an ordinary
 * class; each remaining range becomes an mchar() or range() subtree, and
 * the pieces are joined with OR nodes.  Returns the root (0 if nothing
 * was produced).
 */
ccl(type)
int type;
{
	wchar_t lo;
	char lobytes[MB_LEN_MAX];
	char hibytes[MB_LEN_MAX];
	int tree, piece, nbytes, idx, last, setlen;

	tree = 0;
	setlen = 0;
	last = genrange(type);
	nxtchar = count + 1;

	/* create usual character class for single byte characters */
	for (idx = 0; idx <= last; idx++) {
		lo = lower[idx];
		if (!(isascii(lo) || lo <= 0377 && iscntrl(lo)))
			break;
		setlen++;
		chars[nxtchar++] = lo;
		if (lo != upper[idx]) {
			chars[nxtchar++] = '-';
			chars[nxtchar++] = upper[idx];
			setlen += 2;
		}
	}
	if (setlen)
		chars[count] = setlen;
	else
		nxtchar = count;

	if (idx > 0)
		/* single byte part of character class */
		tree = cclenter(type);
	else if (type == NCCL)
		/* all single byte characters match */
		tree = enter(DOT);

	for (; idx <= last; idx++) {
		if (upper[idx] == lower[idx])
			piece = mchar(lower[idx]);
		else {
			nbytes = wctomb(lobytes, lower[idx]);
			wctomb(hibytes, upper[idx]);
			piece = range(lobytes, hibytes, nbytes);
		}
		tree = tree ? node(OR, tree, piece) : piece;
	}
	return tree;
}
/*
 * range -- build the subtree matching every length-byte sequence between
 * the byte strings p1 and p2 (inclusive).  Returns its root.
 *
 * One byte: a single byte class.  Equal first bytes: that byte followed
 * by the range of the remaining bytes.  Otherwise the alternation of
 *   p1[0] followed by rest-of-p1 .. all 0377 bytes,
 *   any first byte strictly between p1[0] and p2[0] followed by
 *   length-1 MDOT bytes (only if such a byte exists), and
 *   p2[0] followed by all 0200 bytes .. rest-of-p2.
 */
range(p1, p2, length)
unsigned char *p1, *p2;
int length;
{
	char fill[MB_LEN_MAX+1];
	int k, lowpart, midpart, highpart;

	if (length == 1)
		return(classenter(*p1, *p2));
	if (p1[0] == p2[0])
		return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1)));

	if (length > 1)
		memset(fill, 0377, length - 1);
	lowpart = node(CAT, enter(p1[0]), range(p1+1, fill, length - 1));

	if ((int)(p1[0] + 1) < (int)p2[0]) {
		midpart = classenter(p1[0] + 1, p2[0] - 1);
		k = 1;
		while (k < length) {
			midpart = node(CAT, midpart, enter(MDOT));
			k++;
		}
		lowpart = node(OR, lowpart, midpart);
	}

	if (length > 1)
		memset(fill, 0200, length - 1);
	highpart = node(CAT, enter(p2[0]), range(fill, p2+1, length - 1));
	return node(OR, lowpart, highpart);
}
/*
 * classenter -- create a leaf matching one byte in the range x1..x2.
 *
 * When the range covers every non-control byte from 0200 to 0377 (the
 * bounds are found once and kept in statics) a plain MDOT leaf is
 * returned.  Otherwise a three-byte class "x1-x2" is appended to chars[]
 * and an MCCL leaf referring to it is returned.
 */
classenter(x1, x2)
int x1, x2;
{
	static int max, min;

	if (max == 0) {
		int b;

		b = 0200;
		while (b <= 0377 && iscntrl(b))
			b++;
		min = b;
		b = 0377;
		while (b >= 0200 && iscntrl(b))
			b--;
		max = b;
	}

	if (x1 <= min && x2 >= max)
		return enter(MDOT);

	if (nxtchar + 4 >= maxclin && allocchars() == 0)
		overflo();
	count = nxtchar;
	chars[count] = 3;
	chars[count + 1] = x1;
	chars[count + 2] = '-';
	chars[count + 3] = x2;
	nxtchar = count + 4;
	return cclenter(MCCL);
}
/*
 * genrange -- convert the class stored at chars[count] into a sorted list
 * of disjoint ranges lower[i]..upper[i].
 *
 * type  CCL, or NCCL to produce the complement of the multibyte part.
 *
 * Returns the index of the last range.
 *
 * Fixes relative to the previous version:
 *  - the scan that skips the single-byte ranges is parenthesised like the
 *    matching test in ccl(); without the parentheses, once i passed
 *    `current` the loop kept testing a stale c and could run forever;
 *  - in the gap computation the code set of lower[j+1] is masked with
 *    MASK before it is handed to maxmin(), as every other call does;
 *    unmasked, maxmin() matched none of its cases;
 *  - the class length byte is read as unsigned.
 */
int
genrange(type)
int type;
{
	char *p, *endp;
	int current, nel, i, last, length;
	wchar_t c, lc;

	current = 0;
	p = &chars[count+1];
	endp = &chars[count+1] + (unsigned char)chars[count];
	lc = 0;
	/* convert character class into union of ranges */
	while(p < endp) {
		length = mbtowc(&c, p, MB_LEN_MAX);
		p += length;
		if(c == '-' && lc != 0) {
			/* range marker: the next character is the upper bound */
			length = mbtowc(&c, p, MB_LEN_MAX);
			upper[current-1] = c;
			p += length;
		} else {
			lower[current] = c;
			upper[current++] = c;
		}
		lc = c;
	}
	nel = current;
	/* sort lower and upper bounds of ranges */
	qsort((char *)lower, nel, sizeof(wchar_t), compare);
	qsort((char *)upper, nel, sizeof(wchar_t), compare);
	last = current - 1;
	current = 0;
	/* combine overlapping or adjacent ranges */
	for(i = 0; i < last; i++)
		if(upper[i] >= lower[i+1] - 1)
			upper[current] = upper[i+1];
		else {
			lower[++current] = lower[i+1];
			upper[current] = upper[i+1];
		}
	if(type == NCCL) {
		/* find complement of character class */
		int j, next;
		/* skip the single-byte ranges; they are complemented elsewhere */
		i = 0;
		while(i <= current && (isascii(c=lower[i]) || c <= 0377 && iscntrl(c)))
			i++;
		if(i > current) {
			/* match all multibyte characters */
			if(eucw2) {
				lower[i] = maxmin(MASK2, 0);
				upper[i++] = maxmin(MASK2, 1);
			}
			if(eucw3) {
				lower[i] = maxmin(MASK3, 0);
				upper[i++] = maxmin(MASK3, 1);
			}
			lower[i] = maxmin(MASK1, 0);
			upper[i++] = maxmin(MASK1, 1);
			return i - 1;
		}
		/* the complement is built after the existing ranges, from index next */
		next = current + 1;
		if(next + current + 2 >= maxwclin) {
			maxwclin += MAXLIN + next + current + 2;
			if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 ||
			   (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0)
				overflo();
		}
		/* gap before the first multibyte range */
		if(eucw2 && lower[i] > maxmin(MASK2, 0)) {
			lower[next] = maxmin(MASK2, 0);
			if((lower[i] & MASK) != MASK2) {
				upper[next++] = maxmin(MASK2, 1);
				if((lower[i] & MASK) == MASK1 && eucw3) {
					lower[next] = maxmin(MASK3, 0);
					upper[next++] = maxmin(MASK3, 1);
				}
				if(lower[i] > maxmin(lower[i] & MASK, 0)) {
					lower[next] = maxmin(lower[i] & MASK, 0);
					upper[next++] = lower[i] - 1;
				}
			} else
				upper[next++] = lower[i] - 1;
		} else if(lower[i] > maxmin(lower[i] & MASK, 0)) {
			lower[next] = maxmin(lower[i] & MASK, 0);
			upper[next++] = lower[i] - 1;
		}
		/* gaps between consecutive ranges */
		for(j = i; j < current; j++) {
			if(upper[j] < maxmin(upper[j] & MASK, 1)) {
				lower[next] = upper[j] + 1;
				if((upper[j] & MASK) != (lower[j+1] & MASK)) {
					upper[next++] = maxmin(upper[j] & MASK, 1);
					if(eucw3 && (upper[j] & MASK) == MASK2 && (lower[j+1] & MASK) == MASK1) {
						lower[next] = maxmin(MASK3, 0);
						upper[next++] = maxmin(MASK3, 1);
					}
					if(lower[j+1] > maxmin(lower[j+1] & MASK, 0)) {
						lower[next] = maxmin(lower[j+1] & MASK, 0);
						upper[next++] = lower[j+1] - 1;
					}
				} else
					upper[next++] = lower[j+1] - 1;
			} else if(lower[j+1] > maxmin(lower[j+1] & MASK, 0)) {
				lower[next] = maxmin(lower[j+1] & MASK, 0);
				upper[next++] = lower[j+1] - 1;
			}
		}
		/* gap after the last range */
		if(upper[current] < maxmin(upper[current] & MASK, 1)) {
			lower[next] = upper[current] + 1;
			upper[next++] = maxmin(upper[current] & MASK, 1);
		}
		if((upper[current] & MASK) != MASK1) {
			if((upper[current] & MASK) == MASK2 && eucw3) {
				lower[next] = maxmin(MASK3, 0);
				upper[next++] = maxmin(MASK3, 1);
			}
			lower[next] = maxmin(MASK1, 0);
			upper[next++] = maxmin(MASK1, 1);
		}
		/* move the complement down over the multibyte ranges it replaces */
		for(j = current + 1; j < next; j++) {
			lower[i] = lower[j];
			upper[i++] = upper[j];
		}
		current = i - 1;
	}
	return(current);
}
/*
 * compare -- qsort() comparison function for arrays of wchar_t.
 *
 * c, d  pointers to the two wchar_t elements being compared.
 *
 * Returns -1, 0 or 1 when *c is less than, equal to or greater than *d.
 * The parameters are const void *, which is the signature qsort() calls
 * through; the previous wchar_t * definition did not match it.
 */
int compare(const void *c, const void *d)
{
	wchar_t first = *(const wchar_t *)c;
	wchar_t second = *(const wchar_t *)d;

	if (first < second)
		return -1;
	if (first == second)
		return 0;
	return 1;
}
/*
 * maxmin -- return the smallest (flag 0) or largest (flag 1) process code
 * of a code set.
 *
 * c     code-set selector: MASK1, MASK2 or MASK3.
 * flag  0 for the minimum, 1 for the maximum.
 *
 * The six values are computed on the first call, by converting byte
 * strings filled with the lowest / highest non-control byte in
 * 0200..0377 (prefixed with SS2 or SS3 for the second and third code
 * sets), and are cached in statics.
 *
 * Any other selector now returns 0; previously control ran off the end
 * of the function and the caller received an indeterminate value.
 */
wchar_t maxmin(c, flag)
wchar_t c;
int flag;
{
	static wchar_t minmax1[2], minmax2[2], minmax3[2];

	if(!minmax1[0]) {
		/* compute min and max process codes for all code sets */
		int length, i;
		char multic[MB_LEN_MAX], minmax[2];

		for(i = 0377; i >= 0200; i--)
			if(!iscntrl(i))
				break;
		minmax[1] = i;
		for(i = 0200; i <= 0377; i++)
			if(!iscntrl(i))
				break;
		minmax[0] = i;
		for(i = 0; i <= 1; i++) {
			length = MB_LEN_MAX;
			while(length--)
				multic[length] = minmax[i];
			mbtowc(&minmax1[i], multic, MB_LEN_MAX);
			if(eucw2) {
				multic[0] = SS2;
				mbtowc(&minmax2[i], multic, MB_LEN_MAX);
			}
			if(eucw3) {
				multic[0] = SS3;
				mbtowc(&minmax3[i], multic, MB_LEN_MAX);
			}
		}
	}
	switch(c) {
	case MASK1: return minmax1[flag];
	case MASK2: return minmax2[flag];
	case MASK3: return minmax3[flag];
	default: break;
	}
	/* not a code-set selector */
	return 0;
}