From 99e577744871520707c37cfc4e6fe5d491ab7dcc Mon Sep 17 00:00:00 2001 From: Werner Almesberger Date: Sun, 20 May 2012 21:55:54 -0300 Subject: [PATCH] b2/: finish unit handling and move most of its processing to the match side The ${foo#unit} syntax didn't really make sense because it created a large number of potential error conditions on the assignment side and didn't help with finding compatible fields. With all this moved to the match side, an invalid syntax simply causes a mismatch. --- b2/SUBST | 22 +++++++++++---- b2/subex.c | 80 ++++++++++++++++++++++++++++++++++++++++++++---------- b2/subst.c | 72 ++++++++++++++++++++++++++---------------------- b2/subst.h | 5 +++- 4 files changed, 124 insertions(+), 55 deletions(-) diff --git a/b2/SUBST b/b2/SUBST index 0c3e1f9..c3811c2 100644 --- a/b2/SUBST +++ b/b2/SUBST @@ -1,14 +1,18 @@ REF=R[0-9]* { T=R - VAL=* { R=$$#R } + VAL=(#R) { R=$1 } TOL = 5% - FN=*% { TOL=${$#%} } + FN=*% { TOL=$$ } break REF // end break again } /* -pattern: () * ? . +pattern: + () | like in RE + * ? like in glob + (#U) expect a numeric value of unit U (use substring to get canonical value) + subst: $1 ... $field substring: @@ -17,9 +21,15 @@ variable: $foo, ... with curly braces: ${foo}, ... 
-unit conversion: - $foo#V - ${1#R} input variable: $$ + + Caveat: + + Wrong: FN=* { X=$FN } there is no variable called FN + Right: FN=* { X=$$ } yields the Fn field selected by FN + + Wrong: VAL=(#R) { R=$VAL } yields literal value + Wrong: VAL=(#R) { R=$$ } yields literal value + Right: VAL=(#R) { R=$1 } yields canonicalized value */ diff --git a/b2/subex.c b/b2/subex.c index 97dd9dd..5b2b9e2 100644 --- a/b2/subex.c +++ b/b2/subex.c @@ -13,8 +13,10 @@ #include #include #include +#include #include #include +#include #include "util.h" #include "vstring.h" @@ -32,17 +34,66 @@ static const char *fn = NULL, *f[FIELDS]; static struct subst jump_end; -/* - * TODO: decide what to do with units - */ + +static char *canonicalize(const char *s, char unit) +{ + char *res; + int res_len = 0; + int seen_dot = 0; + int seen_unit = 0; + char mult = 0; + + if (!unit) + return stralloc(s); + res = stralloc(""); + while (*s) { + if (*s == unit) { + assert(!seen_unit); + if (!s[1]) + break; + if (!seen_dot) + append_char(&res, &res_len, '.'); + seen_unit = seen_dot = 1; + s++; + continue; + } + if (*s == '.') { + assert(!seen_dot); + append_char(&res, &res_len, '.'); + seen_dot = 1; + } else if (*s == '-' || isdigit(*s)) { + append_char(&res, &res_len, *s); + s++; + continue; + } else if (strchr(MULT_CHARS, *s)) { + assert(!seen_unit); + assert(!mult); + mult = *s; + if (!seen_dot) + append_char(&res, &res_len, '.'); + seen_dot = 1; + } else { + abort(); + } + s++; + } + if (res_len && res[res_len-1] == '.') + res_len--; + if (mult) + append_char(&res, &res_len, mult); + append_char(&res, &res_len, unit); + return res; +} + static char *compose(const struct chunk *c, const struct var *in, const struct var *out, - const char *s, const regmatch_t *match) + const char *s, const regmatch_t *match, const char *units) { char *res = stralloc(""); - int res_len = 0; + int res_len = 0, len; const char *val; + char *tmp, *tmp2; int n; while (c) { @@ -69,16 +120,14 @@ static char 
*compose(const struct chunk *c, } if (match[n-1].rm_so == -1) yyerrorf("substring $%d out of range", n); -#if 0 - len = match[n-1].rm_eo-match[n-1].rm_so); + len = match[n-1].rm_eo-match[n-1].rm_so; tmp = alloc_size(len); memcpy(tmp, s+match[n-1].rm_so, len); tmp[len] = 0; - tmp2 = canonicalize(tmp, c->unit); + tmp2 = canonicalize(tmp, units ? units[n-1] : 0); append(&res, &res_len, tmp2); -#endif - append_n(&res, &res_len, s+match[n-1].rm_so, - match[n-1].rm_eo-match[n-1].rm_so); + free(tmp); + free(tmp2); break; default: abort(); @@ -147,7 +196,7 @@ static int do_match(const char *var, const regex_t *re, static const struct subst *recurse_sub(const struct subst *sub, const struct var *in, const char *s, const regmatch_t *match, - struct var **out) + const char *units, struct var **out) { const struct subst *jump; regmatch_t m_tmp[10]; @@ -161,12 +210,13 @@ static const struct subst *recurse_sub(const struct subst *sub, in, *out, &val, m_tmp)) break; jump = recurse_sub(sub->u.match.block, in, val, m_tmp, - out); + sub->u.match.units, out); if (jump && jump != sub) return jump; break; case st_assign: - tmp = compose(sub->u.assign.pat, in, *out, s, match); + tmp = compose(sub->u.assign.pat, in, *out, s, match, + units); do_assign(sub->u.assign.dst, out, tmp); break; case st_end: @@ -199,7 +249,7 @@ struct var *substitute(const struct subst *sub, const struct var *in) f[i] = unique(tmp); } } - recurse_sub(sub, in, NULL, NULL, &out); + recurse_sub(sub, in, NULL, NULL, NULL, &out); return out; } diff --git a/b2/subst.c b/b2/subst.c index 9c5e06b..2c85e83 100644 --- a/b2/subst.c +++ b/b2/subst.c @@ -38,11 +38,33 @@ static struct subst *alloc_subst(enum subst_type type) } -static char *prepare_re(const char *re) +/* + * With M the SI multiplier prefixes and U the unit character, our regexp + * is + * + * (-?[0-9]+\.?[0-9]*M?U?|-?[0-9]+[UM][0-9]*) + * + * The first part is for things like 10, 1.2k, 3.3V, -2mA, etc. + * The second part is for things like 1k20, 1R2, etc. 
+ */ + +static void unit_expr(char **res, int *res_len, char unit) +{ + append(res, res_len, "(-?[0-9]+\\.?[0-9]*[" MULT_CHARS "]?"); + append_char(res, res_len, unit); + append(res, res_len, "?|-?[0-9]+["); + append_char(res, res_len, unit); + append(res, res_len, MULT_CHARS "][0-9]*)"); +} + + +static char *prepare_re(const char *re, int *parens, char *units) { char *res = NULL; int res_len = 0; + *parens = 0; + memset(units, 0, 10); append_char(&res, &res_len, '^'); while (*re) { switch (*re) { @@ -61,6 +83,16 @@ static char *prepare_re(const char *re) append_n(&res, &res_len, re, 2); re++; break; + case '(': + (*parens)++; + if (re[1] == '#' && re[2] && isalpha(re[2]) && + re[3] == ')') { + units[*parens-1] = re[2]; + unit_expr(&res, &res_len, re[2]); + re += 3; + break; + } + /* fall through */ default: append_char(&res, &res_len, *re); } @@ -77,11 +109,11 @@ struct subst *subst_match(const char *src, const char *re) char error[1000]; struct subst *sub; char *tmp; - int err; + int parens, err; sub = alloc_subst(st_match); sub->u.match.src = src; - tmp = prepare_re(re); + tmp = prepare_re(re, &parens, sub->u.match.units); err = regcomp(&sub->u.match.re, tmp, REG_EXTENDED); free(tmp); if (err) { @@ -102,7 +134,6 @@ static void end_chunk(struct chunk ***last, const char *start, const char *s) c = alloc_type(struct chunk); c->type = ct_string; c->u.s = stralloc_n(start, s-start);; - c->unit = NULL; c->next = NULL; **last = c; *last = &c->next; @@ -154,28 +185,9 @@ static const char *parse_var(struct chunk *c, const char *s) c->u.sub = 0; } - if (*t != '#') { - if (braced) { - assert(*t == '}'); - t++; - } - return t; - } - - s = ++t; - while (*t) { - if (braced && *t == '}') - break; - if (!braced && t != s) - break; - t++; - } - if (s == t) - yyerror("invalid unit"); - c->unit = stralloc_n(s, t-s); if (braced) { - if (!*t) - yyerror("unterminated unit"); + if (*t != '}') + yyerror("invalid variable name"); t++; } return t; @@ -204,7 +216,6 @@ static struct chunk 
*parse_pattern(const char *s) end_chunk(&last, start, s); c = alloc_type(struct chunk); - c->unit = NULL; c->next = NULL; *last = c; last = &c->next; @@ -324,18 +335,13 @@ static void dump_chunks(FILE *file, const struct chunk *c) break; case ct_sub: if (c->u.sub) - fprintf(file, "${%d", c->u.sub); + fprintf(file, "$%d", c->u.sub); else - fprintf(file, "${$"); + fprintf(file, "$$"); break; default: abort(); } - if (c->type != ct_string) { - if (c->unit) - fprintf(file, "#%s", c->unit); - fprintf(file, "}"); - } c = c->next; } } diff --git a/b2/subst.h b/b2/subst.h index bca7a47..27f1b8a 100644 --- a/b2/subst.h +++ b/b2/subst.h @@ -31,7 +31,6 @@ struct chunk { const char *var; int sub; /* 0 if $$ */ } u; - const char *unit; /* NULL if no conversion specified */ struct chunk *next; }; @@ -50,6 +49,7 @@ struct subst { const char *src; regex_t re; struct subst *block; + char units[10]; } match; struct { const char *dst; @@ -62,6 +62,9 @@ struct subst { }; +#define MULT_CHARS "GMkmunpf" + + struct subst *subst_match(const char *src, const char *re); struct subst *subst_assign(const char *dst, const char *pat); struct subst *subst_end(void);