1
0
Files
irix-657m-src/eoe/cmd/rfind/lib/str5pack.c
2022-09-29 17:59:04 +03:00

78 lines
2.0 KiB
C

/*
* The str5pack routine converts a nul-terminated string
* into a single 32 bit word that encodes the first 5
* ASCII characters. The conversion is many to one,
* but it preserves the ASCII sorting order. That is,
* if string A produces encoding e(A), and string B produces
* encoding e(B), and if e(A) < e(B), then it must have
* been the case that string A < string B.
*
* The encoding borrows its idea from another encoding,
* of 3 characters into a 16 bit word, that is part of "my
* oral tradition". That encoding collapses the 256 possible
* character values into 40 values in the range {0..39},
* and encodes 3 characters using base-40 representation.
* It works because 40**3 is less than 2**16 (64K).
*
* The present encoding, reflecting the steady march of
* computer technology to larger word sizes, maps the
* 256 possible characters to the range {0..83}, and
* encodes 5 such collapsed values using base-84 representation.
* This one works because 84**5 < 2**32. Having 84 values
* allows most printable ASCII characters to be mapped
* one-to-one, including digits and upper and lower letters.
*/
#include <string.h>
#include "fsdump.h"
/*
 * Radix of the packed encoding: every character is collapsed to a
 * "digit" in the range {0..BASE-1} before being packed.
 */
#define BASE 84
/*
 * Place values for the packed digits.  scale[i] is the weight given to
 * the i-th character of the string, so the first character is the most
 * significant digit and the last one has weight 1.  The initializer
 * supplies five weights (BASE**4 down to BASE**0); STR5LEN is not
 * defined in this file and is presumably 5 -- confirm against fsdump.h.
 */
static unsigned scale[STR5LEN] = {BASE*BASE*BASE*BASE, BASE*BASE*BASE, BASE*BASE, BASE, 1};
/*
 * str5pack - pack the leading characters of a string into one str5_t.
 *
 * s:	nul-terminated string to encode; may be NULL.
 *
 * Returns the packed value.  A NULL pointer and an empty string both
 * encode as 0.  At most STR5LEN characters are examined; anything past
 * that, or past the terminating nul, contributes nothing.
 *
 * Each character is first collapsed to a digit:
 *	0x01 .. 0x1F	-> 1
 *	0x20 .. 0x2A	-> 2		(space to '*')
 *	0x2B .. 0x7A	-> 3 .. 82	('+' to 'z', one-to-one)
 *	0x7B .. 0xFF	-> 83		('{' and above)
 * Digit 0 is never produced for a real character, so a shorter string
 * always packs lower than any longer string that it is a prefix of.
 */
str5_t str5pack (register char *s) {
	str5_t packed = 0;
	int pos;

	if (s == NULL)
		return packed;

	for (pos = 0; pos < STR5LEN; pos++) {
		/* Look at the byte as unsigned so 0x80..0xFF sort high. */
		unsigned char ch = (unsigned char) s[pos];
		str5_t digit;

		if (ch == 0)
			break;			/* end of string: remaining digits stay 0 */

		if (ch <= 0x1F)
			digit = 1;
		else if (ch <= 0x2A)
			digit = 2;
		else if (ch <= 0x7A)
			digit = 3 + (ch - 0x2B);
		else
			digit = 83;

		packed += digit * scale[pos];
	}
	return packed;
}
/*
 * str5unpack - debugging aid that turns a packed str5_t back into text.
 *
 * The packing is many-to-one, so this yields only one representative
 * string for the value: digit 0 becomes nul, 1 becomes 0x01, 2 becomes
 * a space, 3..82 map back to '+'..'z', and anything larger becomes '{'.
 * If any value is left over after STR5LEN digits have been peeled off,
 * the input was too large to be a valid encoding and the first
 * character is replaced with '~' to flag it.
 *
 * The result lives in a single static buffer which is overwritten by
 * the next call.  The final byte of that buffer is never written, so
 * the returned string is always nul-terminated.
 */
static char buf[STR5LEN+1];
char *str5unpack (str5_t val) {
	int pos = STR5LEN;

	/* Peel digits off the low end, filling the buffer right to left. */
	while (--pos >= 0) {
		str5_t digit = val % BASE;
		char ch;

		val /= BASE;

		if (digit == 0)
			ch = 0;
		else if (digit == 1)
			ch = 1;
		else if (digit == 2)
			ch = 0x20;
		else if (digit <= 82)
			ch = (digit - 3) + 0x2B;
		else
			ch = '{';

		buf[pos] = ch;
	}

	if (val != 0)
		buf[0] = '~';
	return buf;
}