/*
 * gobble.c - Read a package database file in a hurry
 *
 * Written 2010 by Werner Almesberger
 * Copyright 2010 Werner Almesberger
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/mman.h>

#include "util.h"
#include "id.h"
#include "pkg.h"
#include "qpkg.h"
#include "gobble.h"


/* Maximum number of input characters echoed after a syntax error */
#define	CHARS_AFTER_ERROR	20


/*
 * The macros below are the building blocks of the parser. They expect the
 * following names in the scope in which they are expanded:
 *
 *   buf        cursor into the input (const char *)
 *   end        points just past the last input character
 *   failed_at  int that receives the source line of the failing check
 *   fail       label to jump to on a syntax error (used by FAIL)
 *   done       label to jump to when parsing is complete (used by DONE)
 */


/*
 * EXPECT(s) consumes the string literal "s" at the cursor and FAILs if the
 * input does not match or is too short.
 *
 * With BREAKNECK_SPEED, the input is trusted: the cursor is advanced without
 * looking at the data and without checking against "end".
 */

#ifdef BREAKNECK_SPEED

#define	EXPECT(s)	do { buf += sizeof(s)-1; } while (0)

#else /* !BREAKNECK_SPEED */

#define	EXPECT(s)					\
	do {						\
		if ((size_t) (end-buf) < sizeof(s)-1)	\
			FAIL;				\
		if (memcmp(buf, s, sizeof(s)-1))	\
			FAIL;				\
		buf += sizeof(s)-1;			\
	}						\
	while (0)

#endif


/*
 * NEXT yields the character at the cursor and advances. At the end of the
 * input, it yields '?' and leaves the cursor where it is. '?' is not a case
 * in any of the switch statements using NEXT, so running out of input ends up
 * in their default branch.
 */

#define	NEXT	(buf == end ? '?' : *buf++)


/*
 * WHITESPACE skips any run of blanks at the cursor but never moves past a
 * newline, so the end of a line remains visible to the caller. It FAILs if
 * the cursor is already at the end of the input when it is invoked.
 *
 * The cast is needed because passing a negative char to isspace is undefined.
 */

#define	WHITESPACE					\
	do {						\
		if (buf == end)				\
			FAIL;				\
		while (buf != end && *buf != '\n' &&	\
		    isspace((unsigned char) *buf))	\
			buf++;				\
	}						\
	while (0)


/* ISTERM(c) is true for the characters that end an identifier */

#define	ISTERM(c)					\
	((c) == ' ' || (c) == '\t' || (c) == '\n' ||	\
	 (c) == ',' || (c) == ')')


/*
 * ID(tree) collects the characters from the cursor up to the next terminator
 * (see ISTERM) or the end of the input, and passes them to make_id for the
 * given tree. The value of the expression is whatever make_id returns. The
 * collected string may be empty if the cursor is already at a terminator.
 * FAILs if there is no input left.
 *
 * This uses a GNU C statement expression.
 */

#define	ID(tree)					\
	({						\
		const char *start;			\
							\
		if (buf == end)				\
			FAIL;				\
		start = buf;				\
		while (buf != end && !ISTERM(*buf))	\
			buf++;				\
		make_id(tree, start, buf-start);	\
	})


/*
 * FAIL records the source line of the check that gave up in "failed_at" and
 * jumps to the "fail" label.
 */

#define	FAIL				\
	do {				\
		failed_at = __LINE__;	\
		goto fail;		\
	}				\
	while (0)


/* DONE jumps to the "done" label */

#define	DONE goto done


/*
 * finish_pkg - sanity-check a package once all its fields have been read
 *
 * "new" is the package that has just been parsed, "jrb" the tree node it
 * hangs off.
 *
 * A package without version, without architecture, or with neither a file
 * name nor the QPKG_INSTALLED flag is reported on stderr and ends the
 * program with exit status 1.
 *
 * If one of the packages chained after "new" through "more" has the same
 * version (versions are compared by identity, not by content), the two are
 * folded into one: the node is made to point to the rest of the chain, the
 * flags of "new" are added to the older entry, and "new" is released with
 * free_pkg.
 */
static void finish_pkg(struct pkg *new, struct jrb *jrb)
{
	struct pkg *twin;

	if (!new->version) {
		fprintf(stderr, "package %.*s has no version\n",
		    ID2PF(new->id));
		exit(1);
	}

	if (!new->arch) {
		fprintf(stderr,
		    "package %.*s version %.*s has no architecture\n",
		    ID2PF(new->id), ID2PF(new->version));
		exit(1);
	}

	if (!(new->filename || (new->flags & QPKG_INSTALLED))) {
		fprintf(stderr,
		    "package %.*s version %.*s has no file name "
		    "(nor is it installed)\n",
		    ID2PF(new->id), ID2PF(new->version));
		exit(1);
	}

	/* look for an earlier entry with the very same version */
	twin = new->more;
	while (twin && twin->version != new->version)
		twin = twin->more;
	if (!twin)
		return;

	/* found one: drop "new" from the chain but keep its flags */
	jrb->val = new->more;
	twin->flags |= new->flags;
	free_pkg(new);
}


/*
 * gobble_buf - parse a package database held in memory
 *
 * "name" is only used in error messages. "buf" points to "len" bytes of
 * input, which do not need to be NUL-terminated.
 *
 * The parser is a state machine built from labels and gotos. "initial"
 * decodes the field tag at the beginning of a line, character by character,
 * and jumps to the handler of that field:
 *
 *   Package        finishes the previous package (if any) and starts a new one
 *   Version        stores the version ID in the current package
 *   Architecture,
 *   Filename       store a pointer to the field value. This pointer points
 *                  into "buf", so the buffer must stay around for as long as
 *                  the package records are used.
 *   Status         marks the package as installed (QPKG_INSTALLED). The
 *                  value of the field is not examined.
 *   Depends, Conflicts, Provides
 *                  build a list of references (struct ref), each with an
 *                  optional "(relop version)" constraint
 *
 * All other known fields are skipped, including their continuation lines.
 * An unknown tag is a syntax error.
 *
 * On a syntax error, a message with the name, the line number, and up to
 * CHARS_AFTER_ERROR characters of the offending input is printed to stderr,
 * and the program exits with status 1.
 */
static void gobble_buf(const char *name, const char *buf, size_t len)
{
	const char *end = buf+len;
	int lineno = 1;
	struct pkg *pkg = NULL; /* current package */
	struct jrb *jrb = NULL; /* RB tree node of current package */
	struct ref **anchor = NULL; /* where to link the next list entry */
	int i, failed_at = 0;

initial:
	if (buf == end)
		DONE;
	if (*buf == '\n') {
		lineno++;
		buf++;
		goto initial;
	}

	/* decode the tag */

	switch (*buf++) {
	case 'A':	/* Architecture // Auto-Installed */
		switch (NEXT) {
		case 'r':
			EXPECT("chitecture:");
			goto architecture;
		case 'u':
			EXPECT("to-Installed:");
			goto skip_data;
		default:
			FAIL;
		}

	case 'B':	/* Bugs */
		EXPECT("ugs:");
		goto skip_data;

	case 'C':	/* Conflicts // Conffiles  */
		EXPECT("onf");
		switch (NEXT) {
		case 'l':
			EXPECT("icts:");
			goto conflicts;
		case 'f':
			EXPECT("iles:");
			goto skip_data;
		default:
			FAIL;
		}

	case 'D':	/* Depends, Description */
		EXPECT("e");
		switch (NEXT) {
		case 'p':
			EXPECT("ends:");
			goto depends;
		case 's':
			EXPECT("cription:");
			goto skip_data;
		default:
			FAIL;
		}

	case 'F':	/* Filename */
		EXPECT("ilename:");
		goto filename;

	case 'H':	/* HomePage, Homepage */
		EXPECT("ome");
		switch (NEXT) {
		case 'P':
		case 'p':
			EXPECT("age:");
			goto skip_data;
		default:
			FAIL;
		}

	case 'I':	/* Installed-Size, Installed-Time */
		EXPECT("nstalled-");
		switch (NEXT) {
		case 'S':
			EXPECT("ize:");
			goto skip_data;
		case 'T':
			EXPECT("ime:");
			goto skip_data;
		default:
			FAIL;
		}

	case 'L':	/* License */
		EXPECT("icense:");
		goto skip_data;

	case 'M':	/* Maintainer, MD5Sum, MD5sum */
		switch (NEXT) {
		case 'a':
			EXPECT("intainer:");
			goto skip_data;
		case 'D':
			EXPECT("5");
			switch (NEXT) {
			case 'S':
			case 's':
				break;
			default:
				FAIL;
			}
			EXPECT("um:");
			goto skip_data;
		default:
			FAIL;
		}

	case 'O':	/* OE, Origin, Original-Maintainer */
		switch (NEXT) {
		case 'E':
			EXPECT(":");
			goto skip_data;
		case 'r':
			EXPECT("igin");
			switch (NEXT) {
			case ':':
				break;
			case 'a':
				EXPECT("l-Maintainer:");
				break;
			default:
				FAIL;
			}
			goto skip_data;
		default:
			FAIL;
		}

	case 'P':	/* Package, Priority, Provides */
		switch (NEXT) {
		case 'a':
			EXPECT("ckage:");
			goto package;
		case 'r':
			break;
		default:
			FAIL;
		}
		switch (NEXT) {
		case 'i':
			EXPECT("ority:");
			goto skip_data;
		case 'o':
			EXPECT("vides:");
			goto provides;
		default:
			FAIL;
		}

	case 'R':	/* Recommends, Replaces */
		EXPECT("e");
		switch (NEXT) {
		case 'c':
			EXPECT("ommends:");
			goto skip_data;
		case 'p':
			EXPECT("laces:");
			goto skip_data;
		default:
			FAIL;
		}

	case 'S':	/* Section, SHA1, SHA256, Size, Source, Suggests
			   // Status */
		switch (NEXT) {
		case 'e':
			EXPECT("ction:");
			goto skip_data;
		case 'H':
			EXPECT("A");
			switch (NEXT) {
			case '1':
				EXPECT(":");
				break;
			case '2':
				EXPECT("56:");
				break;
			default:
				FAIL;
			}
			goto skip_data;
		case 'i':
			EXPECT("ze:");
			goto skip_data;
		case 'o':
			EXPECT("urce:");
			goto skip_data;
		case 'u':
			EXPECT("ggests:");
			goto skip_data;
		case 't':
			EXPECT("atus:");
			goto status;
		default:
			FAIL;
		}

	case 'T':	/* Task */
		EXPECT("ask:");
		goto skip_data;

	case 'V':	/* Version */
		EXPECT("ersion:");
		goto version;

	default:
		FAIL;
	}

	/*
	 * Field handlers. All handlers that store something in the current
	 * package first make sure there is one: a field that precedes the
	 * first "Package:" is a syntax error and must not dereference NULL.
	 */

conflicts:
	if (!pkg)
		FAIL;
	anchor = &pkg->conflicts;
	goto list_with_version;

depends:
	if (!pkg)
		FAIL;
	anchor = &pkg->depends;
	goto list_with_version;

package:
	if (pkg)
		finish_pkg(pkg, jrb);

	WHITESPACE;
	jrb = ID(packages);
	pkg = new_pkg(jrb);
	goto eol;

version:
	if (!pkg)
		FAIL;
	WHITESPACE;
	if (pkg->version)
		FAIL;
	pkg->version = ID(versions)->key;
	goto eol;

architecture:
	if (!pkg)
		FAIL;
	WHITESPACE;
	if (pkg->arch)
		FAIL;
	pkg->arch = buf;	/* points into the input buffer */
	goto skip_data;

provides:
	if (!pkg)
		FAIL;
	anchor = &pkg->provides;
	/*
	 * There should never be a version in the provisions, so it's a bit
	 * wasteful to use a structure that has a version field. But then, code
	 * reuse is nice, too.
	 */
	goto list_with_version;

status:
	if (!pkg)
		FAIL;
	pkg->flags |= QPKG_INSTALLED;
	/* @@@ later */
	goto skip_data;

filename:
	if (!pkg)
		FAIL;
	WHITESPACE;
	if (pkg->filename)
		FAIL;
	pkg->filename = buf;	/* points into the input buffer */
	goto skip_data;

	/*
	 * eol: only blanks may remain on the line, and the next line must not
	 * be a continuation line.
	 */

eol:
	while (buf != end) {
		if (*buf == ' ' || *buf == '\t') {
			buf++;
			continue;
		}
		if (*buf++ != '\n')
			FAIL;
		lineno++;
		if (buf == end)
			DONE;
		if (*buf == ' ' || *buf == '\t')
			FAIL;
		goto initial;
	}
	DONE;

	/*
	 * skip_data: ignore the rest of the field, including any continuation
	 * lines (lines beginning with a blank or tab).
	 */

skip_data:
	while (buf != end) {
		if (*buf++ != '\n')
			continue;
		lineno++;
		if (buf == end)
			DONE;
		if (*buf != ' ' && *buf != '\t')
			goto initial;
	}
	DONE;

	/*
	 * list_with_version: comma-separated list of package names, each
	 * optionally followed by "(relop version)". The entries are appended
	 * to the list "anchor" points to.
	 */

list_with_version:
	while (1) {
		struct ref *ref;

		WHITESPACE;
		ref = alloc_type(struct ref);
		ref->pkg = ID(packages)->key;

		/*
		 * Work around the Wireshark Anomaly
		 */
		if (buf != end && *buf == ')')
			buf++;

		WHITESPACE;
		if (buf == end || *buf != '(')
			ref->version = NULL;
		else {
			buf++;
			switch (NEXT) {
			case '=':
				ref->relop = rel_eq;
				break;
			case '<':
				switch (NEXT) {
				case ' ':
					ref->relop = rel_lt;
					break;
				case '=':
					ref->relop = rel_le;
					break;
				case '<':
					ref->relop = rel_ll;
					break;
				default:
					/*
					 * No separator, e.g., "(<1.0)". Put
					 * the character back and treat this
					 * like "(< 1.0)", so that relop is
					 * always set when there is a version.
					 */
					buf--;
					ref->relop = rel_lt;
				}
				break;
			case '>':
				switch (NEXT) {
				case '=':
					ref->relop = rel_ge;
					break;
				case '>':
					ref->relop = rel_gg;
					break;
				default:
					FAIL;
				}
				break;
			default:
				FAIL;
			}
			WHITESPACE;
			ref->version = ID(versions)->key;
			EXPECT(")");
		}
		*anchor = ref;
		ref->next = NULL;
		anchor = &ref->next;
		if (buf == end)
			DONE;
		if (*buf != ',')
			break;
		buf++;
	}
	anchor = NULL;
	goto eol;

done:
	if (pkg)
		finish_pkg(pkg, jrb);
	return;

fail:
	fprintf(stderr, "syntax derailment #%d at %s line %d: ",
	    failed_at, name, lineno);
	for (i = 0; i != CHARS_AFTER_ERROR && buf != end; i++) {
		/* the ctype functions are undefined for negative values */
		unsigned char c = *buf++;

		if (c == '\n')
			fprintf(stderr, "\\n");
		else if (isspace(c))
			fputc(' ', stderr);
		else if (isprint(c))
			fputc(c, stderr);
	}
	fprintf(stderr, "%s\n", buf == end ? "": "...");
	exit(1);
}


/*
 * We should be able to test for __UCLIBC_HAS_ADVANCED_REALTIME__ specifically,
 * but that doesn't work for some reason. So let's just omit posix_madvise on
 * __UCLIBC__ in general.
 */


#if !defined(__UCLIBC__)

/*
 * do_madvise - posix_madvise with the usual -1/errno error convention
 *
 * posix_madvise reports a failure by returning a positive error number and
 * does not set errno. The callers in this file test the result with "< 0"
 * and report problems with perror, so we translate: on failure, the error
 * number is stored in errno and -1 is returned. On success, the result is 0.
 */
static int do_madvise(void *addr, size_t len, int advice)
{
	int error;

	error = posix_madvise(addr, len, advice);
	if (!error)
		return 0;
	errno = error;
	return -1;
}

#else /* __UCLIBC__ */

/* No posix_madvise: pretend the advice was accepted */
#define	do_madvise(addr, len, advice)	0

#endif /* __UCLIBC__ */


/*
 * gobble - read the package database file "name"
 *
 * The file is mapped read-only into memory and handed to gobble_buf. The
 * mapping is not removed afterwards, because gobble_buf stores pointers into
 * it (e.g., pkg->arch and pkg->filename).
 *
 * An empty file contains no packages and is silently accepted. Any failure
 * to open, stat, map, or advise on the file is reported on stderr and ends
 * the program with exit status 1.
 */
void gobble(const char *name)
{
	int fd;
	struct stat st;
	void *map;

	fd = open(name, O_RDONLY);
	if (fd < 0) {
		perror(name);
		exit(1);
	}
	if (fstat(fd, &st) < 0) {
		perror("fstat");
		exit(1);
	}
	if (!st.st_size) {
		/*
		 * mmap refuses to create a mapping of length zero, and the
		 * parser would find nothing in an empty buffer anyway.
		 */
		(void) close(fd);
		return;
	}
	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}
	/* the mapping remains valid after the descriptor is closed */
	(void) close(fd);

	/* we are about to read the whole file from beginning to end */
	if (do_madvise(map, st.st_size, POSIX_MADV_WILLNEED) < 0) {
		perror("posix_madvise(POSIX_MADV_WILLNEED)");
		exit(1);
	}
	gobble_buf(name, map, st.st_size);
	/* switch the access hint for whatever uses the mapping afterwards */
	if (do_madvise(map, st.st_size, POSIX_MADV_RANDOM) < 0) {
		perror("posix_madvise(POSIX_MADV_RANDOM)");
		exit(1);
	}
}