From 71d03c72818f36275d47ee0fe63c9ebb27fd0829 Mon Sep 17 00:00:00 2001
From: Werner Almesberger
Date: Sun, 21 Aug 2016 11:34:12 -0300
Subject: [PATCH] eeshow/gfx/pdftoc.c: edit Cairo-generated PDF to insert TOC

Based on schtoc.pl
---
 eeshow/Makefile     |   2 +-
 eeshow/gfx/pdftoc.c | 471 ++++++++++++++++++++++++++++++++++++++++++++
 eeshow/gfx/pdftoc.h |  27 ++++
 3 files changed, 499 insertions(+), 1 deletion(-)
 create mode 100644 eeshow/gfx/pdftoc.c
 create mode 100644 eeshow/gfx/pdftoc.h

diff --git a/eeshow/Makefile b/eeshow/Makefile
index ff4c14b..db3a01b 100644
--- a/eeshow/Makefile
+++ b/eeshow/Makefile
@@ -21,7 +21,7 @@ OBJS = main.o version.o \
 	gui/help.o gui/icons.o \
 	file/file.o file/git-util.o file/git-file.o file/git-hist.o \
 	gfx/style.o gfx/fig.o gfx/record.o gfx/cro.o gfx/diff.o gfx/gfx.o \
-	gfx/text.o gfx/misc.o \
+	gfx/text.o gfx/misc.o gfx/pdftoc.o \
 	misc/diag.o
 
 ICONS = delta diff
diff --git a/eeshow/gfx/pdftoc.c b/eeshow/gfx/pdftoc.c
new file mode 100644
index 0000000..5324823
--- /dev/null
+++ b/eeshow/gfx/pdftoc.c
@@ -0,0 +1,471 @@
+/*
+ * gfx/pdftoc.c - PDF writer with TOC generation
+ *
+ * Written 2016 by Werner Almesberger
+ * Copyright 2016 by Werner Almesberger
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * Strongly influenced by https://neo900.org/git?p=misc;a=tree;f=schtoc
+ *
+ * PDF Reference:
+ * http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
+ */
+
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "misc/util.h"
+#include "misc/diag.h"
+#include "gfx/pdftoc.h"
+
+
+/* One outline (TOC) entry; entries are kept in the order they were added. */
+struct title {
+	char *s;
+	struct title *next;
+};
+
+/* What we know about one PDF object, indexed by object number. */
+struct object {
+	int gen;
+	unsigned pos;
+	bool is_page;
+};
+
+struct pdftoc {
+	FILE *file;
+
+	enum state {
+		idle,	/* between objects */
+		object,	/* inside an object */
+		catalog,/* inside the catalog object */
+		xref,	/* stopped at xref */
+		trailer,/* going through the trailer */
+	} state;
+
+	struct title *titles;
+	struct title **next_title;
+	unsigned n_titles;
+
+	char *buf;
+	unsigned left;		/* bytes left in buffer */
+	unsigned offset;	/* offset into buffer */
+	unsigned pos;		/* position in file */
+
+	struct object *objs;	/* object array */
+	struct object *curr_obj;
+	int top;		/* highest object number; -1 if no objects */
+
+	int root;		/* catalog dict */
+	int info;		/* information dict, 0 if absent */
+};
+
+
+/* Returns true if string "s" starts with "pfx". */
+static bool begins(const char *s, const char *pfx)
+{
+	return !strncmp(s, pfx, strlen(pfx));
+}
+
+
+/*
+ * Returns true if line "s" opens a page dictionary. The name has to end right
+ * after "/Page", so that the page tree node ("/Type /Pages"), which shares
+ * the prefix, is not mistaken for a page.
+ */
+static bool is_page_dict(const char *s)
+{
+	static const char pfx[] = "<< /Type /Page";
+
+	if (!begins(s, pfx))
+		return 0;
+	/* a name ends at white space, a delimiter, or (NUL) the end of line */
+	return strchr(" \t\r\f()<>[]{}/%", s[sizeof(pfx) - 1]) != NULL;
+}
+
+
+/*
+ * Creates a writer that sends its output to "file", or to standard output if
+ * "file" is NULL. A failure to open the file is reported with diag_pfatal.
+ */
+struct pdftoc *pdftoc_begin(const char *file)
+{
+	struct pdftoc *ctx;
+
+	ctx = alloc_type(struct pdftoc);
+	if (file) {
+		/* binary mode, since the xref table holds byte offsets */
+		ctx->file = fopen(file, "wb");
+		if (!ctx->file)
+			diag_pfatal(file);
+	} else {
+		ctx->file = stdout;
+	}
+
+	ctx->state = idle;
+
+	ctx->titles = NULL;
+	ctx->next_title = &ctx->titles;
+	ctx->n_titles = 0;
+
+	ctx->buf = NULL;
+	ctx->left = 0;
+	ctx->offset = 0;
+	ctx->pos = 0;
+
+	ctx->objs = NULL;
+	ctx->curr_obj = NULL;
+	ctx->top = -1;
+
+	ctx->root = 0;
+	ctx->info = 0;
+
+	return ctx;
+}
+
+
+/*
+ * Records object "id" with generation "gen" at file position "pos" and makes
+ * it the current object. The object array grows as needed; entries of ids
+ * that were skipped are zeroed. Growing may move the array, so pointers into
+ * ctx->objs must not be held across a call.
+ */
+static void add_object(struct pdftoc *ctx, int id, int gen, unsigned pos)
+{
+	struct object *objs, *obj;
+
+	if (id > ctx->top) {
+		objs = realloc(ctx->objs,
+		    ((size_t) id + 1) * sizeof(struct object));
+		if (!objs)
+			diag_pfatal("realloc");
+		memset(objs + ctx->top + 1, 0,
+		    (id - ctx->top) * sizeof(struct object));
+		ctx->objs = objs;
+		ctx->top = id;
+	}
+
+	obj = ctx->objs + id;
+	ctx->curr_obj = obj;
+	obj->gen = gen;
+	obj->pos = pos;
+	obj->is_page = 0;
+}
+
+
+/*
+ * Tries to parse "s" as the header of an object ("<id> <gen> obj"). If it is
+ * one, the object is recorded at the current file position and true is
+ * returned.
+ */
+static bool parse_object(struct pdftoc *ctx, const char *s)
+{
+	int id, gen;
+	int n = 0;
+
+	/* "n" only gets set if the literal "obj" has matched, too */
+	if (sscanf(s, "%d %d obj%n", &id, &gen, &n) != 2 || !n)
+		return 0;
+	/* a negative id would index before the start of the object array */
+	if (id < 0 || gen < 0)
+		return 0;
+	add_object(ctx, id, gen, ctx->pos);
+	return 1;
+}
+
+
+/*
+ * Feeds one NUL-terminated line of the input PDF to the state machine. When
+ * the line closing the catalog dictionary is seen, a reference to the outline
+ * root is written to the output (the caller writes the line itself later).
+ */
+static void line(struct pdftoc *ctx, const char *s)
+{
+	switch (ctx->state) {
+	case idle:
+		if (parse_object(ctx, s)) {
+			ctx->state = object;
+			break;
+		}
+		if (begins(s, "xref")) {
+			ctx->state = xref;
+			break;
+		}
+		break;
+	case object:
+		if (begins(s, "endobj")) {
+			ctx->state = idle;
+			break;
+		}
+		if (is_page_dict(s)) {
+			ctx->curr_obj->is_page = 1;
+			break;
+		}
+		if (begins(s, "<< /Type /Catalog")) {
+			ctx->state = catalog;
+			break;
+		}
+		break;
+	case catalog:
+		if (begins(s, ">>")) {
+			ctx->state = object;
+			/*
+			 * write_trailer numbers the outline root "top + 1",
+			 * using the value of "top" at the end of the input.
+			 * NOTE(review): this assumes that no object with a
+			 * higher number follows the catalog - confirm.
+			 */
+			ctx->pos += fprintf(ctx->file,
+			    " /Outlines %u 0 R\n",
+			    (unsigned) (ctx->top + 1));
+			break;
+		}
+		break;
+	case xref:
+		abort();
+	case trailer:
+		if (sscanf(s, " /Root %d 0 R", &ctx->root) == 1)
+			break;
+		if (sscanf(s, " /Info %d 0 R", &ctx->info) == 1)
+			break;
+		break;
+	default:
+		abort();
+	}
+}
+
+
+/*
+ * Processes the complete lines in the buffer, stopping at the line that
+ * starts the xref table. With "do_write" set, each processed line is also
+ * copied to the output and counted in the file position. The "xref" line
+ * itself stays in the buffer and is not written.
+ */
+static void parse_buffer(struct pdftoc *ctx, bool do_write)
+{
+	unsigned size, wrote;
+	char *nl;
+
+	while (ctx->state != xref) {
+		nl = memchr(ctx->buf + ctx->offset, '\n', ctx->left);
+		if (!nl)
+			break;
+		/* terminate the line only while the state machine looks at it */
+		*nl = 0;
+		size = nl - (ctx->buf + ctx->offset);
+		line(ctx, ctx->buf + ctx->offset);
+		*nl = '\n';
+		if (ctx->state == xref)
+			break;
+		if (do_write) {
+			wrote = fwrite(ctx->buf + ctx->offset, 1, size + 1,
+			    ctx->file);
+			if (wrote != size + 1)
+				diag_pfatal("fwrite");
+			ctx->pos += size + 1;
+		}
+		ctx->offset += size + 1;
+		ctx->left -= size + 1;
+	}
+}
+
+
+/*
+ * Accepts "length" bytes of PDF data. The data is appended to what is left
+ * from earlier calls, then all complete lines are processed; an incomplete
+ * last line stays buffered. Always returns true.
+ */
+bool pdftoc_write(struct pdftoc *ctx, const void *data, unsigned length)
+{
+	char *buf;
+
+	buf = alloc_size(ctx->left + length + 1);
+	/* ctx->buf is NULL on the first call: don't pass it to memcpy */
+	if (ctx->left)
+		memcpy(buf, ctx->buf + ctx->offset, ctx->left);
+	if (length)
+		memcpy(buf + ctx->left, data, length);
+	ctx->offset = 0;
+	ctx->left += length;
+	free(ctx->buf);
+	ctx->buf = buf;
+
+	parse_buffer(ctx, 1);
+
+	return 1;
+}
+
+
+/*
+ * Appends "title" (which is copied) to the list of outline entries. Titles
+ * are later matched with pages in the order in which they were added.
+ */
+void pdftoc_title(struct pdftoc *ctx, const char *title)
+{
+	struct title *t;
+
+	t = alloc_type(struct title);
+	t->s = stralloc(title);
+	*ctx->next_title = t;
+	t->next = NULL;
+	ctx->next_title = &t->next;
+	ctx->n_titles++;
+}
+
+
+/*
+ * Writes "s" as a PDF literal string: enclosed in parentheses, with any
+ * parentheses and backslashes inside escaped, so that a title cannot end
+ * the string early. Returns the number of bytes written.
+ */
+static unsigned write_string(FILE *file, const char *s)
+{
+	unsigned n = 2;		/* the enclosing parentheses */
+
+	fputc('(', file);
+	for (; *s; s++) {
+		if (*s == '(' || *s == ')' || *s == '\\') {
+			fputc('\\', file);
+			n++;
+		}
+		fputc(*s, file);
+		n++;
+	}
+	fputc(')', file);
+	return n;
+}
+
+
+/*
+ * Writes the outline root, one outline item per title, the xref table, and
+ * the trailer. The n-th title points to the n-th page, with pages taken in
+ * the order of their object numbers.
+ */
+static void write_trailer(struct pdftoc *ctx)
+{
+	const int end = ctx->top + 1;	/* the input's objects are 0 ... end-1 */
+	unsigned n = end;
+	const struct object *obj;
+	const struct title *t;
+	unsigned outline, tail;
+	int page = 0;			/* where to look for the next page */
+	int i;
+
+	/* Outline root */
+
+	outline = n;
+	add_object(ctx, n, 0, ctx->pos);
+	tail = fprintf(ctx->file,
+	    "%u 0 obj\n<<\n"
+	    " /Count %u\n",
+	    n, ctx->n_titles);
+	/* without titles, there are no items /First and /Last could name */
+	if (ctx->n_titles)
+		tail += fprintf(ctx->file,
+		    " /First %u 0 R\n"
+		    " /Last %u 0 R\n",
+		    n + 1, n + ctx->n_titles);
+	tail += fprintf(ctx->file, ">>\nendobj\n");
+
+	/* Outline items */
+
+	n++;
+	for (t = ctx->titles; t; t = t->next) {
+		/*
+		 * Pages are tracked by index, not by pointer, because
+		 * add_object may move the object array.
+		 */
+		while (page != end && !ctx->objs[page].is_page)
+			page++;
+		if (page == end) {
+			fatal("more titles than pages\n");
+			exit(1);
+		}
+		add_object(ctx, n, 0, ctx->pos + tail);
+		tail += fprintf(ctx->file,
+		    "%u 0 obj\n<<\n"
+		    " /Title ", n);
+		tail += write_string(ctx->file, t->s);
+		tail += fprintf(ctx->file,
+		    "\n"
+		    " /Parent %u 0 R\n", outline);
+		if (t != ctx->titles)
+			tail += fprintf(ctx->file,
+			    " /Prev %u 0 R\n", n - 1);
+		if (t->next)
+			tail += fprintf(ctx->file,
+			    " /Next %u 0 R\n", n + 1);
+		tail += fprintf(ctx->file,
+		    " /Dest [%d %d R /Fit]\n"
+		    ">>\nendobj\n",
+		    page, ctx->objs[page].gen);
+		n++;
+		page++;
+	}
+
+	/* xref table */
+
+	fprintf(ctx->file, "xref\n0 %u\n", n);
+	for (i = 0; i <= ctx->top; i++) {
+		obj = ctx->objs + i;
+		/* position 0 means that we have not seen this object */
+		fprintf(ctx->file,
+		    "%010u %05u %c \n",
+		    obj->pos, obj->pos ? (unsigned) obj->gen : 65535u,
+		    obj->pos ? 'n' : 'f');
+	}
+
+	fprintf(ctx->file,
+	    "trailer\n"
+	    "<< /Size %u\n"
+	    " /Root %u 0 R\n",
+	    n, (unsigned) ctx->root);
+	if (ctx->info)
+		fprintf(ctx->file, " /Info %u 0 R\n", (unsigned) ctx->info);
+	fprintf(ctx->file, ">>\nstartxref\n%u\n%%%%EOF\n", ctx->pos + tail);
+}
+
+
+/*
+ * Completes the PDF. The input must have reached its xref table. The rest
+ * of the input is scanned for /Root and /Info but not copied; instead, the
+ * outline, a new xref table, and a new trailer are written. Then the file is
+ * closed and "ctx" is freed.
+ */
+void pdftoc_end(struct pdftoc *ctx)
+{
+	struct title *next;
+
+	assert(ctx->state == xref);
+	ctx->state = trailer;
+	parse_buffer(ctx, 0);
+	if (ctx->left) {
+		fatal("%u bytes left in buffer at end\n", ctx->left);
+		exit(1);
+	}
+
+	write_trailer(ctx);
+
+	if (fclose(ctx->file) < 0)
+		diag_pfatal("fclose");
+
+	while (ctx->titles) {
+		next = ctx->titles->next;
+		free(ctx->titles->s);
+		free(ctx->titles);
+		ctx->titles = next;
+	}
+	free(ctx->objs);
+	free(ctx->buf);
+	free(ctx);
+}
diff --git a/eeshow/gfx/pdftoc.h b/eeshow/gfx/pdftoc.h
new file mode 100644
index 0000000..8ccdbe5
--- /dev/null
+++ b/eeshow/gfx/pdftoc.h
@@ -0,0 +1,27 @@
+/*
+ * gfx/pdftoc.h - PDF writer with TOC generation
+ *
+ * Written 2016 by Werner Almesberger
+ * Copyright 2016 by Werner Almesberger
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+
+#ifndef GFX_PDFTOC_H
+#define GFX_PDFTOC_H
+
+#include <stdbool.h>
+
+
+struct pdftoc;
+
+struct pdftoc *pdftoc_begin(const char *file);
+bool pdftoc_write(struct pdftoc *ctx, const void *data, unsigned length);
+void pdftoc_title(struct pdftoc *ctx, const char *title);
+void pdftoc_end(struct pdftoc *ctx);
+
+#endif /* !GFX_PDFTOC_H */