text2pdf

Git mirror of http://www.eprg.org/pdfcorner/text2pdf/
git clone git://git.finwo.net/app/text2pdf
Log | Files | Refs | README

commit b9bb773c66aec3810322bf0f20c7fbef3b5b295b
parent 8deaeadcec916b5add53e978adc8cdd44970b4b5
Author: finwo <finwo@pm.me>
Date:   Tue, 21 Jul 2020 13:49:18 +0200

More common argument parsing

Diffstat:
M Makefile | 21 ++++++++++-----------
M config.mk | 7 ++++++-
A lib/argparse/argparse.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/argparse/argparse.h | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/main.c | 410 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D text2pdf.c | 455 -------------------------------------------------------------------------------
6 files changed, 940 insertions(+), 467 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,21 +1,20 @@ include config.mk -BIN =\ - text2pdf +SRC = $(wildcard src/*.c) +SRC += $(LIBSRC) +OBJ = $(SRC:.c=.o) +MAN = $(NAME:=.1) -SRC = $(BIN:=.c) -OBJ = $(BIN:=.o) -MAN = $(BIN:=.1) +default: $(NAME) -all: $(BIN) - -$(BIN): $(LIB) $(OBJ) +$(NAME): $(OBJ) + $(CC) $(INCLUDES) $(CFLAGS) $(CPPFLAGS) -o $@ $^ .o: $(CC) $(LDFLAGS) -o $@ $< $(LIB) .c.o: - $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< + $(CC) $(INCLUDES) $(CFLAGS) -o $@ -c $< install: all @@ -31,5 +30,5 @@ uninstall: for m in $(MAN); do rm -f $(DESTDIR)$(MANPREFIX)/man1/"$$m"; done clean: - rm $(BIN) - rm $(OBJ) + rm -f $(NAME) + rm -f $(OBJ) diff --git a/config.mk b/config.mk @@ -1,4 +1,5 @@ VERSION = 1.1 +NAME = text2pdf PREFIX = /usr/local MANPREFIX = $(PREFIX)/share/man @@ -6,5 +7,9 @@ MANPREFIX = $(PREFIX)/share/man CC = cc AR = ar -CFLAGS = -std=c89 -Wall -O3 +CFLAGS = -std=c99 -Wall -O3 LDFLAGS = -s -O3 + +INCLUDES = -Ilib/argparse +LIBSRC?= +LIBSRC+=lib/argparse/argparse.c diff --git a/lib/argparse/argparse.c b/lib/argparse/argparse.c @@ -0,0 +1,384 @@ +/** + * Copyright (C) 2012-2015 Yecheng Fu <cofyc.jackson at gmail dot com> + * All rights reserved. + * + * Use of this source code is governed by a MIT-style license that can be found + * in the LICENSE file. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include "argparse.h" + +#define OPT_UNSET 1 +#define OPT_LONG (1 << 1) + +static const char * +prefix_skip(const char *str, const char *prefix) +{ + size_t len = strlen(prefix); + return strncmp(str, prefix, len) ? 
NULL : str + len; +} + +static int +prefix_cmp(const char *str, const char *prefix) +{ + for (;; str++, prefix++) + if (!*prefix) { + return 0; + } else if (*str != *prefix) { + return (unsigned char)*prefix - (unsigned char)*str; + } +} + +static void +argparse_error(struct argparse *self, const struct argparse_option *opt, + const char *reason, int flags) +{ + (void)self; + if (flags & OPT_LONG) { + fprintf(stderr, "error: option `--%s` %s\n", opt->long_name, reason); + } else { + fprintf(stderr, "error: option `-%c` %s\n", opt->short_name, reason); + } + exit(1); +} + +static int +argparse_getvalue(struct argparse *self, const struct argparse_option *opt, + int flags) +{ + const char *s = NULL; + if (!opt->value) + goto skipped; + switch (opt->type) { + case ARGPARSE_OPT_BOOLEAN: + if (flags & OPT_UNSET) { + *(int *)opt->value = *(int *)opt->value - 1; + } else { + *(int *)opt->value = *(int *)opt->value + 1; + } + if (*(int *)opt->value < 0) { + *(int *)opt->value = 0; + } + break; + case ARGPARSE_OPT_BIT: + if (flags & OPT_UNSET) { + *(int *)opt->value &= ~opt->data; + } else { + *(int *)opt->value |= opt->data; + } + break; + case ARGPARSE_OPT_STRING: + if (self->optvalue) { + *(const char **)opt->value = self->optvalue; + self->optvalue = NULL; + } else if (self->argc > 1) { + self->argc--; + *(const char **)opt->value = *++self->argv; + } else { + argparse_error(self, opt, "requires a value", flags); + } + break; + case ARGPARSE_OPT_INTEGER: + errno = 0; + if (self->optvalue) { + *(int *)opt->value = strtol(self->optvalue, (char **)&s, 0); + self->optvalue = NULL; + } else if (self->argc > 1) { + self->argc--; + *(int *)opt->value = strtol(*++self->argv, (char **)&s, 0); + } else { + argparse_error(self, opt, "requires a value", flags); + } + if (errno) + argparse_error(self, opt, strerror(errno), flags); + if (s[0] != '\0') + argparse_error(self, opt, "expects an integer value", flags); + break; + case ARGPARSE_OPT_FLOAT: + errno = 0; + if (self->optvalue) 
{ + *(float *)opt->value = strtof(self->optvalue, (char **)&s); + self->optvalue = NULL; + } else if (self->argc > 1) { + self->argc--; + *(float *)opt->value = strtof(*++self->argv, (char **)&s); + } else { + argparse_error(self, opt, "requires a value", flags); + } + if (errno) + argparse_error(self, opt, strerror(errno), flags); + if (s[0] != '\0') + argparse_error(self, opt, "expects a numerical value", flags); + break; + default: + assert(0); + } + +skipped: + if (opt->callback) { + return opt->callback(self, opt); + } + + return 0; +} + +static void +argparse_options_check(const struct argparse_option *options) +{ + for (; options->type != ARGPARSE_OPT_END; options++) { + switch (options->type) { + case ARGPARSE_OPT_END: + case ARGPARSE_OPT_BOOLEAN: + case ARGPARSE_OPT_BIT: + case ARGPARSE_OPT_INTEGER: + case ARGPARSE_OPT_FLOAT: + case ARGPARSE_OPT_STRING: + case ARGPARSE_OPT_GROUP: + continue; + default: + fprintf(stderr, "wrong option type: %d", options->type); + break; + } + } +} + +static int +argparse_short_opt(struct argparse *self, const struct argparse_option *options) +{ + for (; options->type != ARGPARSE_OPT_END; options++) { + if (options->short_name == *self->optvalue) { + self->optvalue = self->optvalue[1] ? self->optvalue + 1 : NULL; + return argparse_getvalue(self, options, 0); + } + } + return -2; +} + +static int +argparse_long_opt(struct argparse *self, const struct argparse_option *options) +{ + for (; options->type != ARGPARSE_OPT_END; options++) { + const char *rest; + int opt_flags = 0; + if (!options->long_name) + continue; + + rest = prefix_skip(self->argv[0] + 2, options->long_name); + if (!rest) { + // negation disabled? 
+ if (options->flags & OPT_NONEG) { + continue; + } + // only OPT_BOOLEAN/OPT_BIT supports negation + if (options->type != ARGPARSE_OPT_BOOLEAN && options->type != + ARGPARSE_OPT_BIT) { + continue; + } + + if (prefix_cmp(self->argv[0] + 2, "no-")) { + continue; + } + rest = prefix_skip(self->argv[0] + 2 + 3, options->long_name); + if (!rest) + continue; + opt_flags |= OPT_UNSET; + } + if (*rest) { + if (*rest != '=') + continue; + self->optvalue = rest + 1; + } + return argparse_getvalue(self, options, opt_flags | OPT_LONG); + } + return -2; +} + +int +argparse_init(struct argparse *self, struct argparse_option *options, + const char *const *usages, int flags) +{ + memset(self, 0, sizeof(*self)); + self->options = options; + self->usages = usages; + self->flags = flags; + self->description = NULL; + self->epilog = NULL; + return 0; +} + +void +argparse_describe(struct argparse *self, const char *description, + const char *epilog) +{ + self->description = description; + self->epilog = epilog; +} + +int +argparse_parse(struct argparse *self, int argc, const char **argv) +{ + self->argc = argc - 1; + self->argv = argv + 1; + self->out = argv; + + argparse_options_check(self->options); + + for (; self->argc; self->argc--, self->argv++) { + const char *arg = self->argv[0]; + if (arg[0] != '-' || !arg[1]) { + if (self->flags & ARGPARSE_STOP_AT_NON_OPTION) { + goto end; + } + // if it's not option or is a single char '-', copy verbatim + self->out[self->cpidx++] = self->argv[0]; + continue; + } + // short option + if (arg[1] != '-') { + self->optvalue = arg + 1; + switch (argparse_short_opt(self, self->options)) { + case -1: + break; + case -2: + goto unknown; + } + while (self->optvalue) { + switch (argparse_short_opt(self, self->options)) { + case -1: + break; + case -2: + goto unknown; + } + } + continue; + } + // if '--' presents + if (!arg[2]) { + self->argc--; + self->argv++; + break; + } + // long option + switch (argparse_long_opt(self, self->options)) { + case 
-1: + break; + case -2: + goto unknown; + } + continue; + +unknown: + fprintf(stderr, "error: unknown option `%s`\n", self->argv[0]); + argparse_usage(self); + exit(1); + } + +end: + memmove(self->out + self->cpidx, self->argv, + self->argc * sizeof(*self->out)); + self->out[self->cpidx + self->argc] = NULL; + + return self->cpidx + self->argc; +} + +void +argparse_usage(struct argparse *self) +{ + if (self->usages) { + fprintf(stdout, "Usage: %s\n", *self->usages++); + while (*self->usages && **self->usages) + fprintf(stdout, " or: %s\n", *self->usages++); + } else { + fprintf(stdout, "Usage:\n"); + } + + // print description + if (self->description) + fprintf(stdout, "%s\n", self->description); + + fputc('\n', stdout); + + const struct argparse_option *options; + + // figure out best width + size_t usage_opts_width = 0; + size_t len; + options = self->options; + for (; options->type != ARGPARSE_OPT_END; options++) { + len = 0; + if ((options)->short_name) { + len += 2; + } + if ((options)->short_name && (options)->long_name) { + len += 2; // separator ", " + } + if ((options)->long_name) { + len += strlen((options)->long_name) + 2; + } + if (options->type == ARGPARSE_OPT_INTEGER) { + len += strlen("=<int>"); + } + if (options->type == ARGPARSE_OPT_FLOAT) { + len += strlen("=<flt>"); + } else if (options->type == ARGPARSE_OPT_STRING) { + len += strlen("=<str>"); + } + len = (len + 3) - ((len + 3) & 3); + if (usage_opts_width < len) { + usage_opts_width = len; + } + } + usage_opts_width += 4; // 4 spaces prefix + + options = self->options; + for (; options->type != ARGPARSE_OPT_END; options++) { + size_t pos = 0; + int pad = 0; + if (options->type == ARGPARSE_OPT_GROUP) { + fputc('\n', stdout); + fprintf(stdout, "%s", options->help); + fputc('\n', stdout); + continue; + } + pos = fprintf(stdout, " "); + if (options->short_name) { + pos += fprintf(stdout, "-%c", options->short_name); + } + if (options->long_name && options->short_name) { + pos += fprintf(stdout, ", 
"); + } + if (options->long_name) { + pos += fprintf(stdout, "--%s", options->long_name); + } + if (options->type == ARGPARSE_OPT_INTEGER) { + pos += fprintf(stdout, "=<int>"); + } else if (options->type == ARGPARSE_OPT_FLOAT) { + pos += fprintf(stdout, "=<flt>"); + } else if (options->type == ARGPARSE_OPT_STRING) { + pos += fprintf(stdout, "=<str>"); + } + if (pos <= usage_opts_width) { + pad = usage_opts_width - pos; + } else { + fputc('\n', stdout); + pad = usage_opts_width; + } + fprintf(stdout, "%*s%s\n", pad + 2, "", options->help); + } + + // print epilog + if (self->epilog) + fprintf(stdout, "%s\n", self->epilog); +} + +int +argparse_help_cb(struct argparse *self, const struct argparse_option *option) +{ + (void)option; + argparse_usage(self); + exit(0); +} diff --git a/lib/argparse/argparse.h b/lib/argparse/argparse.h @@ -0,0 +1,130 @@ +/** + * Copyright (C) 2012-2015 Yecheng Fu <cofyc.jackson at gmail dot com> + * All rights reserved. + * + * Use of this source code is governed by a MIT-style license that can be found + * in the LICENSE file. + */ +#ifndef ARGPARSE_H +#define ARGPARSE_H + +/* For c++ compatibility */ +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +struct argparse; +struct argparse_option; + +typedef int argparse_callback (struct argparse *self, + const struct argparse_option *option); + +enum argparse_flag { + ARGPARSE_STOP_AT_NON_OPTION = 1, +}; + +enum argparse_option_type { + /* special */ + ARGPARSE_OPT_END, + ARGPARSE_OPT_GROUP, + /* options with no arguments */ + ARGPARSE_OPT_BOOLEAN, + ARGPARSE_OPT_BIT, + /* options with arguments (optional or required) */ + ARGPARSE_OPT_INTEGER, + ARGPARSE_OPT_FLOAT, + ARGPARSE_OPT_STRING, +}; + +enum argparse_option_flags { + OPT_NONEG = 1, /* disable negation */ +}; + +/** + * argparse option + * + * `type`: + * holds the type of the option, you must have an ARGPARSE_OPT_END last in your + * array. 
+ * + * `short_name`: + * the character to use as a short option name, '\0' if none. + * + * `long_name`: + * the long option name, without the leading dash, NULL if none. + * + * `value`: + * stores pointer to the value to be filled. + * + * `help`: + * the short help message associated to what the option does. + * Must never be NULL (except for ARGPARSE_OPT_END). + * + * `callback`: + * function is called when corresponding argument is parsed. + * + * `data`: + * associated data. Callbacks can use it like they want. + * + * `flags`: + * option flags. + */ +struct argparse_option { + enum argparse_option_type type; + const char short_name; + const char *long_name; + void *value; + const char *help; + argparse_callback *callback; + intptr_t data; + int flags; +}; + +/** + * argpparse + */ +struct argparse { + // user supplied + const struct argparse_option *options; + const char *const *usages; + int flags; + const char *description; // a description after usage + const char *epilog; // a description at the end + // internal context + int argc; + const char **argv; + const char **out; + int cpidx; + const char *optvalue; // current option value +}; + +// built-in callbacks +int argparse_help_cb(struct argparse *self, + const struct argparse_option *option); + +// built-in option macros +#define OPT_END() { ARGPARSE_OPT_END, 0, NULL, NULL, 0, NULL, 0, 0 } +#define OPT_BOOLEAN(...) { ARGPARSE_OPT_BOOLEAN, __VA_ARGS__ } +#define OPT_BIT(...) { ARGPARSE_OPT_BIT, __VA_ARGS__ } +#define OPT_INTEGER(...) { ARGPARSE_OPT_INTEGER, __VA_ARGS__ } +#define OPT_FLOAT(...) { ARGPARSE_OPT_FLOAT, __VA_ARGS__ } +#define OPT_STRING(...) 
{ ARGPARSE_OPT_STRING, __VA_ARGS__ } +#define OPT_GROUP(h) { ARGPARSE_OPT_GROUP, 0, NULL, NULL, h, NULL, 0, 0 } +#define OPT_HELP() OPT_BOOLEAN('h', "help", NULL, \ + "show this help message and exit", \ + argparse_help_cb, 0, OPT_NONEG) + +int argparse_init(struct argparse *self, struct argparse_option *options, + const char *const *usages, int flags); +void argparse_describe(struct argparse *self, const char *description, + const char *epilog); +int argparse_parse(struct argparse *self, int argc, const char **argv); +void argparse_usage(struct argparse *self); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/main.c b/src/main.c @@ -0,0 +1,410 @@ +/************************************************************************** +This ugly, sparsely-commented program is the source for text2pdf version +1.1. It should be ANSI-conforming and compile on most platforms. You'll +need to change LF_EXTRA to 1 for machines which write 2 characters for \n. +These include PCs, of course. + +You may distribute the source or compiled versions free of charge. You may +not alter the source in any way other than those mentioned above without +the permission of the author, Phil Smith <phil@bagobytes.co.uk>. + +Please send any comments to the author. + +Copyright (c) Phil Smith, 1996 + +REVISION HISTORY + +Version 1.1 +11 Oct 96 Added handling of form-feed characters, removed need for tmp file, + put reference to resources in each page (avoid bug in Acrobat), + changed date format to PDF-1.1 standard. +12 Jun 96 Added check to avoid blank last page +12 Jun 96 Added LINE_END def to get round platform-specific \r, \n etc. 
+18 Mar 96 Added ISOLatin1Encoding option +**************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "argparse.h" + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif + +#define LF_EXTRA 0 /* how many extra characters are written for \n */ + /* change to 1 for PCs (where \n => <CR><LF>) */ + +#define LINE_END '\015' /* CR used in xref table */ +#define FF 12 /* formfeed character (^L) */ + +char *appname = "text2pdf v1.1"; +char *progname = "text2pdf"; + +FILE *infile; +int pageNo = 0; +int pageObs[500]; +int curObj = 5; /* object number being or last written */ +long locations[1000]; + +char font[256]; +char *defaultFont = "Courier"; +int ISOEnc = 0; +int doFFs = 1; +int tab = 8; +int pointSize = 10; +int vertSpace = 12; +int lines = 0; +int cols = 80; /* max chars per output line */ +int columns = 1; /* number of columns */ + +/* Default paper is Letter size, as in distiller */ +int pageHeight = 792; +int pageWidth = 612; + +unsigned char buf[1024]; +unsigned long fpos = 0; + +void writestr(char *str) { + /* Everything written to the PDF file goes through this function. */ + /* This means we can keep track of the file position without using */ + /* ftell on a real (tmp) file. 
However, PCs write out 2 characters */ + /* for \n, so we need this ugly loop to keep fpos correct */ + + fpos += strlen(str); + while (*str) { + if (*str == '\n') fpos += LF_EXTRA; + putchar(*str++); + } +} + + +void WriteHeader(char *title){ + + struct tm *ltime; + time_t clock; + char datestring[30]; + + time(&clock); + ltime = localtime(&clock); + + strftime(datestring, 30, "D:%Y%m%d%H%M%S", ltime); + + writestr("%PDF-1.1\n"); + locations[1] = fpos; + writestr("1 0 obj\n"); + writestr("<<\n"); + sprintf(buf, "/CreationDate (%s)\n", datestring); writestr(buf); + sprintf(buf, "/Producer (%s (\\251 Phil Smith, 1996))\n", appname); writestr(buf); + if (title) {sprintf(buf, "/Title (%s)\n", title); writestr(buf);} + writestr(">>\n"); + writestr("endobj\n"); + + locations[2] = fpos; + writestr("2 0 obj\n"); + writestr("<<\n"); + writestr("/Type /Catalog\n"); + writestr("/Pages 3 0 R\n"); + writestr(">>\n"); + writestr("endobj\n"); + + locations[4] = fpos; + writestr("4 0 obj\n"); + writestr("<<\n"); + writestr("/Type /Font\n"); + writestr("/Subtype /Type1\n"); + writestr("/Name /F1\n"); + sprintf(buf, "/BaseFont %s\n", font); writestr(buf); + if (ISOEnc) { + writestr("/Encoding <<\n"); + writestr("/Differences [ 0 /.notdef /.notdef /.notdef /.notdef\n"); + writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); + writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); + writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); + writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); + writestr("/.notdef /.notdef /.notdef /.notdef /space /exclam\n"); + writestr("/quotedbl /numbersign /dollar /percent /ampersand\n"); + writestr("/quoteright /parenleft /parenright /asterisk /plus /comma\n"); + writestr("/hyphen /period /slash /zero /one /two /three /four /five\n"); + writestr("/six /seven /eight /nine /colon /semicolon /less /equal\n"); + writestr("/greater /question /at /A /B /C /D /E /F /G /H /I /J /K /L\n"); + 
writestr("/M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft\n"); + writestr("/backslash /bracketright /asciicircum /underscore\n"); + writestr("/quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p\n"); + writestr("/q /r /s /t /u /v /w /x /y /z /braceleft /bar /braceright\n"); + writestr("/asciitilde /.notdef /.notdef /.notdef /.notdef /.notdef\n"); + writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); + writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); + writestr("/dotlessi /grave /acute /circumflex /tilde /macron /breve\n"); + writestr("/dotaccent /dieresis /.notdef /ring /cedilla /.notdef\n"); + writestr("/hungarumlaut /ogonek /caron /space /exclamdown /cent\n"); + writestr("/sterling /currency /yen /brokenbar /section /dieresis\n"); + writestr("/copyright /ordfeminine /guillemotleft /logicalnot /hyphen\n"); + writestr("/registered /macron /degree /plusminus /twosuperior\n"); + writestr("/threesuperior /acute /mu /paragraph /periodcentered\n"); + writestr("/cedilla /onesuperior /ordmasculine /guillemotright\n"); + writestr("/onequarter /onehalf /threequarters /questiondown /Agrave\n"); + writestr("/Aacute /Acircumflex /Atilde /Adieresis /Aring /AE\n"); + writestr("/Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave\n"); + writestr("/Iacute /Icircumflex /Idieresis /Eth /Ntilde /Ograve\n"); + writestr("/Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash\n"); + writestr("/Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn\n"); + writestr("/germandbls /agrave /aacute /acircumflex /atilde /adieresis\n"); + writestr("/aring /ae /ccedilla /egrave /eacute /ecircumflex\n"); + writestr("/edieresis /igrave /iacute /icircumflex /idieresis /eth\n"); + writestr("/ntilde /ograve /oacute /ocircumflex /otilde /odieresis\n"); + writestr("/divide /oslash /ugrave /uacute /ucircumflex /udieresis\n"); + writestr("/yacute /thorn /ydieresis ]\n"); + writestr(">>\n"); + } + + writestr(">>\n"); + writestr("endobj\n"); + + 
locations[5] = fpos; + writestr("5 0 obj\n"); + writestr("<<\n"); + writestr(" /Font << /F1 4 0 R >>\n"); + writestr(" /ProcSet [ /PDF /Text ]\n"); + writestr(">>\n"); + writestr("endobj\n"); +} + +long StartPage(){ + long strmPos; + + locations[++curObj] = fpos; + pageObs[++pageNo] = curObj; + sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); + writestr("<<\n"); + writestr("/Type /Page\n"); + writestr("/Parent 3 0 R\n"); + writestr("/Resources 5 0 R\n"); + sprintf(buf, "/Contents %d 0 R\n", ++curObj); writestr(buf); + writestr(">>\n"); + writestr("endobj\n"); + + locations[curObj] = fpos; + sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); + writestr("<<\n"); + sprintf(buf, "/Length %d 0 R\n", curObj + 1); writestr(buf); + writestr(">>\n"); + writestr("stream\n"); + strmPos = fpos; + + writestr("BT\n"); + sprintf(buf, "/F1 %d Tf\n", pointSize); writestr(buf); + sprintf(buf, "1 0 0 1 50 %d Tm\n", pageHeight - 40); writestr(buf); + sprintf(buf, "%d TL\n", vertSpace); writestr(buf); + + return strmPos; +} + +void EndPage(long streamStart){ + long streamEnd; + + writestr("ET\n"); + streamEnd = fpos; + writestr("endstream\n"); + writestr("endobj\n"); + + locations[++curObj] = fpos; + sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); + sprintf(buf, "%lu\n", streamEnd - streamStart); writestr(buf); + writestr("endobj\n"); +} + +void WritePages(){ + int atEOF = 0; + int atFF; + int atBOP; + long beginstream; + int lineNo, charNo; + int ch, column; + int padding, i; + + while (!atEOF) { + beginstream = StartPage(); + column = 1; + while (column++ <= columns) { + atFF = 0; + atBOP = 0; + lineNo = 0; + while (lineNo++ < lines && !atEOF && !atFF) { + writestr("("); + charNo = 0; + while (charNo++<cols && + (ch = getc(infile))!=EOF && + !(ch==FF && doFFs) && + ch!='\n') { + if (ch >= 32 && ch <= 127) { + if (ch == '(' || ch == ')' || ch == '\\') writestr("\\"); + sprintf(buf, "%c", (char)ch); writestr(buf); + } else { + if (ch == 9) { + padding = tab - ((charNo - 1) % tab); 
+ for (i = 1; i <= padding; i++) writestr(" "); + charNo += (padding - 1); + } else { + if (ch != FF) { + /* write \xxx form for dodgy character */ + sprintf(buf, "\\%.3o", ch); writestr(buf); + } else { + /* don't print anything for a FF */ + charNo--; + } + } + } + } + writestr(")'\n"); + + /* messy stuff to handle formfeeds. Yuk! */ + if (ch == EOF) atEOF = 1; + if (ch == FF) atFF = 1; + if (lineNo == lines) atBOP = 1; + if (atBOP) { + ch = getc(infile); + if (ch == FF) ch = getc(infile); + if (ch == EOF) atEOF = 1; + else ungetc(ch, infile); + } + else if (atFF) { + ch = getc(infile); + if (ch == EOF) atEOF = 1; + else ungetc(ch, infile); + } + } + + if (column <= columns) { + sprintf(buf, "1 0 0 1 %d %d Tm\n", + (pageWidth / 2) + 25, pageHeight - 40); + writestr(buf); + } + } + EndPage(beginstream); + } +} + +void WriteRest(){ + long xref; + int i; + + locations[3] = fpos; + writestr("3 0 obj\n"); + writestr("<<\n"); + writestr("/Type /Pages\n"); + sprintf(buf, "/Count %d\n", pageNo); writestr(buf); + sprintf(buf, "/MediaBox [ 0 0 %d %d ]\n", pageWidth, pageHeight); writestr(buf); + writestr("/Kids [ "); + for (i = 1; i <= pageNo; i++) {sprintf(buf, "%d 0 R ", pageObs[i]); writestr(buf);} + writestr("]\n"); + writestr(">>\n"); + writestr("endobj\n"); + + xref = fpos; + writestr("xref\n"); + sprintf(buf, "0 %d\n", curObj + 1); writestr(buf); + /* note that \n is translated by writestr */ + sprintf(buf, "0000000000 65535 f %c", LINE_END); writestr(buf); + for (i = 1; i <= curObj; i++) { + sprintf(buf, "%.10ld 00000 n %c", locations[i], LINE_END); + writestr(buf); + } + + writestr("trailer\n"); + writestr("<<\n"); + sprintf(buf, "/Size %d\n", curObj + 1); writestr(buf); + writestr("/Root 2 0 R\n"); + writestr("/Info 1 0 R\n"); + writestr(">>\n"); + + writestr("startxref\n"); + sprintf(buf, "%ld\n", xref); writestr(buf); + writestr("%%EOF\n"); +} + +static const char *const usage[] = { + "text2pdf [options] [filename]", + NULL +}; + +int main(int argc, char 
**argv){ + int i = 1; + int tmp, landscape = 0; + char *ifilename = NULL; + int pageFormat = -1; + + strcpy(font, "/"); + strcat(font, defaultFont); + infile = stdin; /* default */ + + // Define arguments + struct argparse_option options[] = { + OPT_HELP(), + OPT_STRING( 'f', "font" , font , "use PostScript font (must be in standard 14, default: Courier)" ), + OPT_BIT( 'I', "iso" , &ISOEnc , "use ISOLatin1Encoding" , NULL, 1), + OPT_INTEGER('s', "size" , &pointSize , "use font at given pointsize" ), + OPT_INTEGER('v', "vert" , &vertSpace , "use given line spacing in points" ), + OPT_INTEGER('l', "lines" , &lines , "lines per page (default 60, determined automatically if unspecified)"), + OPT_INTEGER('c', "chars" , &cols , "maximum characters per line (default: 80)" ), + OPT_INTEGER('t', "tab" , &tab , "spaces per tab character (default: 8)" ), + OPT_BIT( 'F', "ff" , &doFFs , "ignore formfeed characters (^L)" , NULL, 2), + OPT_INTEGER('C', "columns" , &columns , "columns to format the page in" ), + OPT_INTEGER('x', "width" , &pageWidth , "page width in points" ), + OPT_INTEGER('y', "height" , &pageHeight, "page height in points" ), + OPT_INTEGER('A', NULL , &pageFormat, "use A <int> size paper" ), + OPT_BIT( 'L', "landscape", &landscape , "landscape mode" , NULL, 1), + OPT_END(), + }; + + // Parse arguments + struct argparse argparse; + argparse_init(&argparse, options, usage, 0); + argparse_describe(&argparse, + "\ntext2pdf makes a 7-bit clean PDF file (version 1.1) from any input file. 
It reads from standard input or a named file, and writes the PDF file to standard output.", + "\ntext2pdf v1.1 (c) Phil Smith, 1996" + ); + argc = argparse_parse(&argparse, argc, argv); + + // Handle inverted formfeed + if (doFFs & 2) { + doFFs = 0; + } + + // Handle pageformat + switch(pageFormat) { + case 3: + pageWidth = 842; + pageHeight = 1190; + break; + case 4: + pageWidth = 595; + pageHeight = 842; + break; + } + + // Parse leftovers + for(int i = 0; i<argc; i++) { + ifilename = argv[i]; + } + + // Swap width/height for landscape + if (landscape) { + tmp = pageHeight; + pageHeight = pageWidth; + pageWidth = tmp; + } + + // Calculate lines per page + if (lines == 0) lines = (pageHeight - 72) / vertSpace; + if (lines < 1) lines = 1; + /* happens to give 60 as default */ + + WriteHeader(ifilename); + WritePages(); + WriteRest(); + + return 0; +} diff --git a/text2pdf.c b/text2pdf.c @@ -1,455 +0,0 @@ -/************************************************************************** -This ugly, sparsely-commented program is the source for text2pdf version -1.1. It should be ANSI-conforming and compile on most platforms. You'll -need to change LF_EXTRA to 1 for machines which write 2 characters for \n. -These include PCs, of course. - -You may distribute the source or compiled versions free of charge. You may -not alter the source in any way other than those mentioned above without -the permission of the author, Phil Smith <phil@bagobytes.co.uk>. - -Please send any comments to the author. - -Copyright (c) Phil Smith, 1996 - -REVISION HISTORY - -Version 1.1 -11 Oct 96 Added handling of form-feed characters, removed need for tmp file, - put reference to resources in each page (avoid bug in Acrobat), - changed date format to PDF-1.1 standard. -12 Jun 96 Added check to avoid blank last page -12 Jun 96 Added LINE_END def to get round platform-specific \r, \n etc. 
-18 Mar 96 Added ISOLatin1Encoding option -**************************************************************************/ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> - -#ifndef SEEK_SET -#define SEEK_SET 0 -#endif - -#define LF_EXTRA 0 /* how many extra characters are written for \n */ - /* change to 1 for PCs (where \n => <CR><LF>) */ - -#define LINE_END '\015' /* CR used in xref table */ -#define FF 12 /* formfeed character (^L) */ - -char *appname = "text2pdf v1.1"; -char *progname = "text2pdf"; - -FILE *infile; -int pageNo = 0; -int pageObs[500]; -int curObj = 5; /* object number being or last written */ -long locations[1000]; - -char font[256]; -char *defaultFont = "Courier"; -int ISOEnc = 0; -int doFFs = 1; -int tab = 8; -int pointSize = 10; -int vertSpace = 12; -int lines = 0; -int cols = 80; /* max chars per output line */ -int columns = 1; /* number of columns */ - -/* Default paper is Letter size, as in distiller */ -int pageHeight = 792; -int pageWidth = 612; - -unsigned char buf[1024]; -unsigned long fpos = 0; - -void writestr(char *str) { - /* Everything written to the PDF file goes through this function. */ - /* This means we can keep track of the file position without using */ - /* ftell on a real (tmp) file. 
However, PCs write out 2 characters */ - /* for \n, so we need this ugly loop to keep fpos correct */ - - fpos += strlen(str); - while (*str) { - if (*str == '\n') fpos += LF_EXTRA; - putchar(*str++); - } -} - - -void WriteHeader(char *title){ - - struct tm *ltime; - time_t clock; - char datestring[30]; - - time(&clock); - ltime = localtime(&clock); - - strftime(datestring, 30, "D:%Y%m%d%H%M%S", ltime); - - writestr("%PDF-1.1\n"); - locations[1] = fpos; - writestr("1 0 obj\n"); - writestr("<<\n"); - sprintf(buf, "/CreationDate (%s)\n", datestring); writestr(buf); - sprintf(buf, "/Producer (%s (\\251 Phil Smith, 1996))\n", appname); writestr(buf); - if (title) {sprintf(buf, "/Title (%s)\n", title); writestr(buf);} - writestr(">>\n"); - writestr("endobj\n"); - - locations[2] = fpos; - writestr("2 0 obj\n"); - writestr("<<\n"); - writestr("/Type /Catalog\n"); - writestr("/Pages 3 0 R\n"); - writestr(">>\n"); - writestr("endobj\n"); - - locations[4] = fpos; - writestr("4 0 obj\n"); - writestr("<<\n"); - writestr("/Type /Font\n"); - writestr("/Subtype /Type1\n"); - writestr("/Name /F1\n"); - sprintf(buf, "/BaseFont %s\n", font); writestr(buf); - if (ISOEnc) { - writestr("/Encoding <<\n"); - writestr("/Differences [ 0 /.notdef /.notdef /.notdef /.notdef\n"); - writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); - writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); - writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); - writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); - writestr("/.notdef /.notdef /.notdef /.notdef /space /exclam\n"); - writestr("/quotedbl /numbersign /dollar /percent /ampersand\n"); - writestr("/quoteright /parenleft /parenright /asterisk /plus /comma\n"); - writestr("/hyphen /period /slash /zero /one /two /three /four /five\n"); - writestr("/six /seven /eight /nine /colon /semicolon /less /equal\n"); - writestr("/greater /question /at /A /B /C /D /E /F /G /H /I /J /K /L\n"); - 
writestr("/M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft\n"); - writestr("/backslash /bracketright /asciicircum /underscore\n"); - writestr("/quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p\n"); - writestr("/q /r /s /t /u /v /w /x /y /z /braceleft /bar /braceright\n"); - writestr("/asciitilde /.notdef /.notdef /.notdef /.notdef /.notdef\n"); - writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); - writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); - writestr("/dotlessi /grave /acute /circumflex /tilde /macron /breve\n"); - writestr("/dotaccent /dieresis /.notdef /ring /cedilla /.notdef\n"); - writestr("/hungarumlaut /ogonek /caron /space /exclamdown /cent\n"); - writestr("/sterling /currency /yen /brokenbar /section /dieresis\n"); - writestr("/copyright /ordfeminine /guillemotleft /logicalnot /hyphen\n"); - writestr("/registered /macron /degree /plusminus /twosuperior\n"); - writestr("/threesuperior /acute /mu /paragraph /periodcentered\n"); - writestr("/cedilla /onesuperior /ordmasculine /guillemotright\n"); - writestr("/onequarter /onehalf /threequarters /questiondown /Agrave\n"); - writestr("/Aacute /Acircumflex /Atilde /Adieresis /Aring /AE\n"); - writestr("/Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave\n"); - writestr("/Iacute /Icircumflex /Idieresis /Eth /Ntilde /Ograve\n"); - writestr("/Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash\n"); - writestr("/Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn\n"); - writestr("/germandbls /agrave /aacute /acircumflex /atilde /adieresis\n"); - writestr("/aring /ae /ccedilla /egrave /eacute /ecircumflex\n"); - writestr("/edieresis /igrave /iacute /icircumflex /idieresis /eth\n"); - writestr("/ntilde /ograve /oacute /ocircumflex /otilde /odieresis\n"); - writestr("/divide /oslash /ugrave /uacute /ucircumflex /udieresis\n"); - writestr("/yacute /thorn /ydieresis ]\n"); - writestr(">>\n"); - } - - writestr(">>\n"); - writestr("endobj\n"); - - 
locations[5] = fpos; - writestr("5 0 obj\n"); - writestr("<<\n"); - writestr(" /Font << /F1 4 0 R >>\n"); - writestr(" /ProcSet [ /PDF /Text ]\n"); - writestr(">>\n"); - writestr("endobj\n"); -} - -long StartPage(){ - long strmPos; - - locations[++curObj] = fpos; - pageObs[++pageNo] = curObj; - sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); - writestr("<<\n"); - writestr("/Type /Page\n"); - writestr("/Parent 3 0 R\n"); - writestr("/Resources 5 0 R\n"); - sprintf(buf, "/Contents %d 0 R\n", ++curObj); writestr(buf); - writestr(">>\n"); - writestr("endobj\n"); - - locations[curObj] = fpos; - sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); - writestr("<<\n"); - sprintf(buf, "/Length %d 0 R\n", curObj + 1); writestr(buf); - writestr(">>\n"); - writestr("stream\n"); - strmPos = fpos; - - writestr("BT\n"); - sprintf(buf, "/F1 %d Tf\n", pointSize); writestr(buf); - sprintf(buf, "1 0 0 1 50 %d Tm\n", pageHeight - 40); writestr(buf); - sprintf(buf, "%d TL\n", vertSpace); writestr(buf); - - return strmPos; -} - -void EndPage(long streamStart){ - long streamEnd; - - writestr("ET\n"); - streamEnd = fpos; - writestr("endstream\n"); - writestr("endobj\n"); - - locations[++curObj] = fpos; - sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); - sprintf(buf, "%lu\n", streamEnd - streamStart); writestr(buf); - writestr("endobj\n"); -} - -void WritePages(){ - int atEOF = 0; - int atFF; - int atBOP; - long beginstream; - int lineNo, charNo; - int ch, column; - int padding, i; - - while (!atEOF) { - beginstream = StartPage(); - column = 1; - while (column++ <= columns) { - atFF = 0; - atBOP = 0; - lineNo = 0; - while (lineNo++ < lines && !atEOF && !atFF) { - writestr("("); - charNo = 0; - while (charNo++<cols && - (ch = getc(infile))!=EOF && - !(ch==FF && doFFs) && - ch!='\n') { - if (ch >= 32 && ch <= 127) { - if (ch == '(' || ch == ')' || ch == '\\') writestr("\\"); - sprintf(buf, "%c", (char)ch); writestr(buf); - } else { - if (ch == 9) { - padding = tab - ((charNo - 1) % tab); 
- for (i = 1; i <= padding; i++) writestr(" "); - charNo += (padding - 1); - } else { - if (ch != FF) { - /* write \xxx form for dodgy character */ - sprintf(buf, "\\%.3o", ch); writestr(buf); - } else { - /* don't print anything for a FF */ - charNo--; - } - } - } - } - writestr(")'\n"); - - /* messy stuff to handle formfeeds. Yuk! */ - if (ch == EOF) atEOF = 1; - if (ch == FF) atFF = 1; - if (lineNo == lines) atBOP = 1; - if (atBOP) { - ch = getc(infile); - if (ch == FF) ch = getc(infile); - if (ch == EOF) atEOF = 1; - else ungetc(ch, infile); - } - else if (atFF) { - ch = getc(infile); - if (ch == EOF) atEOF = 1; - else ungetc(ch, infile); - } - } - - if (column <= columns) { - sprintf(buf, "1 0 0 1 %d %d Tm\n", - (pageWidth / 2) + 25, pageHeight - 40); - writestr(buf); - } - } - EndPage(beginstream); - } -} - -void WriteRest(){ - long xref; - int i; - - locations[3] = fpos; - writestr("3 0 obj\n"); - writestr("<<\n"); - writestr("/Type /Pages\n"); - sprintf(buf, "/Count %d\n", pageNo); writestr(buf); - sprintf(buf, "/MediaBox [ 0 0 %d %d ]\n", pageWidth, pageHeight); writestr(buf); - writestr("/Kids [ "); - for (i = 1; i <= pageNo; i++) {sprintf(buf, "%d 0 R ", pageObs[i]); writestr(buf);} - writestr("]\n"); - writestr(">>\n"); - writestr("endobj\n"); - - xref = fpos; - writestr("xref\n"); - sprintf(buf, "0 %d\n", curObj + 1); writestr(buf); - /* note that \n is translated by writestr */ - sprintf(buf, "0000000000 65535 f %c", LINE_END); writestr(buf); - for (i = 1; i <= curObj; i++) { - sprintf(buf, "%.10ld 00000 n %c", locations[i], LINE_END); - writestr(buf); - } - - writestr("trailer\n"); - writestr("<<\n"); - sprintf(buf, "/Size %d\n", curObj + 1); writestr(buf); - writestr("/Root 2 0 R\n"); - writestr("/Info 1 0 R\n"); - writestr(">>\n"); - - writestr("startxref\n"); - sprintf(buf, "%ld\n", xref); writestr(buf); - writestr("%%EOF\n"); -} - - -void ShowHelp(){ - - printf("\n%s [options] [filename]\n\n", progname); - printf(" %s makes a 7-bit clean PDF file 
(version 1.1) from any input file.\n", progname); - printf(" It reads from standard input or a named file, and writes the PDF file\n"); - printf(" to standard output.\n"); - printf("\n There are various options as follows:\n\n"); - printf(" -h\t\tshow this message\n"); - printf(" -f<font>\tuse PostScript <font> (must be in standard 14, default: Courier)\n"); - printf(" -I\t\tuse ISOLatin1Encoding\n"); - printf(" -s<size>\tuse font at given pointsize (default %d)\n", pointSize); - printf(" -v<dist>\tuse given line spacing (default %d points)\n", vertSpace); - printf(" -l<lines>\tlines per page (default 60, determined automatically\n\t\tif unspecified)\n"); - printf(" -c<chars>\tmaximum characters per line (default 80)\n"); - printf(" -t<spaces>\tspaces per tab character (default 8)\n"); - printf(" -F\t\tignore formfeed characters (^L)\n"); - printf(" -A4\t\tuse A4 paper (default Letter)\n"); - printf(" -A3\t\tuse A3 paper (default Letter)\n"); - printf(" -x<width>\tindependent paper width in points\n"); - printf(" -y<height>\tindependent paper height in points\n"); - printf(" -2\t\tformat in 2 columns\n"); - printf(" -L\t\tlandscape mode\n"); - printf("\n Note that where one variable is implied by two options, the second option\n takes precedence for that variable. (e.g. 
-A4 -y500)\n"); - printf(" In landscape mode, page width and height are simply swapped over before\n formatting, no matter how or when they were defined.\n"); - printf("\n%s (c) Phil Smith, 1996\n", appname); -} - -int main(int argc, char **argv){ - int i = 1; - int tmp, landscape = 0; - char *ifilename = NULL; - - strcpy(font, "/"); - strcat(font, defaultFont); - infile = stdin; /* default */ - - while (i < argc) { - if (*argv[i] != '-') { /* input filename */ - ifilename = argv[i]; - if (!(infile = fopen(ifilename, "r"))) { - fprintf(stderr, "%s: couldn't open input file `%s'\n", progname, ifilename); - exit(0); - } - } else { - switch (*++argv[i]) { - case 'h': - ShowHelp(); - exit(0); - case 'f': - strcpy(font, "/"); - strcat(font, ++argv[i]); - break; - case 'I': - ISOEnc = 1; - break; - case 'F': - doFFs = 0; - break; - case 's': - pointSize = atoi(++argv[i]); - if (pointSize < 1) pointSize = 1; - break; - case 'v': - vertSpace = atoi(++argv[i]); - if (vertSpace < 1) vertSpace = 1; - break; - case 'l': - lines = atoi(++argv[i]); - if (lines < 1) lines = 1; - break; - case 'c': - cols = atoi(++argv[i]); - if (cols < 4) cols = 4; - break; - case '2': - columns = 2; - break; - case 't': - tab = atoi(++argv[i]); - if (tab < 1) tab = 1; - break; - case 'A': - switch (*++argv[i]) { - case '3': - pageWidth = 842; - pageHeight = 1190; - break; - case '4': - pageWidth = 595; - pageHeight = 842; - break; - default: - fprintf(stderr, "%s: ignoring unknown paper size: A%s\n", progname, argv[i]); - } - break; - case 'x': - pageWidth = atoi(++argv[i]); - if (pageWidth < 72) pageWidth = 72; - break; - case 'y': - pageHeight = atoi(++argv[i]); - if (pageHeight < 72) pageHeight = 72; - break; - case 'L': - landscape = 1; - break; - default: - fprintf(stderr, "%s: ignoring invalid switch: -%s\n", progname, argv[i]); - } - } - i++; - } - - if (landscape) { - tmp = pageHeight; - pageHeight = pageWidth; - pageWidth = tmp; - } - - if (lines == 0) lines = (pageHeight - 72) / 
vertSpace; - if (lines < 1) lines = 1; - /* happens to give 60 as default */ - - WriteHeader(ifilename); - WritePages(); - WriteRest(); - - return 0; -}