text2pdf

Git mirror of http://www.eprg.org/pdfcorner/text2pdf/
git clone git://git.finwo.net/app/text2pdf
Log | Files | Refs | README

main.c (13931B)


      1 /**************************************************************************
      2 This ugly, sparsely-commented program is the source for text2pdf version
      3 1.1.  It should be ANSI-conforming and compile on most platforms.  You'll
      4 need to change LF_EXTRA to 1 for machines which write 2 characters for \n.
      5 These include PCs, of course.
      6 
      7 You may distribute the source or compiled versions free of charge.  You may
      8 not alter the source in any way other than those mentioned above without
      9 the permission of the author, Phil Smith <phil@bagobytes.co.uk>.
     10 
     11 Please send any comments to the author.
     12 
     13 Copyright (c) Phil Smith, 1996
     14 
     15 REVISION HISTORY
     16 
     17 Version 1.1
     18 11 Oct 96 Added handling of form-feed characters, removed need for tmp file,
     19           put reference to resources in each page (avoid bug in Acrobat),
     20 	  changed date format to PDF-1.1 standard.
     21 12 Jun 96 Added check to avoid blank last page
     22 12 Jun 96 Added LINE_END def to get round platform-specific \r, \n etc.
     23 18 Mar 96 Added ISOLatin1Encoding option
     24 **************************************************************************/
     25 
     26 #include <stdio.h>
     27 #include <stdlib.h>
     28 #include <string.h>
     29 #include <time.h>
     30 
     31 #include "argparse.h"
     32 
     33 #ifndef SEEK_SET
     34 #define SEEK_SET 0
     35 #endif
     36 
     37 #define LF_EXTRA 0  /* how many extra characters are written for \n */
     38                     /* change to 1 for PCs (where \n => <CR><LF>) */
     39 
     40 #define LINE_END '\015'  /* CR used in xref table */
     41 #define FF 12            /* formfeed character (^L) */
     42 
     43 char *appname = "text2pdf v1.1";
     44 char *progname = "text2pdf";
     45 
     46 FILE *infile;
     47 int pageNo = 0;
     48 int pageObs[500];
     49 int curObj = 5;  /* object number being or last written */
     50 long locations[1000];
     51 
     52 char *font = NULL;
     53 char *defaultFont = "Courier";
     54 int ISOEnc = 0;
     55 int doFFs = 1;
     56 int tab = 8;
     57 int pointSize = 10;
     58 int vertSpace = 12;
     59 int lines = 0;
     60 int cols = 80;  /* max chars per output line */
     61 int columns = 1;  /* number of columns */
     62 
     63 /* Default paper is Letter size, as in distiller */
     64 int pageHeight = 792;
     65 int pageWidth = 612;
     66 
     67 unsigned char buf[1024];
     68 unsigned long fpos = 0;
     69 
     70 void writestr(char *str) {
     71   /* Everything written to the PDF file goes through this function. */
     72   /* This means we can keep track of the file position without using */
     73   /* ftell on a real (tmp) file.  However, PCs write out 2 characters */
     74   /* for \n, so we need this ugly loop to keep fpos correct */
     75 
     76   fpos += strlen(str);
     77   while (*str) {
     78     if (*str == '\n') fpos += LF_EXTRA;
     79     putchar(*str++);
     80   }
     81 }
     82 
     83 
     84 void WriteHeader(char *title){
     85 
     86   struct tm *ltime;
     87   time_t clock;
     88   char datestring[30];
     89 
     90   time(&clock);
     91   ltime = localtime(&clock);
     92 
     93   strftime(datestring, 30, "D:%Y%m%d%H%M%S", ltime);
     94 
     95   writestr("%PDF-1.1\n");
     96   locations[1] = fpos;
     97   writestr("1 0 obj\n");
     98   writestr("<<\n");
     99   sprintf(buf, "/CreationDate (%s)\n", datestring); writestr(buf);
    100   sprintf(buf, "/Producer (%s (\\251 Phil Smith, 1996))\n", appname); writestr(buf);
    101   if (title) {sprintf(buf, "/Title (%s)\n", title); writestr(buf);}
    102   writestr(">>\n");
    103   writestr("endobj\n");
    104 
    105   locations[2] = fpos;
    106   writestr("2 0 obj\n");
    107   writestr("<<\n");
    108   writestr("/Type /Catalog\n");
    109   writestr("/Pages 3 0 R\n");
    110   writestr(">>\n");
    111   writestr("endobj\n");
    112 
    113   locations[4] = fpos;
    114   writestr("4 0 obj\n");
    115   writestr("<<\n");
    116   writestr("/Type /Font\n");
    117   writestr("/Subtype /Type1\n");
    118   writestr("/Name /F1\n");
    119   sprintf(buf, "/BaseFont %s\n", font); writestr(buf);
    120   if (ISOEnc) {
    121     writestr("/Encoding <<\n");
    122     writestr("/Differences [ 0 /.notdef /.notdef /.notdef /.notdef\n");
    123     writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n");
    124     writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n");
    125     writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n");
    126     writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n");
    127     writestr("/.notdef /.notdef /.notdef /.notdef /space /exclam\n");
    128     writestr("/quotedbl /numbersign /dollar /percent /ampersand\n");
    129     writestr("/quoteright /parenleft /parenright /asterisk /plus /comma\n");
    130     writestr("/hyphen /period /slash /zero /one /two /three /four /five\n");
    131     writestr("/six /seven /eight /nine /colon /semicolon /less /equal\n");
    132     writestr("/greater /question /at /A /B /C /D /E /F /G /H /I /J /K /L\n");
    133     writestr("/M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft\n");
    134     writestr("/backslash /bracketright /asciicircum /underscore\n");
    135     writestr("/quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p\n");
    136     writestr("/q /r /s /t /u /v /w /x /y /z /braceleft /bar /braceright\n");
    137     writestr("/asciitilde /.notdef /.notdef /.notdef /.notdef /.notdef\n");
    138     writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n");
    139     writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n");
    140     writestr("/dotlessi /grave /acute /circumflex /tilde /macron /breve\n");
    141     writestr("/dotaccent /dieresis /.notdef /ring /cedilla /.notdef\n");
    142     writestr("/hungarumlaut /ogonek /caron /space /exclamdown /cent\n");
    143     writestr("/sterling /currency /yen /brokenbar /section /dieresis\n");
    144     writestr("/copyright /ordfeminine /guillemotleft /logicalnot /hyphen\n");
    145     writestr("/registered /macron /degree /plusminus /twosuperior\n");
    146     writestr("/threesuperior /acute /mu /paragraph /periodcentered\n");
    147     writestr("/cedilla /onesuperior /ordmasculine /guillemotright\n");
    148     writestr("/onequarter /onehalf /threequarters /questiondown /Agrave\n");
    149     writestr("/Aacute /Acircumflex /Atilde /Adieresis /Aring /AE\n");
    150     writestr("/Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave\n");
    151     writestr("/Iacute /Icircumflex /Idieresis /Eth /Ntilde /Ograve\n");
    152     writestr("/Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash\n");
    153     writestr("/Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn\n");
    154     writestr("/germandbls /agrave /aacute /acircumflex /atilde /adieresis\n");
    155     writestr("/aring /ae /ccedilla /egrave /eacute /ecircumflex\n");
    156     writestr("/edieresis /igrave /iacute /icircumflex /idieresis /eth\n");
    157     writestr("/ntilde /ograve /oacute /ocircumflex /otilde /odieresis\n");
    158     writestr("/divide /oslash /ugrave /uacute /ucircumflex /udieresis\n");
    159     writestr("/yacute /thorn /ydieresis ]\n");
    160     writestr(">>\n");
    161   }
    162 
    163   writestr(">>\n");
    164   writestr("endobj\n");
    165 
    166   locations[5] = fpos;
    167   writestr("5 0 obj\n");
    168   writestr("<<\n");
    169   writestr("  /Font << /F1 4 0 R >>\n");
    170   writestr("  /ProcSet [ /PDF /Text ]\n");
    171   writestr(">>\n");
    172   writestr("endobj\n");
    173 }
    174 
    175 long StartPage(){
    176   long strmPos;
    177 
    178   locations[++curObj] = fpos;
    179   pageObs[++pageNo] = curObj;
    180   sprintf(buf, "%d 0 obj\n", curObj); writestr(buf);
    181   writestr("<<\n");
    182   writestr("/Type /Page\n");
    183   writestr("/Parent 3 0 R\n");
    184   writestr("/Resources 5 0 R\n");
    185   sprintf(buf, "/Contents %d 0 R\n", ++curObj); writestr(buf);
    186   writestr(">>\n");
    187   writestr("endobj\n");
    188   
    189   locations[curObj] = fpos;
    190   sprintf(buf, "%d 0 obj\n", curObj); writestr(buf);
    191   writestr("<<\n");
    192   sprintf(buf, "/Length %d 0 R\n", curObj + 1); writestr(buf);
    193   writestr(">>\n");
    194   writestr("stream\n");
    195   strmPos = fpos;
    196 
    197   writestr("BT\n");
    198   sprintf(buf, "/F1 %d Tf\n", pointSize); writestr(buf);
    199   sprintf(buf, "1 0 0 1 50 %d Tm\n", pageHeight - 40); writestr(buf);
    200   sprintf(buf, "%d TL\n", vertSpace); writestr(buf);
    201 
    202   return strmPos;
    203 }
    204 
    205 void EndPage(long streamStart){
    206   long streamEnd;
    207 
    208   writestr("ET\n");
    209   streamEnd = fpos;
    210   writestr("endstream\n");
    211   writestr("endobj\n");
    212 
    213   locations[++curObj] = fpos;
    214   sprintf(buf, "%d 0 obj\n", curObj); writestr(buf);
    215   sprintf(buf, "%lu\n", streamEnd - streamStart); writestr(buf);
    216   writestr("endobj\n");
    217 }
    218 
    219 void WritePages(){
    220   int atEOF = 0;
    221   int atFF;
    222   int atBOP;
    223   long beginstream;
    224   int lineNo, charNo;
    225   int ch, column;
    226   int padding, i;
    227 
    228   while (!atEOF) {
    229     beginstream = StartPage();
    230     column = 1;
    231     while (column++ <= columns) {
    232       atFF = 0;
    233       atBOP = 0;
    234       lineNo = 0;
    235       while (lineNo++ < lines && !atEOF && !atFF) {
    236 	writestr("(");
    237 	charNo = 0;
    238 	while (charNo++<cols &&
    239 	       (ch = getc(infile))!=EOF &&
    240 	       !(ch==FF && doFFs) &&
    241 	       ch!='\n') {
    242 	  if (ch >= 32 && ch <= 127) {
    243 	    if (ch == '(' || ch == ')' || ch == '\\') writestr("\\");
    244 	    sprintf(buf, "%c", (char)ch); writestr(buf);
    245 	  } else {
    246 	    if (ch == 9) {
    247 	      padding = tab - ((charNo - 1) % tab);
    248 	      for (i = 1; i <= padding; i++) writestr(" ");
    249 	      charNo += (padding - 1);
    250 	    } else {
    251 	      if (ch != FF) {
    252 		/* write \xxx form for dodgy character */
    253 		sprintf(buf, "\\%.3o", ch); writestr(buf);
    254 	      } else {
    255 		/* don't print anything for a FF */
    256 		charNo--;
    257 	      }
    258 	    }
    259 	  }
    260 	}
    261 	writestr(")'\n");
    262 
    263 	/* messy stuff to handle formfeeds.  Yuk! */
    264 	if (ch == EOF) atEOF = 1;
    265 	if (ch == FF) atFF = 1;
    266 	if (lineNo == lines) atBOP = 1;
    267 	if (atBOP) {
    268 	  ch = getc(infile);
    269 	  if (ch == FF) ch = getc(infile);
    270 	  if (ch == EOF) atEOF = 1;
    271 	  else ungetc(ch, infile);
    272 	}
    273 	else if (atFF) {
    274 	  ch = getc(infile);
    275 	  if (ch == EOF) atEOF = 1;
    276 	  else ungetc(ch, infile);
    277 	}
    278       }
    279 
    280       if (column <= columns) {
    281 	sprintf(buf, "1 0 0 1 %d %d Tm\n",
    282 		(pageWidth / 2) + 25, pageHeight - 40);
    283 	writestr(buf);
    284       }
    285     }
    286     EndPage(beginstream);
    287   }
    288 }
    289 
    290 void WriteRest(){
    291   long xref;
    292   int i;
    293 
    294   locations[3] = fpos;
    295   writestr("3 0 obj\n");
    296   writestr("<<\n");
    297   writestr("/Type /Pages\n");
    298   sprintf(buf, "/Count %d\n", pageNo); writestr(buf);
    299   sprintf(buf, "/MediaBox [ 0 0 %d %d ]\n", pageWidth, pageHeight); writestr(buf);
    300   writestr("/Kids [ ");
    301   for (i = 1; i <= pageNo; i++) {sprintf(buf, "%d 0 R ", pageObs[i]); writestr(buf);}
    302   writestr("]\n");
    303   writestr(">>\n");
    304   writestr("endobj\n");
    305 
    306   xref = fpos;
    307   writestr("xref\n");
    308   sprintf(buf, "0 %d\n", curObj + 1); writestr(buf);
    309   /* note that \n is translated by writestr */
    310   sprintf(buf, "0000000000 65535 f %c", LINE_END); writestr(buf);
    311   for (i = 1; i <= curObj; i++) {
    312     sprintf(buf, "%.10ld 00000 n %c", locations[i], LINE_END);
    313     writestr(buf);
    314   }
    315 
    316   writestr("trailer\n");
    317   writestr("<<\n");
    318   sprintf(buf, "/Size %d\n", curObj + 1); writestr(buf);
    319   writestr("/Root 2 0 R\n");
    320   writestr("/Info 1 0 R\n");
    321   writestr(">>\n");
    322 
    323   writestr("startxref\n");
    324   sprintf(buf, "%ld\n", xref); writestr(buf);
    325   writestr("%%EOF\n");
    326 }
    327 
    328 static const char *const usage[] = {
    329   "text2pdf [options] [filename]",
    330   NULL
    331 };
    332 
    333 int main(int argc, char **argv){
    334   int i = 1;
    335   int tmp, landscape = 0;
    336   const char *ifilename = NULL;
    337   const char *title     = NULL;
    338   int pageFormat  = -1;
    339 
    340   infile = stdin;  /* default */
    341 
    342   // Define arguments
    343   struct argparse_option options[] = {
    344     OPT_HELP(),
    345     OPT_STRING( 'T', "title"    , &title      , "title inside the document"                                           ),
    346     OPT_STRING( 'f', "font"     , &font       , "use PostScript font (must be in standard 14, default: Courier)"      ),
    347     OPT_BIT(    'I', "iso"      , &ISOEnc    , "use ISOLatin1Encoding"                                               , NULL, 1),
    348     OPT_INTEGER('s', "size"     , &pointSize , "use font at given pointsize"                                         ),
    349     OPT_INTEGER('v', "vert"     , &vertSpace , "use given line spacing in points"                                    ),
    350     OPT_INTEGER('l', "lines"    , &lines     , "lines per page (default 60, determined automatically if unspecified)"),
    351     OPT_INTEGER('c', "chars"    , &cols      , "maximum characters per line (default: 80)"                           ),
    352     OPT_INTEGER('t', "tab"      , &tab       , "spaces per tab character (default: 8)"                               ),
    353     OPT_BIT(    'F', "ff"       , &doFFs     , "ignore formfeed characters (^L)"                                     , NULL, 2),
    354     OPT_INTEGER('C', "columns"  , &columns   , "columns to format the page in"                                       ),
    355     OPT_INTEGER('x', "width"    , &pageWidth , "page width in points"                                                ),
    356     OPT_INTEGER('y', "height"   , &pageHeight, "page height in points"                                               ),
    357     OPT_INTEGER('A', NULL       , &pageFormat, "use A <int> size paper"                                              ),
    358     OPT_BIT(    'L', "landscape", &landscape , "landscape mode"                                                      , NULL, 1),
    359     OPT_END(),
    360   };
    361 
    362   // Parse arguments
    363   struct argparse argparse;
    364   argparse_init(&argparse, options, usage, 0);
    365   argparse_describe(&argparse,
    366       "\ntext2pdf makes a 7-bit clean PDF file (version 1.1) from any input file. It reads from standard input or a named file, and writes the PDF file to standard output.",
    367       "\ntext2pdf v1.1 (c) Phil Smith, 1996"
    368   );
    369   argc = argparse_parse(&argparse, argc, argv);
    370 
    371   // Handle inverted formfeed
    372   if (doFFs & 2) {
    373     doFFs = 0;
    374   }
    375 
    376   // Handle pageformat
    377   switch(pageFormat) {
    378     case 3:
    379       pageWidth  = 842;
    380       pageHeight = 1190;
    381       break;
    382     case 4:
    383       pageWidth  = 595;
    384       pageHeight = 842;
    385       break;
    386   }
    387 
    388   // Parse leftovers
    389   for(int i = 0; i<argc; i++) {
    390     ifilename = argv[i];
    391   }
    392 
    393   // Open filename if given
    394   if (ifilename) {
    395     if (!title) {
    396       title = ifilename;
    397     }
    398     infile = fopen(ifilename, "r");
    399     if (!infile) {
    400       fprintf(stderr, "%s: couldn't open input file `%s'\n", progname, ifilename);
    401       exit(1);
    402     }
    403   }
    404 
    405   // Swap width/height for landscape
    406   if (landscape) {
    407     tmp        = pageHeight;
    408     pageHeight = pageWidth;
    409     pageWidth  = tmp;
    410   }
    411 
    412   // Fallback font
    413   if (!font) {
    414     font = defaultFont;
    415   }
    416 
    417   // Prefix font with "/"
    418   char *aTmp = calloc(strlen(font)+2, sizeof(char));
    419   strcat(aTmp,"/");
    420   strcat(aTmp,font);
    421   font = aTmp;
    422 
    423   // Calculate lines per page
    424   if (lines == 0) lines = (pageHeight - 72) / vertSpace;
    425   if (lines < 1) lines = 1;
    426   /* happens to give 60 as default */
    427 
    428   WriteHeader(title);
    429   WritePages();
    430   WriteRest();
    431 
    432   return 0;
    433 }