main.c (13931B)
1 /************************************************************************** 2 This ugly, sparsely-commented program is the source for text2pdf version 3 1.1. It should be ANSI-conforming and compile on most platforms. You'll 4 need to change LF_EXTRA to 1 for machines which write 2 characters for \n. 5 These include PCs, of course. 6 7 You may distribute the source or compiled versions free of charge. You may 8 not alter the source in any way other than those mentioned above without 9 the permission of the author, Phil Smith <phil@bagobytes.co.uk>. 10 11 Please send any comments to the author. 12 13 Copyright (c) Phil Smith, 1996 14 15 REVISION HISTORY 16 17 Version 1.1 18 11 Oct 96 Added handling of form-feed characters, removed need for tmp file, 19 put reference to resources in each page (avoid bug in Acrobat), 20 changed date format to PDF-1.1 standard. 21 12 Jun 96 Added check to avoid blank last page 22 12 Jun 96 Added LINE_END def to get round platform-specific \r, \n etc. 23 18 Mar 96 Added ISOLatin1Encoding option 24 **************************************************************************/ 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <time.h> 30 31 #include "argparse.h" 32 33 #ifndef SEEK_SET 34 #define SEEK_SET 0 35 #endif 36 37 #define LF_EXTRA 0 /* how many extra characters are written for \n */ 38 /* change to 1 for PCs (where \n => <CR><LF>) */ 39 40 #define LINE_END '\015' /* CR used in xref table */ 41 #define FF 12 /* formfeed character (^L) */ 42 43 char *appname = "text2pdf v1.1"; 44 char *progname = "text2pdf"; 45 46 FILE *infile; 47 int pageNo = 0; 48 int pageObs[500]; 49 int curObj = 5; /* object number being or last written */ 50 long locations[1000]; 51 52 char *font = NULL; 53 char *defaultFont = "Courier"; 54 int ISOEnc = 0; 55 int doFFs = 1; 56 int tab = 8; 57 int pointSize = 10; 58 int vertSpace = 12; 59 int lines = 0; 60 int cols = 80; /* max chars per output line */ 61 int columns = 1; /* number of columns */ 62 63 /* Default paper is Letter size, as in distiller */ 64 int pageHeight = 792; 65 int pageWidth = 612; 66 67 unsigned char buf[1024]; 68 unsigned long fpos = 0; 69 70 void writestr(char *str) { 71 /* Everything written to the PDF file goes through this function. */ 72 /* This means we can keep track of the file position without using */ 73 /* ftell on a real (tmp) file. However, PCs write out 2 characters */ 74 /* for \n, so we need this ugly loop to keep fpos correct */ 75 76 fpos += strlen(str); 77 while (*str) { 78 if (*str == '\n') fpos += LF_EXTRA; 79 putchar(*str++); 80 } 81 } 82 83 84 void WriteHeader(char *title){ 85 86 struct tm *ltime; 87 time_t clock; 88 char datestring[30]; 89 90 time(&clock); 91 ltime = localtime(&clock); 92 93 strftime(datestring, 30, "D:%Y%m%d%H%M%S", ltime); 94 95 writestr("%PDF-1.1\n"); 96 locations[1] = fpos; 97 writestr("1 0 obj\n"); 98 writestr("<<\n"); 99 sprintf(buf, "/CreationDate (%s)\n", datestring); writestr(buf); 100 sprintf(buf, "/Producer (%s (\\251 Phil Smith, 1996))\n", appname); writestr(buf); 101 if (title) {sprintf(buf, "/Title (%s)\n", title); writestr(buf);} 102 writestr(">>\n"); 103 writestr("endobj\n"); 104 105 locations[2] = fpos; 106 writestr("2 0 obj\n"); 107 writestr("<<\n"); 108 writestr("/Type /Catalog\n"); 109 writestr("/Pages 3 0 R\n"); 110 writestr(">>\n"); 111 writestr("endobj\n"); 112 113 locations[4] = fpos; 114 writestr("4 0 obj\n"); 115 writestr("<<\n"); 116 writestr("/Type /Font\n"); 117 writestr("/Subtype /Type1\n"); 118 writestr("/Name /F1\n"); 119 sprintf(buf, "/BaseFont %s\n", font); writestr(buf); 120 if (ISOEnc) { 121 writestr("/Encoding <<\n"); 122 writestr("/Differences [ 0 /.notdef /.notdef /.notdef /.notdef\n"); 123 writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); 124 writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); 125 writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); 126 writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); 127 writestr("/.notdef /.notdef /.notdef /.notdef /space /exclam\n"); 128 writestr("/quotedbl /numbersign /dollar /percent /ampersand\n"); 129 writestr("/quoteright /parenleft /parenright /asterisk /plus /comma\n"); 130 writestr("/hyphen /period /slash /zero /one /two /three /four /five\n"); 131 writestr("/six /seven /eight /nine /colon /semicolon /less /equal\n"); 132 writestr("/greater /question /at /A /B /C /D /E /F /G /H /I /J /K /L\n"); 133 writestr("/M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft\n"); 134 writestr("/backslash /bracketright /asciicircum /underscore\n"); 135 writestr("/quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p\n"); 136 writestr("/q /r /s /t /u /v /w /x /y /z /braceleft /bar /braceright\n"); 137 writestr("/asciitilde /.notdef /.notdef /.notdef /.notdef /.notdef\n"); 138 writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); 139 writestr("/.notdef /.notdef /.notdef /.notdef /.notdef /.notdef\n"); 140 writestr("/dotlessi /grave /acute /circumflex /tilde /macron /breve\n"); 141 writestr("/dotaccent /dieresis /.notdef /ring /cedilla /.notdef\n"); 142 writestr("/hungarumlaut /ogonek /caron /space /exclamdown /cent\n"); 143 writestr("/sterling /currency /yen /brokenbar /section /dieresis\n"); 144 writestr("/copyright /ordfeminine /guillemotleft /logicalnot /hyphen\n"); 145 writestr("/registered /macron /degree /plusminus /twosuperior\n"); 146 writestr("/threesuperior /acute /mu /paragraph /periodcentered\n"); 147 writestr("/cedilla /onesuperior /ordmasculine /guillemotright\n"); 148 writestr("/onequarter /onehalf /threequarters /questiondown /Agrave\n"); 149 writestr("/Aacute /Acircumflex /Atilde /Adieresis /Aring /AE\n"); 150 writestr("/Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave\n"); 151 writestr("/Iacute /Icircumflex /Idieresis /Eth /Ntilde /Ograve\n"); 152 writestr("/Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash\n"); 153 writestr("/Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn\n"); 154 writestr("/germandbls /agrave /aacute /acircumflex /atilde /adieresis\n"); 155 writestr("/aring /ae /ccedilla /egrave /eacute /ecircumflex\n"); 156 writestr("/edieresis /igrave /iacute /icircumflex /idieresis /eth\n"); 157 writestr("/ntilde /ograve /oacute /ocircumflex /otilde /odieresis\n"); 158 writestr("/divide /oslash /ugrave /uacute /ucircumflex /udieresis\n"); 159 writestr("/yacute /thorn /ydieresis ]\n"); 160 writestr(">>\n"); 161 } 162 163 writestr(">>\n"); 164 writestr("endobj\n"); 165 166 locations[5] = fpos; 167 writestr("5 0 obj\n"); 168 writestr("<<\n"); 169 writestr(" /Font << /F1 4 0 R >>\n"); 170 writestr(" /ProcSet [ /PDF /Text ]\n"); 171 writestr(">>\n"); 172 writestr("endobj\n"); 173 } 174 175 long StartPage(){ 176 long strmPos; 177 178 locations[++curObj] = fpos; 179 pageObs[++pageNo] = curObj; 180 sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); 181 writestr("<<\n"); 182 writestr("/Type /Page\n"); 183 writestr("/Parent 3 0 R\n"); 184 writestr("/Resources 5 0 R\n"); 185 sprintf(buf, "/Contents %d 0 R\n", ++curObj); writestr(buf); 186 writestr(">>\n"); 187 writestr("endobj\n"); 188 189 locations[curObj] = fpos; 190 sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); 191 writestr("<<\n"); 192 sprintf(buf, "/Length %d 0 R\n", curObj + 1); writestr(buf); 193 writestr(">>\n"); 194 writestr("stream\n"); 195 strmPos = fpos; 196 197 writestr("BT\n"); 198 sprintf(buf, "/F1 %d Tf\n", pointSize); writestr(buf); 199 sprintf(buf, "1 0 0 1 50 %d Tm\n", pageHeight - 40); writestr(buf); 200 sprintf(buf, "%d TL\n", vertSpace); writestr(buf); 201 202 return strmPos; 203 } 204 205 void EndPage(long streamStart){ 206 long streamEnd; 207 208 writestr("ET\n"); 209 streamEnd = fpos; 210 writestr("endstream\n"); 211 writestr("endobj\n"); 212 213 locations[++curObj] = fpos; 214 sprintf(buf, "%d 0 obj\n", curObj); writestr(buf); 215 sprintf(buf, "%lu\n", streamEnd - streamStart); writestr(buf); 216 writestr("endobj\n"); 217 } 218 219 void WritePages(){ 220 int atEOF = 0; 221 int atFF; 222 int atBOP; 223 long beginstream; 224 int lineNo, charNo; 225 int ch, column; 226 int padding, i; 227 228 while (!atEOF) { 229 beginstream = StartPage(); 230 column = 1; 231 while (column++ <= columns) { 232 atFF = 0; 233 atBOP = 0; 234 lineNo = 0; 235 while (lineNo++ < lines && !atEOF && !atFF) { 236 writestr("("); 237 charNo = 0; 238 while (charNo++<cols && 239 (ch = getc(infile))!=EOF && 240 !(ch==FF && doFFs) && 241 ch!='\n') { 242 if (ch >= 32 && ch <= 127) { 243 if (ch == '(' || ch == ')' || ch == '\\') writestr("\\"); 244 sprintf(buf, "%c", (char)ch); writestr(buf); 245 } else { 246 if (ch == 9) { 247 padding = tab - ((charNo - 1) % tab); 248 for (i = 1; i <= padding; i++) writestr(" "); 249 charNo += (padding - 1); 250 } else { 251 if (ch != FF) { 252 /* write \xxx form for dodgy character */ 253 sprintf(buf, "\\%.3o", ch); writestr(buf); 254 } else { 255 /* don't print anything for a FF */ 256 charNo--; 257 } 258 } 259 } 260 } 261 writestr(")'\n"); 262 263 /* messy stuff to handle formfeeds. Yuk! */ 264 if (ch == EOF) atEOF = 1; 265 if (ch == FF) atFF = 1; 266 if (lineNo == lines) atBOP = 1; 267 if (atBOP) { 268 ch = getc(infile); 269 if (ch == FF) ch = getc(infile); 270 if (ch == EOF) atEOF = 1; 271 else ungetc(ch, infile); 272 } 273 else if (atFF) { 274 ch = getc(infile); 275 if (ch == EOF) atEOF = 1; 276 else ungetc(ch, infile); 277 } 278 } 279 280 if (column <= columns) { 281 sprintf(buf, "1 0 0 1 %d %d Tm\n", 282 (pageWidth / 2) + 25, pageHeight - 40); 283 writestr(buf); 284 } 285 } 286 EndPage(beginstream); 287 } 288 } 289 290 void WriteRest(){ 291 long xref; 292 int i; 293 294 locations[3] = fpos; 295 writestr("3 0 obj\n"); 296 writestr("<<\n"); 297 writestr("/Type /Pages\n"); 298 sprintf(buf, "/Count %d\n", pageNo); writestr(buf); 299 sprintf(buf, "/MediaBox [ 0 0 %d %d ]\n", pageWidth, pageHeight); writestr(buf); 300 writestr("/Kids [ "); 301 for (i = 1; i <= pageNo; i++) {sprintf(buf, "%d 0 R ", pageObs[i]); writestr(buf);} 302 writestr("]\n"); 303 writestr(">>\n"); 304 writestr("endobj\n"); 305 306 xref = fpos; 307 writestr("xref\n"); 308 sprintf(buf, "0 %d\n", curObj + 1); writestr(buf); 309 /* note that \n is translated by writestr */ 310 sprintf(buf, "0000000000 65535 f %c", LINE_END); writestr(buf); 311 for (i = 1; i <= curObj; i++) { 312 sprintf(buf, "%.10ld 00000 n %c", locations[i], LINE_END); 313 writestr(buf); 314 } 315 316 writestr("trailer\n"); 317 writestr("<<\n"); 318 sprintf(buf, "/Size %d\n", curObj + 1); writestr(buf); 319 writestr("/Root 2 0 R\n"); 320 writestr("/Info 1 0 R\n"); 321 writestr(">>\n"); 322 323 writestr("startxref\n"); 324 sprintf(buf, "%ld\n", xref); writestr(buf); 325 writestr("%%EOF\n"); 326 } 327 328 static const char *const usage[] = { 329 "text2pdf [options] [filename]", 330 NULL 331 }; 332 333 int main(int argc, char **argv){ 334 int i = 1; 335 int tmp, landscape = 0; 336 const char *ifilename = NULL; 337 const char *title = NULL; 338 int pageFormat = -1; 339 340 infile = stdin; /* default */ 341 342 // Define arguments 343 struct argparse_option options[] = { 344 OPT_HELP(), 345 OPT_STRING( 'T', "title" , &title , "title inside the document" ), 346 OPT_STRING( 'f', "font" , &font , "use PostScript font (must be in standard 14, default: Courier)" ), 347 OPT_BIT( 'I', "iso" , &ISOEnc , "use ISOLatin1Encoding" , NULL, 1), 348 OPT_INTEGER('s', "size" , &pointSize , "use font at given pointsize" ), 349 OPT_INTEGER('v', "vert" , &vertSpace , "use given line spacing in points" ), 350 OPT_INTEGER('l', "lines" , &lines , "lines per page (default 60, determined automatically if unspecified)"), 351 OPT_INTEGER('c', "chars" , &cols , "maximum characters per line (default: 80)" ), 352 OPT_INTEGER('t', "tab" , &tab , "spaces per tab character (default: 8)" ), 353 OPT_BIT( 'F', "ff" , &doFFs , "ignore formfeed characters (^L)" , NULL, 2), 354 OPT_INTEGER('C', "columns" , &columns , "columns to format the page in" ), 355 OPT_INTEGER('x', "width" , &pageWidth , "page width in points" ), 356 OPT_INTEGER('y', "height" , &pageHeight, "page height in points" ), 357 OPT_INTEGER('A', NULL , &pageFormat, "use A <int> size paper" ), 358 OPT_BIT( 'L', "landscape", &landscape , "landscape mode" , NULL, 1), 359 OPT_END(), 360 }; 361 362 // Parse arguments 363 struct argparse argparse; 364 argparse_init(&argparse, options, usage, 0); 365 argparse_describe(&argparse, 366 "\ntext2pdf makes a 7-bit clean PDF file (version 1.1) from any input file. It reads from standard input or a named file, and writes the PDF file to standard output.", 367 "\ntext2pdf v1.1 (c) Phil Smith, 1996" 368 ); 369 argc = argparse_parse(&argparse, argc, argv); 370 371 // Handle inverted formfeed 372 if (doFFs & 2) { 373 doFFs = 0; 374 } 375 376 // Handle pageformat 377 switch(pageFormat) { 378 case 3: 379 pageWidth = 842; 380 pageHeight = 1190; 381 break; 382 case 4: 383 pageWidth = 595; 384 pageHeight = 842; 385 break; 386 } 387 388 // Parse leftovers 389 for(int i = 0; i<argc; i++) { 390 ifilename = argv[i]; 391 } 392 393 // Open filename if given 394 if (ifilename) { 395 if (!title) { 396 title = ifilename; 397 } 398 infile = fopen(ifilename, "r"); 399 if (!infile) { 400 fprintf(stderr, "%s: couldn't open input file `%s'\n", progname, ifilename); 401 exit(1); 402 } 403 } 404 405 // Swap width/height for landscape 406 if (landscape) { 407 tmp = pageHeight; 408 pageHeight = pageWidth; 409 pageWidth = tmp; 410 } 411 412 // Fallback font 413 if (!font) { 414 font = defaultFont; 415 } 416 417 // Prefix font with "/" 418 char *aTmp = calloc(strlen(font)+2, sizeof(char)); 419 strcat(aTmp,"/"); 420 strcat(aTmp,font); 421 font = aTmp; 422 423 // Calculate lines per page 424 if (lines == 0) lines = (pageHeight - 72) / vertSpace; 425 if (lines < 1) lines = 1; 426 /* happens to give 60 as default */ 427 428 WriteHeader(title); 429 WritePages(); 430 WriteRest(); 431 432 return 0; 433 }