Working on the parser - specifications - Specification and standard documents

commit 6d2de1da320e7fdc250c6be012ddaae6d4266297
parent 00d149119f20891614bd573d7ca9b6a15396cb5e
Author: finwo <finwo@pm.me>
Date:   Fri, 17 Aug 2018 18:07:22 +0200

Working on the parser

Diffstat:
M compile.php  | 205 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
D src/0000.md  | 136 -------------------------------------------------------------------------------
A src/0000.spec  | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R src/RFC2119.md -> src/RFC2119.spec  | 0

4 files changed, 239 insertions(+), 229 deletions(-)
diff --git a/compile.php b/compile.php
@@ -1,6 +1,7 @@
 <?php
 
 if(!defined('DS')) define('DS',DIRECTORY_SEPARATOR);
+if(!defined('EOL')) define('EOL',"\n");
 require(__DIR__.DS.'vendor'.DS.'autoload.php');
 $params = array();
 @array_shift($argv);
@@ -30,31 +31,14 @@ function flatten( $data, $prefix = '', &$output = [] ) {
 function unflatten( $flatArray, &$output = [] ) {
     static $accessor = null;
     $accessor = $accessor ?: new \Finwo\PropertyAccessor\PropertyAccessor(true);
-    foreach ($flatArray as $key => $value ) $accessor->set($output,$key,$value,'.');
-    return $output;
-}
-
-function configFile( $file, &$output = [] ) {
-    $fd = null;
-    if(is_array($file)) $file = implode(DS,$file);
-    if(is_string($file) && file_exists($file)) { $fd = fopen($file,'r'); $file = null; }
-    if(is_string($file)) {
-        $fd = fopen('php://temp','r+');
-        fwrite($fd,$file);
-        fseek($fd,0);
-        $file = null;
-    }
-    if(!$fd) return null;
-    while(!feof($fd)) {                                  // Loop until we're out of file
-        $line = fgets($fd);                              // Fetch the line we're about to process
-        $line = @array_shift(str_getcsv($line,'#'));     // Strip comments
-        $line = trim($line);                             // Trim line (we don't care about whitespace)
-        if(strlen($line)==0) continue;                   // Blank line = skip
-        $line = array_map('trim',str_getcsv($line,':')); // Split key & value
-        if(count($line)!=2) continue;                    // Only 1 key & 1 value supported
-        $output[strtolower($line[0])]=$line[1];          // Flat keys
+    if(is_string($flatArray)) {
+        $flatArray = str_replace(["\r\n","\r"],"\n",$flatArray);
+        $flatArray = array_filter(explode("\n",$flatArray));
+        $flatArray = array_map(function($line){return array_map('trim',explode(':',$line));},$flatArray);
+        $flatArray = array_map(function($kv){$kv[0]=strtolower($kv[0]);return $kv;},$flatArray);
+        $flatArray = array_reduce($flatArray,function($acc,$kv){$acc[$kv[0]]=$kv[1];return $acc;},[]);
     }
-    fclose($fd);
+    foreach ($flatArray as $key => $value ) $accessor->set($output,$key,$value,'.');
     return $output;
 }
 
@@ -64,27 +48,11 @@ function fcopy($src,$dst=null) {
     return $dst;
 }
 
-// Make sure we get the minimum params
-if(!isset($params['spec'])) perror('Spec missing from the parameters');
-$spec = $params['spec'];
-
-// Ensure the said spec has a src
-if(!file_exists('src'.DS.$spec.'.md')) perror('Given source does not exist');
-$filename = implode(DS,[__DIR__,'src',$spec.'.md']);
-
-// Fetch the file's headers
-$fd       = fopen($filename,'r+');
-$headers  = '';
-while(!feof($fd)) {
-    $line = trim(fgets($fd));
-    if(!$line) break;
-    $headers .= $line . "\n";
-}
-$headers = unflatten(configFile($headers));
-
 class Pipe {
     protected $processor = null;
     protected $next      = null;
+    protected $writable  = true;
+    protected $userdata  = null;
     public function __construct( $processor, Pipe $next = null ) {
         if(is_array($processor)) {
             $this->processor = array_shift($processor);
@@ -95,69 +63,120 @@ class Pipe {
         }
     }
     public function write( $chunk ) {
-        if(is_resource($this->processor)) return fwrite($this->processor,$chunk);
-        if(is_callable($this->processor)) return call_user_func($this->processor,$chunk,$this->next);
+        if(!$this->writable) return false;
+        if(is_resource($this->processor)&&$chunk) return fwrite($this->processor,$chunk);
+        if(is_callable($this->processor)) return call_user_func_array($this->processor,[$chunk,$this->next,&$this->userdata]);
         return false;
     }
+    public function end( $chunk = null ) {
+        if(!is_null($chunk)) $this->write($chunk);
+        $this->write(null);
+        if(is_resource($this->processor)) $this->processor = fclose($this->processor);
+        if($this->next instanceof Pipe) $this->next->end();
+    }
 }
 function normalize_newlines( $chunk, Pipe $next ) {
-    $next->write(str_replace(["\r\n","\r"],"\n",$chunk));
+    if(is_null($chunk)) return;
+    $next->write(str_replace(["\r\n","\r"],EOL,$chunk));
 }
 function inclusions( $chunk, Pipe $next ) {
-    if(substr($chunk,0,1)=='<') {
-        $fd = fopen('src'.DS.substr(trim($chunk),1).'.md','r');
-        while(!feof($fd)) $next->write(fgets($fd));
+    if(is_null($chunk)) return;
+    $indent = strlen(rtrim($chunk))-strlen(trim($chunk));
+    if(substr($chunk,$indent,1)=='<') {
+        $fd = fopen('src'.DS.substr(trim($chunk),1).'.spec','r');
+        while(!feof($fd)) $next->write(str_repeat(' ',$indent).fgets($fd));
         fclose($fd);
     } else {
         $next->write($chunk);
     }
 }
+function to_chars( $chunk, Pipe $next ) {
+    if(is_null($chunk)) return;
+    foreach (str_split($chunk) as $chr) $next->write($chr);
+}
+function group_symbols( $char, Pipe $next, &$data ) {
+    $data = $data ?: '';
+    switch($char) {
+        case '`':
+            $data .= $char;
+            break;
+        default:
+            if(strlen($data)) $next->write($data);
+            $data = '';
+            $next->write($char);
+    }
+}
+function group_unbreakables( $symbol, Pipe $next, &$data ) {
+    $data = $data ?: [];
+    $cnt  = count($data);
+    $top  = $cnt ? $data[$cnt-1] : null;
+    switch($symbol) {
+        case '```':
+        case '`':
+        case '"':
+        case "'":
+            if($cnt) {
+                if($top[0]==$symbol) {
+                    $me = array_pop($data);
+                    if($cnt==1) {
+                        $next->write($me[1].$symbol.EOL);
+                    } else {
+                        $data[$cnt-2][1] .= $me[1].$symbol;
+                    }
+                } else {
+                    $data[] = [$symbol,$symbol];
+                }
+            } else {
+                $data[] = [$symbol,$symbol];
+            }
+            break;
+        default:
+            if($cnt) {
+                $data[$cnt-1][1] .= $symbol;
+            } else {
+                $next->write($symbol.EOL);
+            }
+            break;
+    }
+}
+
+// Make sure we get the minimum params
+if(!isset($params['spec'])) perror('Spec missing from the parameters');
+$spec = $params['spec'];
+
+// Ensure the said spec has a src
+if(!file_exists('src'.DS.$spec.'.spec')) perror('Given source does not exist');
+$filename = implode(DS,[__DIR__,'src',$spec.'.spec']);
+
+// Fetch the file's headers
+$fd       = fopen($filename,'r+');
+$headers  = '';
+while(!feof($fd)) {
+    $line = trim(fgets($fd));
+    if(!$line) break;
+    $headers .= $line . "\n";
+}
+$headers = unflatten($headers);
 
 // Build the processing pipe
-$outfd = fopen('spec'.DS.'spec'.$spec.'.txt','c+');
-ftruncate($outfd,0);
-$pipe = new Pipe([
-    'normalize_newlines',
-    'inclusions',
-    $outfd,
-]);
+$pipes = [
+    "txt" => new Pipe([
+        'normalize_newlines',
+        'inclusions',
+        'to_chars',
+        'group_symbols',
+        'group_unbreakables',
+        fopen('spec'.DS.'spec'.$spec.'.txt','w+'),
+    ]),
+];
 
-// Run data through the pipe
-var_dump($pipe);
-while(!feof($fd)) $pipe->write(fgets($fd));
-var_dump($pipe);
+// Run data through the pipes
+while(!feof($fd)) {
+    $line = fgets($fd);
+    foreach ($pipes as $pipe)
+        $pipe->write($line);
+}
 
-//// Handle file inclusions
-//$temp_fd = fopen('php://temp','c+');
-//while(!feof($process_fd)) {
-//    $line = fgets($process_fd);
-//    if(substr($line,0,1)==='<') {
-//        $fd = fopen('src'.DS.trim(substr($line, 1)) . '.md', 'r');
-//        var_dump('src'.DS.trim(substr($line, 1)) . '.md');
-//        fcopy($fd, $temp_fd);
-//        fclose($fd);
-//    } else {
-//        fwrite($temp_fd,$line);
-//    }
-//}
-//
-//// Rewind again
-//fseek($process_fd,0);
-//ftruncate($process_fd,0);
-//fseek($temp_fd,0);
-//fcopy($temp_fd,$process_fd);
-//fseek($process_fd,0);
-//fseek($temp_fd,0);
-//ftruncate($temp_fd,0);
-//
-//
-//
-//
-//
-//echo stream_get_contents($process_fd);
-//
-//
-////var_dump($config);
-////var_dump($contents);
-//var_dump($headers);
-//var_dump($params);
+// Close the pipes
+foreach ($pipes as $pipe)
+    $pipe->end();
diff --git a/src/0000.md b/src/0000.md
@@ -1,136 +0,0 @@
-Date: 2018-08-15
-author: Robin Bron <robin@finwo.nl>
-organization: Ratus B.V.
-
-# Specification Format
-
-## Conventions
-
-<RFC2119
-
-## Character encoding
-
-Plain-text files for specifications MUST use the `CP437` standard with the
-exclusion of character code 0x0A which represents a line feed as specified in
-`RFC20`.
-
-## Line definition
-
-A line of text is a sequence of 0 or more characters followed by a line feed
-character. For the sake of and clarity, the ending line feed character is part
-of the line.
-
-Lines MUST NOT exceed 72 characters in length, including the ending line feed
-character. A line is called a blank line if it consists of only a line feed
-charachter.
-
-### Line numbering
-
-To ensure the following page dimension section is clear, we need to define how
-lines are numbered.
-
-Assuming a document is in digital format and has a length of greater than
-0 bytes, the first character in the document is part of line 0. Numbering lines
-from 0 instead of 1 gives us an advantage of clarity in the next section.
-
-## Pages
-
-A page is a sequence of 60 lines. That means for every line number n, the line
-is the start of a new page when $$ n mod 60 = 0 $$.
-
-### Page header
-
-The first line of a page SHOULD consist of a left-aligned spec number
-indicator, a centered (short) document title and a right-aligned publishing
-date (see [Document header][document header]). The second line of a page MUST
-always be blank, excluding the first page of the document.
-
-### Page footer
-
-The last line of a page MUST consist of a left-align last name of the author
-and a right-aligned page number between square brackets. The second-to-last
-line of a page must be blank, just like the second line of a page.
-
-## Paragraphs
-
-A paragraph is a sequence of consecutive lines containing characters other than
-only a line feed. Paragraphs are separated by either a blank line or a page
-break. Paragraphs MUST NOT span multiple pages, limiting their size to 56
-lines.
-
-## Document header
-
-The first lines of the first page of a specification document SHALL always
-contain left-aligned description headers (see
-[Descriptive header][descriptive header]) and right-aligned author
-identification and a right-aligned publishing date.
-
-After the initial lines (see [Descriptive header][descriptive header] through
-[Publish date][publish date]), the document title is REQUIRED to be written on
-the first page of the document. For it's specification, see section
-[document title][document title].
-
-Further information on the first page should give a quick description of the
-contents of the document.
-
-### Descriptive header
-    
-Each descriptive header is made up of a key and a value. Whitespace is not
-allowed in both the key and the value. Whitespace can only be included in the
-value by wrapping the value in quote characters.
-
-The key of the header consists of all characters of the line up to the first
-semicolon, excluding the semicolon itself and omitting all white-space
-characters.
-
-The value of the header starts at the first non-whitespace character after the
-first semicolon of the line. If the first character is a quote, the value ends
-at the next quote in the line. If the first character is not a quote, the value
-ends at the next whitespace character.
-
-### Short author identification
-    
-In order to allow authors to take some credit and to track who has written
-what, the author's name MUST be added right-aligned on the first line of the
-first page of the document. To prevent mixing notations between documents, the
-names SHOULD be written as only the first letters of all given names in
-capitals, separated by dots, a space and the Family name starting with a
-capital. When written by a group with a name, the short author identification
-string SHOULD state the group's name.
-
-### Publish date
-
-Because a document is unlikely to have been written within a day, a publish
-date is simply the month's name starting with a capital followed by the year,
-both following the Gregorian calendar.
-
-## Document footer
-   
-The document SHOULD close, starting on a new page, with all informative
-resources which were used to write the document, noting their keyword and
-document title. When possible, a URL to the resource SHOULD be included.
-
-After the informative resources, the document SHOULD end with one or several
-pages dedicated to the information of the author(s) and if possible their
-contact information.
-
-## Section titles
-   
-Section titles SHOULD be a short text about the subject the section describes.
-Whether it is simply the keyword of what it explains, a problem statement or
-other type of text is up to the author as long as it's relevant to the
-section's body.
-
-A section title MUST start with a capital character & MUST NOT contain any
-other capital letters, excluding where they are required in names or
-abbreviations.
-
-## Document title
-   
-The title of the document should clearly state the main subject of the document
-and it's contents. Each word of the document title must start with a capital
-character when noted as the title of the document.
-
-On the first page of the document, the title should be centered horizontally
-and have at least 2 blank lines both above and below it. The document title
-SHOULD be as close to the document's descriptive headers.
diff --git a/src/0000.spec b/src/0000.spec
@@ -0,0 +1,127 @@
+Date: 2018-08-15
+author: Robin Bron <robin@finwo.nl>
+organization: Ratus B.V.
+title: Specification Format
+
+Conventions:
+  <RFC2119
+
+Code block test:
+  ```javascript
+    // This is a code block test
+    console.log('Hello World');
+  ```
+
+Character encoding:
+  Plain-text files for specifications MUST use the `CP437` standard with the
+  exclusion of character code 0x0A which represents a line feed as specified in
+  `RFC20`.
+
+Line definition:
+  A line of text is a sequence of 0 or more characters followed by a line feed
+  character. For the sake of clarity, the ending line feed character is
+  part of the line.
+
+  Lines MUST NOT exceed 72 characters in length, including the ending line feed
+  character. A line is called a blank line if it consists of only a line feed
+  character.
+
+Line numbering:
+  To ensure the following page dimension section is clear, we need to define
+  how lines are numbered.
+
+  Assuming a document is in digital format and has a length of greater than
+  0 bytes, the first character in the document is part of line 0. Numbering
+  lines from 0 instead of 1 gives us an advantage of clarity in the next
+  section.
+
+Pages:
+  A page is a sequence of 60 lines. That means for every line number n, the
+  line is the start of a new page when $$ n mod 60 = 0 $$.
+
+Pages:Page header:
+  The first line of a page SHOULD consist of a left-aligned spec number
+  indicator, a centered (short) document title and a right-aligned publishing
+  date (see [Document header][document header]). The second line of a page MUST
+  always be blank, excluding the first page of the document.
+
+Pages:Page footer:
+  The last line of a page MUST consist of a left-aligned last name of the author
+  and a right-aligned page number between square brackets. The second-to-last
+  line of a page must be blank, just like the second line of a page.
+
+Paragraphs:
+  A paragraph is a sequence of consecutive lines containing characters other
+  than only a line feed. Paragraphs are separated by either a blank line or a
+  page break. Paragraphs MUST NOT span multiple pages, limiting their size to
+  56 lines.
+
+Document header:
+  The first lines of the first page of a specification document SHALL always
+  contain left-aligned description headers (see
+  [Descriptive header][descriptive header]) and right-aligned author
+  identification and a right-aligned publishing date.
+
+  After the initial lines (see [Descriptive header][descriptive header] through
+  [Publish date][publish date]), the document title is REQUIRED to be written
+  on the first page of the document. For its specification, see section
+  [document title][document title].
+
+  Further information on the first page should give a quick description of the
+  contents of the document.
+
+Document header:Descriptive header:
+  Each descriptive header is made up of a key and a value. Whitespace is not
+  allowed in both the key and the value. Whitespace can only be included in the
+  value by wrapping the value in quote characters.
+
+  The key of the header consists of all characters of the line up to the first
+  colon, excluding the colon itself and omitting all white-space
+  characters.
+
+  The value of the header starts at the first non-whitespace character after
+  the first colon of the line. If the first character is a quote, the value
+  ends at the next quote in the line. If the first character is not a quote,
+  the value ends at the next whitespace character.
+
+Document header:Short author identification:
+  In order to allow authors to take some credit and to track who has written
+  what, the author's name MUST be added right-aligned on the first line of the
+  first page of the document. To prevent mixing notations between documents,
+  the names SHOULD be written as only the first letters of all given names in
+  capitals, separated by dots, a space and the Family name starting with a
+  capital. When written by a group with a name, the short author identification
+  string SHOULD state the group's name.
+
+Document header:Publish date:
+  Because a document is unlikely to have been written within a day, a publish
+  date is simply the month's name starting with a capital followed by the year,
+  both following the Gregorian calendar.
+
+Document footer:
+  The document SHOULD close, starting on a new page, with all informative
+  resources which were used to write the document, noting their keyword and
+  document title. When possible, a URL to the resource SHOULD be included.
+
+  After the informative resources, the document SHOULD end with one or several
+  pages dedicated to the information of the author(s) and if possible their
+  contact information.
+
+Section titles:
+  Section titles SHOULD be a short text about the subject the section
+  describes. Whether it is simply the keyword of what it explains, a problem
+  statement or other type of text is up to the author as long as it's relevant
+  to the section's body.
+
+  A section title MUST start with a capital character & MUST NOT contain any
+  other capital letters, excluding where they are required in names or
+  abbreviations.
+
+Document title:
+  The title of the document should clearly state the main subject of the
+  document and its contents. Each word of the document title must start with a
+  capital character when noted as the title of the document.
+
+  On the first page of the document, the title should be centered horizontally
+  and have at least 2 blank lines both above and below it. The document title
+  SHOULD be as close to the document's descriptive headers as possible.
diff --git a/src/RFC2119.md b/src/RFC2119.spec

	specifications Specification and standard documents
	git clone git://git.finwo.net/misc/specifications
	Log \| Files \| Refs \| README \| LICENSE

M	compile.php	\|	205	+++++++++++++++++++++++++++++++++++++++++++------------------------------------
D	src/0000.md	\|	136	-------------------------------------------------------------------------------
A	src/0000.spec	\|	127	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R	src/RFC2119.md -> src/RFC2119.spec	\|	0