commit 6d2de1da320e7fdc250c6be012ddaae6d4266297
parent 00d149119f20891614bd573d7ca9b6a15396cb5e
Author: finwo <finwo@pm.me>
Date: Fri, 17 Aug 2018 18:07:22 +0200
Working on the parser
Diffstat:
4 files changed, 239 insertions(+), 229 deletions(-)
diff --git a/compile.php b/compile.php
@@ -1,6 +1,7 @@
<?php
if(!defined('DS')) define('DS',DIRECTORY_SEPARATOR);
+if(!defined('EOL')) define('EOL',"\n");
require(__DIR__.DS.'vendor'.DS.'autoload.php');
$params = array();
@array_shift($argv);
@@ -30,31 +31,14 @@ function flatten( $data, $prefix = '', &$output = [] ) {
+/**
+ * Expand a flat key/value map into $output through the property accessor.
+ *
+ * When $flatArray is a string it is first parsed as header text: one
+ * "key: value" pair per line, keys lower-cased, keys and values trimmed.
+ * Every pair is then handed to the accessor's set() with '.' as separator
+ * (presumably dotted keys become nested entries - confirm against
+ * Finwo\PropertyAccessor).
+ *
+ * @param array|string $flatArray Flat map, or raw "key: value" header text
+ * @param array        $output    Target that receives the values (by reference)
+ * @return array The populated $output
+ */
function unflatten( $flatArray, &$output = [] ) {
static $accessor = null;
$accessor = $accessor ?: new \Finwo\PropertyAccessor\PropertyAccessor(true);
- foreach ($flatArray as $key => $value ) $accessor->set($output,$key,$value,'.');
- return $output;
-}
-
-function configFile( $file, &$output = [] ) {
- $fd = null;
- if(is_array($file)) $file = implode(DS,$file);
- if(is_string($file) && file_exists($file)) { $fd = fopen($file,'r'); $file = null; }
- if(is_string($file)) {
- $fd = fopen('php://temp','r+');
- fwrite($fd,$file);
- fseek($fd,0);
- $file = null;
- }
- if(!$fd) return null;
- while(!feof($fd)) { // Loop until we're out of file
- $line = fgets($fd); // Fetch the line we're about to process
- $line = @array_shift(str_getcsv($line,'#')); // Strip comments
- $line = trim($line); // Trim line (we don't care about whitespace)
- if(strlen($line)==0) continue; // Blank line = skip
- $line = array_map('trim',str_getcsv($line,':')); // Split key & value
- if(count($line)!=2) continue; // Only 1 key & 1 value supported
- $output[strtolower($line[0])]=$line[1]; // Flat keys
+  if(is_string($flatArray)) {
+    // Normalize line endings so the text can be split on "\n" alone
+    $lines     = explode("\n",str_replace(["\r\n","\r"],"\n",$flatArray));
+    $flatArray = [];
+    foreach ($lines as $line) {
+      // Split on the FIRST colon only, so a value may itself contain ':'
+      $kv = array_map('trim',explode(':',$line,2));
+      // Skip blank lines and lines without a usable "key: value" shape
+      if(count($kv)!=2 || !strlen($kv[0])) continue;
+      $flatArray[strtolower($kv[0])] = $kv[1];
+    }
}
- fclose($fd);
+  foreach ($flatArray as $key => $value ) $accessor->set($output,$key,$value,'.');
return $output;
}
@@ -64,27 +48,11 @@ function fcopy($src,$dst=null) {
return $dst;
}
-// Make sure we get the minimum params
-if(!isset($params['spec'])) perror('Spec missing from the parameters');
-$spec = $params['spec'];
-
-// Ensure the said spec has a src
-if(!file_exists('src'.DS.$spec.'.md')) perror('Given source does not exist');
-$filename = implode(DS,[__DIR__,'src',$spec.'.md']);
-
-// Fetch the file's headers
-$fd = fopen($filename,'r+');
-$headers = '';
-while(!feof($fd)) {
- $line = trim(fgets($fd));
- if(!$line) break;
- $headers .= $line . "\n";
-}
-$headers = unflatten(configFile($headers));
-
class Pipe {
protected $processor = null;
protected $next = null;
+ protected $writable = true;
+ protected $userdata = null;
public function __construct( $processor, Pipe $next = null ) {
if(is_array($processor)) {
$this->processor = array_shift($processor);
@@ -95,69 +63,120 @@ class Pipe {
}
}
+  /**
+   * Push a chunk into this pipe's processor.
+   *
+   * A stream-resource processor receives the chunk through fwrite(); a
+   * callable processor is invoked as processor($chunk, $next, &$userdata).
+   * A null chunk is what end() sends to signal end-of-stream.
+   *
+   * @param mixed $chunk Data to process, or null for end-of-stream
+   * @return mixed Result of fwrite() or of the callable; false when nothing was done
+   */
public function write( $chunk ) {
- if(is_resource($this->processor)) return fwrite($this->processor,$chunk);
- if(is_callable($this->processor)) return call_user_func($this->processor,$chunk,$this->next);
+  if(!$this->writable) return false;
+  if(is_resource($this->processor)) {
+    // Skip only null/empty chunks; a plain truthiness test would also drop the chunk "0"
+    if(!is_scalar($chunk) || !strlen((string)$chunk)) return false;
+    return fwrite($this->processor,(string)$chunk);
+  }
+  if(is_callable($this->processor)) return call_user_func_array($this->processor,[$chunk,$this->next,&$this->userdata]);
return false;
}
+  /**
+   * Finish this pipe and every pipe chained after it.
+   *
+   * Optionally writes a final chunk, sends the null end-of-stream marker to
+   * the processor, closes a stream-resource processor and then ends the next
+   * pipe. Calling end() again afterwards is a no-op.
+   *
+   * @param mixed $chunk Optional last chunk to write before closing
+   * @return void
+   */
+  public function end( $chunk = null ) {
+    // Already ended: do not signal end-of-stream downstream a second time
+    if(!$this->writable) return;
+    if(!is_null($chunk)) $this->write($chunk);
+    $this->write(null);
+    // From here on write() refuses further chunks
+    $this->writable = false;
+    if(is_resource($this->processor)) fclose($this->processor);
+    $this->processor = null;
+    if($this->next instanceof Pipe) $this->next->end();
+  }
}
+/**
+ * Pipe stage: convert "\r\n" and "\r" line endings in a chunk to EOL and
+ * pass the result to the next pipe.
+ *
+ * @param string|null $chunk Text to normalize; null (end-of-stream) is ignored
+ * @param Pipe        $next  Pipe that receives the normalized text
+ */
function normalize_newlines( $chunk, Pipe $next ) {
- $next->write(str_replace(["\r\n","\r"],"\n",$chunk));
+  // This stage buffers nothing, so the end-of-stream marker needs no handling
+  if($chunk !== null) {
+    // The pair "\r\n" is listed first so it is replaced as a unit, before lone "\r"
+    $normalized = str_replace(["\r\n","\r"],EOL,$chunk);
+    $next->write($normalized);
+  }
}
+/**
+ * Pipe stage: expand inclusion lines.
+ *
+ * A line whose first non-whitespace character is '<' names another source:
+ * "<NAME" is replaced by the lines of src/NAME.spec, each prefixed with as
+ * many spaces as the inclusion line had leading whitespace characters.
+ * Any other line is passed through unchanged.
+ *
+ * @param string|null $chunk One input line; null (end-of-stream) is ignored
+ * @param Pipe        $next  Pipe that receives the resulting lines
+ */
function inclusions( $chunk, Pipe $next ) {
- if(substr($chunk,0,1)=='<') {
- $fd = fopen('src'.DS.substr(trim($chunk),1).'.md','r');
- while(!feof($fd)) $next->write(fgets($fd));
+  if(is_null($chunk)) return;
+  // Number of leading whitespace characters on the line
+  $indent = strlen(rtrim($chunk))-strlen(trim($chunk));
+  if(substr($chunk,$indent,1)=='<') {
+    $path = 'src'.DS.substr(trim($chunk),1).'.spec';
+    $fd   = is_readable($path) ? fopen($path,'r') : false;
+    // Without a valid handle the read loop below could never terminate
+    if(!$fd) {
+      perror('Included source does not exist: '.$path);
+      return;
+    }
+    // fgets() yields false at end-of-file; stop there instead of forwarding it
+    while(($line = fgets($fd)) !== false) $next->write(str_repeat(' ',$indent).$line);
fclose($fd);
} else {
$next->write($chunk);
}
}
+/**
+ * Pipe stage: split a chunk with str_split() and forward the pieces one at
+ * a time, in order (one byte per piece).
+ *
+ * @param string|null $chunk Text to split; null (end-of-stream) is ignored
+ * @param Pipe        $next  Pipe that receives each piece
+ */
+function to_chars( $chunk, Pipe $next ) {
+  if($chunk === null) {
+    return;
+  }
+  $characters = str_split($chunk);
+  foreach ($characters as $character) {
+    $next->write($character);
+  }
+}
+/**
+ * Pipe stage: merge runs of consecutive backticks into a single symbol.
+ *
+ * Backticks are collected in $data; the first other character flushes the
+ * collected run as one chunk and is then forwarded itself. At end-of-stream
+ * (null) a pending run is flushed and nothing else is forwarded.
+ *
+ * @param string|null $char  One character, or null for end-of-stream
+ * @param Pipe        $next  Pipe that receives the symbols
+ * @param string|null $data  Pending backtick run, kept between calls (by reference)
+ */
+function group_symbols( $char, Pipe $next, &$data ) {
+  $data = $data ?: '';
+  if(is_null($char)) {
+    // Flush what is pending, but do not pass null on as a chunk:
+    // Pipe::end() delivers the end-of-stream marker to the next pipe itself
+    if(strlen($data)) $next->write($data);
+    $data = '';
+    return;
+  }
+  switch($char) {
+    case '`':
+      $data .= $char;
+      break;
+    default:
+      if(strlen($data)) $next->write($data);
+      $data = '';
+      $next->write($char);
+  }
+}
+/**
+ * Pipe stage: collect everything between a pair of identical delimiters
+ * (```, `, " or ') into one chunk.
+ *
+ * $data is a stack of open groups, each stored as [delimiter, text so far].
+ * A delimiter equal to the innermost open group's delimiter closes that
+ * group; any other delimiter opens a nested group. A closed outermost
+ * group is written to $next followed by EOL; a closed nested group is
+ * appended to its parent. Symbols outside any group are written
+ * immediately, each followed by EOL.
+ *
+ * At end-of-stream (null) the text of groups that were never closed is
+ * written out instead of being discarded.
+ *
+ * @param string|null $symbol One symbol, or null for end-of-stream
+ * @param Pipe        $next   Pipe that receives the grouped chunks
+ * @param array|null  $data   Stack of open groups, kept between calls (by reference)
+ */
+function group_unbreakables( $symbol, Pipe $next, &$data ) {
+  $data = $data ?: [];
+  if(is_null($symbol)) {
+    if($data) {
+      // An outer group's text always precedes its inner group's text,
+      // so joining from the bottom of the stack restores the input order
+      $pending = '';
+      foreach ($data as $group) $pending .= $group[1];
+      $data = [];
+      $next->write($pending.EOL);
+    }
+    return;
+  }
+  $cnt = count($data);
+  $top = $cnt ? $data[$cnt-1] : null;
+  switch($symbol) {
+    case '```':
+    case '`':
+    case '"':
+    case "'":
+      if($cnt && $top[0]==$symbol) {
+        // Closing delimiter of the innermost open group
+        $me = array_pop($data);
+        if($cnt==1) {
+          $next->write($me[1].$symbol.EOL);
+        } else {
+          $data[$cnt-2][1] .= $me[1].$symbol;
+        }
+      } else {
+        // Opening delimiter: start a new (possibly nested) group
+        $data[] = [$symbol,$symbol];
+      }
+      break;
+    default:
+      if($cnt) {
+        $data[$cnt-1][1] .= $symbol;
+      } else {
+        $next->write($symbol.EOL);
+      }
+      break;
+  }
+}
+
+// Make sure we get the minimum params
+if(!isset($params['spec'])) perror('Spec missing from the parameters');
+$spec = $params['spec'];
+
+// Ensure the said spec has a src
+// The path is resolved once, so the existence check and the fopen below
+// look at the same file regardless of the current working directory
+$filename = implode(DS,[__DIR__,'src',$spec.'.spec']);
+if(!file_exists($filename)) perror('Given source does not exist');
+
+// Fetch the file's headers: every line up to the first blank line
+// The source is only ever read, so it is opened read-only
+$fd = fopen($filename,'r');
+$headers = '';
+// fgets() returns false at end-of-file, which also ends the header block
+while(($line = fgets($fd)) !== false) {
+  $line = trim($line);
+  // Strict comparison: a header line consisting of "0" is not a blank line
+  if($line === '') break;
+  $headers .= $line . "\n";
+}
+$headers = unflatten($headers);
// Build the processing pipe
-$outfd = fopen('spec'.DS.'spec'.$spec.'.txt','c+');
-ftruncate($outfd,0);
-$pipe = new Pipe([
- 'normalize_newlines',
- 'inclusions',
- $outfd,
-]);
+// Pipes are keyed by output format; each lists its stages in order and
+// ends with the output stream that receives the result
+$outfd = fopen('spec'.DS.'spec'.$spec.'.txt','w+');
+// Report the failure instead of building a pipe that quietly writes nowhere
+if(!$outfd) perror('Unable to open the output file');
+$pipes = [
+  "txt" => new Pipe([
+    'normalize_newlines',
+    'inclusions',
+    'to_chars',
+    'group_symbols',
+    'group_unbreakables',
+    $outfd,
+  ]),
+];
-// Run data through the pipe
-var_dump($pipe);
-while(!feof($fd)) $pipe->write(fgets($fd));
-var_dump($pipe);
+// Run data through the pipes
+// $fd is positioned just after the header block; fgets() returns false at
+// end-of-file, and that value must not be pushed through the pipes as data
+while(($line = fgets($fd)) !== false) {
+  foreach ($pipes as $pipe) {
+    $pipe->write($line);
+  }
+}
-//// Handle file inclusions
-//$temp_fd = fopen('php://temp','c+');
-//while(!feof($process_fd)) {
-// $line = fgets($process_fd);
-// if(substr($line,0,1)==='<') {
-// $fd = fopen('src'.DS.trim(substr($line, 1)) . '.md', 'r');
-// var_dump('src'.DS.trim(substr($line, 1)) . '.md');
-// fcopy($fd, $temp_fd);
-// fclose($fd);
-// } else {
-// fwrite($temp_fd,$line);
-// }
-//}
-//
-//// Rewind again
-//fseek($process_fd,0);
-//ftruncate($process_fd,0);
-//fseek($temp_fd,0);
-//fcopy($temp_fd,$process_fd);
-//fseek($process_fd,0);
-//fseek($temp_fd,0);
-//ftruncate($temp_fd,0);
-//
-//
-//
-//
-//
-//echo stream_get_contents($process_fd);
-//
-//
-////var_dump($config);
-////var_dump($contents);
-//var_dump($headers);
-//var_dump($params);
+// Close the pipes
+// end() flushes each stage and closes the pipe's output stream
+foreach ($pipes as $pipe) {
+  $pipe->end();
+}
+// The source handle was opened for reading the headers and body; release it too
+if(is_resource($fd)) fclose($fd);
diff --git a/src/0000.md b/src/0000.md
@@ -1,136 +0,0 @@
-Date: 2018-08-15
-author: Robin Bron <robin@finwo.nl>
-organization: Ratus B.V.
-
-# Specification Format
-
-## Conventions
-
-<RFC2119
-
-## Character encoding
-
-Plain-text files for specifications MUST use the `CP437` standard with the
-exclusion of character code 0x0A which represents a line feed as specified in
-`RFC20`.
-
-## Line definition
-
-A line of text is a sequence of 0 or more characters followed by a line feed
-character. For the sake of and clarity, the ending line feed character is part
-of the line.
-
-Lines MUST NOT exceed 72 characters in length, including the ending line feed
-character. A line is called a blank line if it consists of only a line feed
-charachter.
-
-### Line numbering
-
-To ensure the following page dimension section is clear, we need to define how
-lines are numbered.
-
-Assuming a document is in digital format and has a length of greater than
-0 bytes, the first character in the document is part of line 0. Numbering lines
-from 0 instead of 1 gives us an advantage of clarity in the next section.
-
-## Pages
-
-A page is a sequence of 60 lines. That means for every line number n, the line
-is the start of a new page when $$ n mod 60 = 0 $$.
-
-### Page header
-
-The first line of a page SHOULD consist of a left-aligned spec number
-indicator, a centered (short) document title and a right-aligned publishing
-date (see [Document header][document header]). The second line of a page MUST
-always be blank, excluding the first page of the document.
-
-### Page footer
-
-The last line of a page MUST consist of a left-align last name of the author
-and a right-aligned page number between square brackets. The second-to-last
-line of a page must be blank, just like the second line of a page.
-
-## Paragraphs
-
-A paragraph is a sequence of consecutive lines containing characters other than
-only a line feed. Paragraphs are separated by either a blank line or a page
-break. Paragraphs MUST NOT span multiple pages, limiting their size to 56
-lines.
-
-## Document header
-
-The first lines of the first page of a specification document SHALL always
-contain left-aligned description headers (see
-[Descriptive header][descriptive header]) and right-aligned author
-identification and a right-aligned publishing date.
-
-After the initial lines (see [Descriptive header][descriptive header] through
-[Publish date][publish date]), the document title is REQUIRED to be written on
-the first page of the document. For it's specification, see section
-[document title][document title].
-
-Further information on the first page should give a quick description of the
-contents of the document.
-
-### Descriptive header
-
-Each descriptive header is made up of a key and a value. Whitespace is not
-allowed in both the key and the value. Whitespace can only be included in the
-value by wrapping the value in quote characters.
-
-The key of the header consists of all characters of the line up to the first
-semicolon, excluding the semicolon itself and omitting all white-space
-characters.
-
-The value of the header starts at the first non-whitespace character after the
-first semicolon of the line. If the first character is a quote, the value ends
-at the next quote in the line. If the first character is not a quote, the value
-ends at the next whitespace character.
-
-### Short author identification
-
-In order to allow authors to take some credit and to track who has written
-what, the author's name MUST be added right-aligned on the first line of the
-first page of the document. To prevent mixing notations between documents, the
-names SHOULD be written as only the first letters of all given names in
-capitals, separated by dots, a space and the Family name starting with a
-capital. When written by a group with a name, the short author identification
-string SHOULD state the group's name.
-
-### Publish date
-
-Because a document is unlikely to have been written within a day, a publish
-date is simply the month's name starting with a capital followed by the year,
-both following the Gregorian calendar.
-
-## Document footer
-
-The document SHOULD close, starting on a new page, with all informative
-resources which were used to write the document, noting their keyword and
-document title. When possible, a URL to the resource SHOULD be included.
-
-After the informative resources, the document SHOULD end with one or several
-pages dedicated to the information of the author(s) and if possible their
-contact information.
-
-## Section titles
-
-Section titles SHOULD be a short text about the subject the section describes.
-Whether it is simply the keyword of what it explains, a problem statement or
-other type of text is up to the author as long as it's relevant to the
-section's body.
-
-A section title MUST start with a capital character & MUST NOT contain any
-other capital letters, excluding where they are required in names or
-abbreviations.
-
-## Document title
-
-The title of the document should clearly state the main subject of the document
-and it's contents. Each word of the document title must start with a capital
-character when noted as the title of the document.
-
-On the first page of the document, the title should be centered horizontally
-and have at least 2 blank lines both above and below it. The document title
-SHOULD be as close to the document's descriptive headers.
diff --git a/src/0000.spec b/src/0000.spec
@@ -0,0 +1,127 @@
+Date: 2018-08-15
+author: Robin Bron <robin@finwo.nl>
+organization: Ratus B.V.
+title: Specification Format
+
+Conventions:
+ <RFC2119
+
+Code block test:
+ ```javascript
+ // This is a code block test
+ console.log('Hello World');
+ ```
+
+Character encoding:
+ Plain-text files for specifications MUST use the `CP437` standard with the
+ exclusion of character code 0x0A which represents a line feed as specified in
+ `RFC20`.
+
+Line definition:
+ A line of text is a sequence of 0 or more characters followed by a line feed
+ character. For the sake of clarity, the ending line feed character is
+ part of the line.
+
+ Lines MUST NOT exceed 72 characters in length, including the ending line feed
+ character. A line is called a blank line if it consists of only a line feed
+ character.
+
+Line numbering:
+ To ensure the following page dimension section is clear, we need to define
+ how lines are numbered.
+
+ Assuming a document is in digital format and has a length of greater than
+ 0 bytes, the first character in the document is part of line 0. Numbering
+ lines from 0 instead of 1 gives us an advantage of clarity in the next
+ section.
+
+Pages:
+ A page is a sequence of 60 lines. That means for every line number n, the
+ line is the start of a new page when $$ n mod 60 = 0 $$.
+
+Pages:Page header:
+ The first line of a page SHOULD consist of a left-aligned spec number
+ indicator, a centered (short) document title and a right-aligned publishing
+ date (see [Document header][document header]). The second line of a page MUST
+ always be blank, excluding the first page of the document.
+
+Pages:Page footer:
+ The last line of a page MUST consist of a left-aligned last name of the author
+ and a right-aligned page number between square brackets. The second-to-last
+ line of a page must be blank, just like the second line of a page.
+
+Paragraphs:
+ A paragraph is a sequence of consecutive lines containing characters other
+ than only a line feed. Paragraphs are separated by either a blank line or a
+ page break. Paragraphs MUST NOT span multiple pages, limiting their size to
+ 56 lines.
+
+Document header:
+ The first lines of the first page of a specification document SHALL always
+ contain left-aligned description headers (see
+ [Descriptive header][descriptive header]) and right-aligned author
+ identification and a right-aligned publishing date.
+
+ After the initial lines (see [Descriptive header][descriptive header] through
+ [Publish date][publish date]), the document title is REQUIRED to be written
+ on the first page of the document. For its specification, see section
+ [document title][document title].
+
+ Further information on the first page should give a quick description of the
+ contents of the document.
+
+Document header:Descriptive header:
+ Each descriptive header is made up of a key and a value. Whitespace is not
+ allowed in either the key or the value. Whitespace can only be included in the
+ value by wrapping the value in quote characters.
+
+ The key of the header consists of all characters of the line up to the first
+ colon, excluding the colon itself and omitting all white-space
+ characters.
+
+ The value of the header starts at the first non-whitespace character after
+ the first colon of the line. If the first character is a quote, the value
+ ends at the next quote in the line. If the first character is not a quote,
+ the value ends at the next whitespace character.
+
+Document header:Short author identification:
+ In order to allow authors to take some credit and to track who has written
+ what, the author's name MUST be added right-aligned on the first line of the
+ first page of the document. To prevent mixing notations between documents,
+ the names SHOULD be written as only the first letters of all given names in
+ capitals, separated by dots, a space and the Family name starting with a
+ capital. When written by a group with a name, the short author identification
+ string SHOULD state the group's name.
+
+Document header:Publish date:
+ Because a document is unlikely to have been written within a day, a publish
+ date is simply the month's name starting with a capital followed by the year,
+ both following the Gregorian calendar.
+
+Document footer:
+ The document SHOULD close, starting on a new page, with all informative
+ resources which were used to write the document, noting their keyword and
+ document title. When possible, a URL to the resource SHOULD be included.
+
+ After the informative resources, the document SHOULD end with one or several
+ pages dedicated to the information of the author(s) and if possible their
+ contact information.
+
+Section titles:
+ Section titles SHOULD be a short text about the subject the section
+ describes. Whether it is simply the keyword of what it explains, a problem
+ statement or other type of text is up to the author as long as it's relevant
+ to the section's body.
+
+ A section title MUST start with a capital character & MUST NOT contain any
+ other capital letters, excluding where they are required in names or
+ abbreviations.
+
+Document title:
+ The title of the document should clearly state the main subject of the
+ document and its contents. Each word of the document title must start with a
+ capital character when noted as the title of the document.
+
+ On the first page of the document, the title should be centered horizontally
+ and have at least 2 blank lines both above and below it. The document title
+ SHOULD be as close to the document's descriptive headers as possible.
diff --git a/src/RFC2119.md b/src/RFC2119.spec