specifications

Specification and standard documents
git clone git://git.finwo.net/misc/specifications
Log | Files | Refs | README | LICENSE

commit 6d2de1da320e7fdc250c6be012ddaae6d4266297
parent 00d149119f20891614bd573d7ca9b6a15396cb5e
Author: finwo <finwo@pm.me>
Date:   Fri, 17 Aug 2018 18:07:22 +0200

Working on the parser

Diffstat:
Mcompile.php | 205+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Dsrc/0000.md | 136-------------------------------------------------------------------------------
Asrc/0000.spec | 127+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/RFC2119.md -> src/RFC2119.spec | 0
4 files changed, 239 insertions(+), 229 deletions(-)

diff --git a/compile.php b/compile.php @@ -1,6 +1,7 @@ <?php if(!defined('DS')) define('DS',DIRECTORY_SEPARATOR); +if(!defined('EOL')) define('EOL',"\n"); require(__DIR__.DS.'vendor'.DS.'autoload.php'); $params = array(); @array_shift($argv); @@ -30,31 +31,14 @@ function flatten( $data, $prefix = '', &$output = [] ) { function unflatten( $flatArray, &$output = [] ) { static $accessor = null; $accessor = $accessor ?: new \Finwo\PropertyAccessor\PropertyAccessor(true); - foreach ($flatArray as $key => $value ) $accessor->set($output,$key,$value,'.'); - return $output; -} - -function configFile( $file, &$output = [] ) { - $fd = null; - if(is_array($file)) $file = implode(DS,$file); - if(is_string($file) && file_exists($file)) { $fd = fopen($file,'r'); $file = null; } - if(is_string($file)) { - $fd = fopen('php://temp','r+'); - fwrite($fd,$file); - fseek($fd,0); - $file = null; - } - if(!$fd) return null; - while(!feof($fd)) { // Loop until we're out of file - $line = fgets($fd); // Fetch the line we're about to process - $line = @array_shift(str_getcsv($line,'#')); // Strip comments - $line = trim($line); // Trim line (we don't care about whitespace) - if(strlen($line)==0) continue; // Blank line = skip - $line = array_map('trim',str_getcsv($line,':')); // Split key & value - if(count($line)!=2) continue; // Only 1 key & 1 value supported - $output[strtolower($line[0])]=$line[1]; // Flat keys + if(is_string($flatArray)) { + $flatArray = str_replace(["\r\n","\r"],"\n",$flatArray); + $flatArray = array_filter(explode("\n",$flatArray)); + $flatArray = array_map(function($line){return array_map('trim',explode(':',$line));},$flatArray); + $flatArray = array_map(function($kv){$kv[0]=strtolower($kv[0]);return $kv;},$flatArray); + $flatArray = array_reduce($flatArray,function($acc,$kv){$acc[$kv[0]]=$kv[1];return $acc;},[]); } - fclose($fd); + foreach ($flatArray as $key => $value ) $accessor->set($output,$key,$value,'.'); return $output; } @@ -64,27 +48,11 @@ function fcopy($src,$dst=null) { return $dst; } -// Make sure we get the minimum params -if(!isset($params['spec'])) perror('Spec missing from the parameters'); -$spec = $params['spec']; - -// Ensure the said spec has a src -if(!file_exists('src'.DS.$spec.'.md')) perror('Given source does not exist'); -$filename = implode(DS,[__DIR__,'src',$spec.'.md']); - -// Fetch the file's headers -$fd = fopen($filename,'r+'); -$headers = ''; -while(!feof($fd)) { - $line = trim(fgets($fd)); - if(!$line) break; - $headers .= $line . "\n"; -} -$headers = unflatten(configFile($headers)); - class Pipe { protected $processor = null; protected $next = null; + protected $writable = true; + protected $userdata = null; public function __construct( $processor, Pipe $next = null ) { if(is_array($processor)) { $this->processor = array_shift($processor); @@ -95,69 +63,120 @@ class Pipe { } } public function write( $chunk ) { - if(is_resource($this->processor)) return fwrite($this->processor,$chunk); - if(is_callable($this->processor)) return call_user_func($this->processor,$chunk,$this->next); + if(!$this->writable) return false; + if(is_resource($this->processor)&&$chunk) return fwrite($this->processor,$chunk); + if(is_callable($this->processor)) return call_user_func_array($this->processor,[$chunk,$this->next,&$this->userdata]); return false; } + public function end( $chunk = null ) { + if(!is_null($chunk)) $this->write($chunk); + $this->write(null); + if(is_resource($this->processor)) $this->processor = fclose($this->processor); + if($this->next instanceof Pipe) $this->next->end(); + } } function normalize_newlines( $chunk, Pipe $next ) { - $next->write(str_replace(["\r\n","\r"],"\n",$chunk)); + if(is_null($chunk)) return; + $next->write(str_replace(["\r\n","\r"],EOL,$chunk)); } function inclusions( $chunk, Pipe $next ) { - if(substr($chunk,0,1)=='<') { - $fd = fopen('src'.DS.substr(trim($chunk),1).'.md','r'); - while(!feof($fd)) $next->write(fgets($fd)); + if(is_null($chunk)) return; + $indent = strlen(rtrim($chunk))-strlen(trim($chunk)); + if(substr($chunk,$indent,1)=='<') { + $fd = fopen('src'.DS.substr(trim($chunk),1).'.spec','r'); + while(!feof($fd)) $next->write(str_repeat(' ',$indent).fgets($fd)); fclose($fd); } else { $next->write($chunk); } } +function to_chars( $chunk, Pipe $next ) { + if(is_null($chunk)) return; + foreach (str_split($chunk) as $chr) $next->write($chr); +} +function group_symbols( $char, Pipe $next, &$data ) { + $data = $data ?: ''; + switch($char) { + case '`': + $data .= $char; + break; + default: + if(strlen($data)) $next->write($data); + $data = ''; + $next->write($char); + } +} +function group_unbreakables( $symbol, Pipe $next, &$data ) { + $data = $data ?: []; + $cnt = count($data); + $top = $cnt ? $data[$cnt-1] : null; + switch($symbol) { + case '```': + case '`': + case '"': + case "'": + if($cnt) { + if($top[0]==$symbol) { + $me = array_pop($data); + if($cnt==1) { + $next->write($me[1].$symbol.EOL); + } else { + $data[$cnt-2][1] .= $me[1].$symbol; + } + } else { + $data[] = [$symbol,$symbol]; + } + } else { + $data[] = [$symbol,$symbol]; + } + break; + default: + if($cnt) { + $data[$cnt-1][1] .= $symbol; + } else { + $next->write($symbol.EOL); + } + break; + } +} + +// Make sure we get the minimum params +if(!isset($params['spec'])) perror('Spec missing from the parameters'); +$spec = $params['spec']; + +// Ensure the said spec has a src +if(!file_exists('src'.DS.$spec.'.spec')) perror('Given source does not exist'); +$filename = implode(DS,[__DIR__,'src',$spec.'.spec']); + +// Fetch the file's headers +$fd = fopen($filename,'r+'); +$headers = ''; +while(!feof($fd)) { + $line = trim(fgets($fd)); + if(!$line) break; + $headers .= $line . "\n"; +} +$headers = unflatten($headers); // Build the processing pipe -$outfd = fopen('spec'.DS.'spec'.$spec.'.txt','c+'); -ftruncate($outfd,0); -$pipe = new Pipe([ - 'normalize_newlines', - 'inclusions', - $outfd, -]); +$pipes = [ + "txt" => new Pipe([ + 'normalize_newlines', + 'inclusions', + 'to_chars', + 'group_symbols', + 'group_unbreakables', + fopen('spec'.DS.'spec'.$spec.'.txt','w+'), + ]), +]; -// Run data through the pipe -var_dump($pipe); -while(!feof($fd)) $pipe->write(fgets($fd)); -var_dump($pipe); +// Run data through the pipes +while(!feof($fd)) { + $line = fgets($fd); + foreach ($pipes as $pipe) + $pipe->write($line); +} -//// Handle file inclusions -//$temp_fd = fopen('php://temp','c+'); -//while(!feof($process_fd)) { -// $line = fgets($process_fd); -// if(substr($line,0,1)==='<') { -// $fd = fopen('src'.DS.trim(substr($line, 1)) . '.md', 'r'); -// var_dump('src'.DS.trim(substr($line, 1)) . '.md'); -// fcopy($fd, $temp_fd); -// fclose($fd); -// } else { -// fwrite($temp_fd,$line); -// } -//} -// -//// Rewind again -//fseek($process_fd,0); -//ftruncate($process_fd,0); -//fseek($temp_fd,0); -//fcopy($temp_fd,$process_fd); -//fseek($process_fd,0); -//fseek($temp_fd,0); -//ftruncate($temp_fd,0); -// -// -// -// -// -//echo stream_get_contents($process_fd); -// -// -////var_dump($config); -////var_dump($contents); -//var_dump($headers); -//var_dump($params); +// Close the pipes +foreach ($pipes as $pipe) + $pipe->end(); diff --git a/src/0000.md b/src/0000.md @@ -1,136 +0,0 @@ -Date: 2018-08-15 -author: Robin Bron <robin@finwo.nl> -organization: Ratus B.V. - -# Specification Format - -## Conventions - -<RFC2119 - -## Character encoding - -Plain-text files for specifications MUST use the `CP437` standard with the -exclusion of character code 0x0A which represents a line feed as specified in -`RFC20`. - -## Line definition - -A line of text is a sequence of 0 or more characters followed by a line feed -character. For the sake of and clarity, the ending line feed character is part -of the line. - -Lines MUST NOT exceed 72 characters in length, including the ending line feed -character. A line is called a blank line if it consists of only a line feed -charachter. - -### Line numbering - -To ensure the following page dimension section is clear, we need to define how -lines are numbered. - -Assuming a document is in digital format and has a length of greater than -0 bytes, the first character in the document is part of line 0. Numbering lines -from 0 instead of 1 gives us an advantage of clarity in the next section. - -## Pages - -A page is a sequence of 60 lines. That means for every line number n, the line -is the start of a new page when $$ n mod 60 = 0 $$. - -### Page header - -The first line of a page SHOULD consist of a left-aligned spec number -indicator, a centered (short) document title and a right-aligned publishing -date (see [Document header][document header]). The second line of a page MUST -always be blank, excluding the first page of the document. - -### Page footer - -The last line of a page MUST consist of a left-align last name of the author -and a right-aligned page number between square brackets. The second-to-last -line of a page must be blank, just like the second line of a page. - -## Paragraphs - -A paragraph is a sequence of consecutive lines containing characters other than -only a line feed. Paragraphs are separated by either a blank line or a page -break. Paragraphs MUST NOT span multiple pages, limiting their size to 56 -lines. - -## Document header - -The first lines of the first page of a specification document SHALL always -contain left-aligned description headers (see -[Descriptive header][descriptive header]) and right-aligned author -identification and a right-aligned publishing date. - -After the initial lines (see [Descriptive header][descriptive header] through -[Publish date][publish date]), the document title is REQUIRED to be written on -the first page of the document. For it's specification, see section -[document title][document title]. - -Further information on the first page should give a quick description of the -contents of the document. - -### Descriptive header - -Each descriptive header is made up of a key and a value. Whitespace is not -allowed in both the key and the value. Whitespace can only be included in the -value by wrapping the value in quote characters. - -The key of the header consists of all characters of the line up to the first -semicolon, excluding the semicolon itself and omitting all white-space -characters. - -The value of the header starts at the first non-whitespace character after the -first semicolon of the line. If the first character is a quote, the value ends -at the next quote in the line. If the first character is not a quote, the value -ends at the next whitespace character. - -### Short author identification - -In order to allow authors to take some credit and to track who has written -what, the author's name MUST be added right-aligned on the first line of the -first page of the document. To prevent mixing notations between documents, the -names SHOULD be written as only the first letters of all given names in -capitals, separated by dots, a space and the Family name starting with a -capital. When written by a group with a name, the short author identification -string SHOULD state the group's name. - -### Publish date - -Because a document is unlikely to have been written within a day, a publish -date is simply the month's name starting with a capital followed by the year, -both following the Gregorian calendar. - -## Document footer - -The document SHOULD close, starting on a new page, with all informative -resources which were used to write the document, noting their keyword and -document title. When possible, a URL to the resource SHOULD be included. - -After the informative resources, the document SHOULD end with one or several -pages dedicated to the information of the author(s) and if possible their -contact information. - -## Section titles - -Section titles SHOULD be a short text about the subject the section describes. -Whether it is simply the keyword of what it explains, a problem statement or -other type of text is up to the author as long as it's relevant to the -section's body. - -A section title MUST start with a capital character & MUST NOT contain any -other capital letters, excluding where they are required in names or -abbreviations. - -## Document title - -The title of the document should clearly state the main subject of the document -and it's contents. Each word of the document title must start with a capital -character when noted as the title of the document. - -On the first page of the document, the title should be centered horizontally -and have at least 2 blank lines both above and below it. The document title -SHOULD be as close to the document's descriptive headers. diff --git a/src/0000.spec b/src/0000.spec @@ -0,0 +1,127 @@ +Date: 2018-08-15 +author: Robin Bron <robin@finwo.nl> +organization: Ratus B.V. +title: Specification Format + +Convections: + <RFC2119 + +Code block test: + ```javascript + // This is a code block test + console.log('Hello World'); + ``` + +Character encoding: + Plain-text files for specifications MUST use the `CP437` standard with the + exclusion of character code 0x0A which represents a line feed as specified in + `RFC20`. + +Line definition: + A line of text is a sequence of 0 or more characters followed by a line feed + character. For the sake of and clarity, the ending line feed character is + part of the line. + + Lines MUST NOT exceed 72 characters in length, including the ending line feed + character. A line is called a blank line if it consists of only a line feed + character. + +Line numbering: + To ensure the following page dimension section is clear, we need to define + how lines are numbered. + + Assuming a document is in digital format and has a length of greater than + 0 bytes, the first character in the document is part of line 0. Numbering + lines from 0 instead of 1 gives us an advantage of clarity in the next + section. + +Pages: + A page is a sequence of 60 lines. That means for every line number n, the + line is the start of a new page when $$ n mod 60 = 0 $$. + +Pages:Page header: + The first line of a page SHOULD consist of a left-aligned spec number + indicator, a centered (short) document title and a right-aligned publishing + date (see [Document header][document header]). The second line of a page MUST + always be blank, excluding the first page of the document. + +Pages:Page footer: + The last line of a page MUST consist of a left-align last name of the author + and a right-aligned page number between square brackets. The second-to-last + line of a page must be blank, just like the second line of a page. + +Paragraphs: + A paragraph is a sequence of consecutive lines containing characters other + than only a line feed. Paragraphs are separated by either a blank line or a + page break. Paragraphs MUST NOT span multiple pages, limiting their size to + 56 lines. + +Document header: + The first lines of the first page of a specification document SHALL always + contain left-aligned description headers (see + [Descriptive header][descriptive header]) and right-aligned author + identification and a right-aligned publishing date. + + After the initial lines (see [Descriptive header][descriptive header] through + [Publish date][publish date]), the document title is REQUIRED to be written + on the first page of the document. For it's specification, see section + [document title][document title]. + + Further information on the first page should give a quick description of the + contents of the document. + +Document header:Descriptive header: + Each descriptive header is made up of a key and a value. Whitespace is not + allowed in both the key and the value. Whitespace can only be included in the + value by wrapping the value in quote characters. + + The key of the header consists of all characters of the line up to the first + semicolon, excluding the semicolon itself and omitting all white-space + characters. + + The value of the header starts at the first non-whitespace character after + the first semicolon of the line. If the first character is a quote, the value + ends at the next quote in the line. If the first character is not a quote, + the value ends at the next whitespace character. + +Document header:Short author identification: + In order to allow authors to take some credit and to track who has written + what, the author's name MUST be added right-aligned on the first line of the + first page of the document. To prevent mixing notations between documents, + the names SHOULD be written as only the first letters of all given names in + capitals, separated by dots, a space and the Family name starting with a + capital. When written by a group with a name, the short author identification + string SHOULD state the group's name. + +Document header:Publish date: + Because a document is unlikely to have been written within a day, a publish + date is simply the month's name starting with a capital followed by the year, + both following the Gregorian calendar. + +Document footer: + The document SHOULD close, starting on a new page, with all informative + resources which were used to write the document, noting their keyword and + document title. When possible, a URL to the resource SHOULD be included. + + After the informative resources, the document SHOULD end with one or several + pages dedicated to the information of the author(s) and if possible their + contact information. + +Section titles: + Section titles SHOULD be a short text about the subject the section + describes. Whether it is simply the keyword of what it explains, a problem + statement or other type of text is up to the author as long as it's relevant + to the section's body. + + A section title MUST start with a capital character & MUST NOT contain any + other capital letters, excluding where they are required in names or + abbreviations. + +Document title: + The title of the document should clearly state the main subject of the + document and it's contents. Each word of the document title must start with a + capital character when noted as the title of the document. + + On the first page of the document, the title should be centered horizontally + and have at least 2 blank lines both above and below it. The document title + SHOULD be as close to the document's descriptive headers as possible. diff --git a/src/RFC2119.md b/src/RFC2119.spec