/**
 * PEG.js grammar for reading MediaWiki parser tests files
 * 2011-07-20 Brion Vibber
 *
 * Every rule that produces a top-level item returns a PHP associative
 * array carrying a 'type' key ('version', 'comment', 'line', 'article',
 * 'test', 'hooks', 'functionhooks', 'empty').
 */

{
/* File-scope initializer */
namespace Wikimedia\Parsoid\ParserTests;

use Wikimedia\Parsoid\Utils\PHPUtils;
}

{
    /*
     * Empty class-scope initializer.
     *
     * Needed so the initializer above doesn't
     * become the class-scope initializer.
     */
}

// Start rule: an optional version header followed by one or more chunks.
testfile =
    format?
    chunk+

// ---------------------------------------------------------------------
// Lexical helpers
// ---------------------------------------------------------------------

eol = "\n"

whitespace = [ \t]+

ws = whitespace

// Everything up to (and consuming) the next newline; the newline itself
// is not part of the returned string.
rest_of_line = c:([^\n]*) eol {
    return implode($c);
}

// A line that does not start with the "!!" directive marker.
line = (!"!!") line:rest_of_line {
    return $line;
}

// Zero or more non-directive lines, joined back together with "\n".
text = lines:line* {
    return implode("\n", $lines);
}

// ---------------------------------------------------------------------
// Top-level items
// ---------------------------------------------------------------------

// `line` refuses anything starting with "!!", so the `hooks` and
// `functionhooks` alternatives listed after it remain reachable.
chunk =
    comment /
    article /
    test /
    l:line { return [ 'type' => 'line', 'text' => $l ]; } /
    hooks /
    functionhooks

// "!! version N" header (keyword is case-insensitive); only the digits
// are kept, anything after them on the line is discarded.
format =
    "!!" ws? version_keyword ws+ v:$([0-9]+) rest_of_line {
        return [ 'type' => 'version', 'text' => $v ];
    }

version_keyword = [vV] [eE] [rR] [sS] [iI] [oO] [nN]

// A line starting with "#"; 'text' excludes the leading "#".
comment =
    "#" text:rest_of_line {
        return [ 'type' => 'comment', 'text' => $text ];
    }

// A blank line, optionally containing only spaces/tabs.
// The alternatives are parenthesised so that the action applies to both
// of them; without the grouping it would bind to `ws eol` only.
// Not referenced by any other rule in this file.
empty =
    ( eol / ws eol ) {
        return [ 'type' => 'empty' ];
    }

// ---------------------------------------------------------------------
// Articles
// ---------------------------------------------------------------------

article =
    start_article title:line start_text text:text end_article {
        return [
            'type' => 'article',
            'title' => $title,
            'text' => $text
        ];
    }

start_article =
    "!!" ws? "article" ws? eol

start_text =
    "!!" ws? "text" ws? eol

end_article =
    "!!" ws? "endarticle" ws? eol

// ---------------------------------------------------------------------
// function hooks
// ---------------------------------------------------------------------

functionhooks = start_functionhooks text:text end_functionhooks {
    return [ 'type' => 'functionhooks', 'text' => $text ];
}

// An optional ":" is tolerated after the keyword.
start_functionhooks =
    "!!" ws? "functionhooks" ":"? ws? eol

end_functionhooks =
    "!!" ws? "endfunctionhooks" ":"? ws? eol

// ---------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------

// A test is a title followed by any number of named sections. Each
// section is stored on the result under its own name, next to 'type'
// and 'title'.
test =
    start_test
    title:text
    sections:(section / option_section)*
    end_test
{
    $test = [
        'type' => 'test',
        'title' => $title
    ];

    foreach ( $sections as $section ) {
        $test[$section['name']] = $section['text'];
    }

    // pegjs parser handles item options as follows:
    //   item option                    value of item.options.parsoid
    //   (option absent)                undefined
    //   parsoid                        ""
    //   parsoid=wt2html                "wt2html"
    //   parsoid=wt2html,wt2wt          ["wt2html","wt2wt"]
    //   parsoid={"modes":["wt2wt"]}    {modes:['wt2wt']}

    // treat 'parsoid=xxx,yyy' in options section as shorthand for
    // 'parsoid={modes:["xxx","yyy"]}'
    if ( isset( $test['options']['parsoid'] ) ) {
        if ( $test['options']['parsoid'] === '' ) {
            $test['options']['parsoid'] = [];
        }
        if ( is_string( $test['options']['parsoid'] ) ) {
            $test['options']['parsoid'] = [ $test['options']['parsoid'] ];
        }
        if ( is_array( $test['options']['parsoid'] ) &&
            !isset( $test['options']['parsoid']['modes'] )
        ) {
            $test['options']['parsoid'] = [ 'modes' => $test['options']['parsoid'] ];
        }
    }

    return $test;
}

// Generic "!! name" section. The lookaheads reject names that begin with
// "end" or "options", leaving those to `end_test` and `option_section`.
// Anything after the name on the header line is discarded.
section =
    "!!" ws? (!"end") (!"options") name:(c:[^ \t\r\n]+ { return implode( $c ); }) rest_of_line
    text:text
{
    return [
        'name' => $name,
        'text' => $text
    ];
}

// "!! options" section; 'text' is a map of option name => option value.
// A later occurrence of the same option name overwrites an earlier one.
option_section =
    "!!" ws? "options" ws? eol
    opts:option_list?
{
    $o = [];
    // $opts is null when the section has no options at all.
    foreach ( $opts ?? [] as $opt ) {
        $o[$opt['k']] = $opt['v'];
    }

    return [
        'name' => 'options',
        'text' => $o
    ];
}

// One or more options; each must be followed by at least one space,
// tab or newline. Returns a list of [ 'k' => ..., 'v' => ... ] pairs.
option_list = o:an_option [ \t\n]+ rest:option_list?
{
    return array_merge( [ $o ], $rest ?? [] );
}

// from PHP parser in tests/parser/parserTest.inc:parseOptions()
//   foo
//   foo=bar
//   foo="bar baz"
//   foo=[[bar baz]]
//   foo={...json...}
//   foo=bar,"baz quux",[[bat]]

// The option name is lower-cased; an option without "=value" gets ''.
an_option = k:option_name v:option_value?
{
    return [
        'k' => strtolower( $k ),
        'v' => $v ?? ''
    ];
}

option_name = c:[^ \t\n=!]+ { return implode($c); }

// A single value is returned bare, several values as a list.
option_value = ws? "=" ws? ovl:option_value_list
{
    return count( $ovl ) === 1 ? $ovl[0] : $ovl;
}

// Comma-separated values, always returned as a list.
option_value_list = v:an_option_value
    rest:( ws? "," ws? ovl:option_value_list { return $ovl; })?
{
    return array_merge( [ $v ], $rest ?? [] );
}

// The sub-rules hand back raw text; text starting with a double quote or
// an open brace is JSON and gets decoded here.
// NOTE(review): the commented-out closing brace on the `if` line must
// stay; it presumably balances the brace inside the string literal for
// the generator's action-block scanner.
an_option_value = v:(link_target_value / quoted_value / plain_value / json_value)
{
    if ( $v[0] === '"' || $v[0] === '{' ) { // } is needed to make pegjs happy
        return PHPUtils::jsonDecode( $v, false );
    }
    return $v;
}

link_target_value = "[[" v:[^\]]* "]]"
{
    // Perhaps we should canonicalize the title?
    // Protect with JSON encoding just in case the link target starts with
    // double-quote or open-brace.
    return PHPUtils::jsonEncode( implode( $v ) );
}

// Double-quoted string with backslash escapes; returned with its quotes
// and escapes intact.
quoted_value = [\"] v:( [^\\\"] / ("\\" c:. { return "\\" . $c; } ) )* [\"]
{
    return '"' . implode( $v ) . '"';
}

plain_value = v:[^ \t\n\"\'\[\]=,!\{]+
{
    return implode( $v );
}

// Brace-delimited text (nested braces and quoted strings allowed),
// returned verbatim including the outer braces.
json_value = "{" v:( [^\"\{\}] / quoted_value / json_value )* "}"
{
    return "{" . implode( $v ) . "}";
}

start_test =
    "!!" ws? "test" ws? eol

end_test =
    "!!" ws? "end" ws? eol

// ---------------------------------------------------------------------
// Hooks
// ---------------------------------------------------------------------

hooks =
    start_hooks text:text end_hooks {
        return [ 'type' => 'hooks', 'text' => $text ];
    }

// An optional ":" is tolerated after the opening keyword only.
start_hooks =
    "!!" ws? "hooks" ":"? ws? eol

end_hooks =
    "!!" ws? "endhooks" ws? eol