%{
/*  Original C version https://github.com/jgm/peg-markdown/
 *  Copyright 2008 John MacFarlane (jgm at berkeley dot edu).
 *
 *  Modifications and translation from C into Go
 *  based on markdown_parser.leg and utility_functions.c
 *  Copyright 2010 Michael Teichgräber (mt at wmipf dot de)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License or the MIT
 *  license. See LICENSE for details.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 */

package markdown

// PEG grammar and parser actions for markdown syntax.

import (
	"fmt"
	"strings"
	"log"
	"sync"
)

// Semantic value of a parsing action.
//
// Elements form singly linked lists through 'next' (see cons) and
// trees through 'children' (see mk_list).
type element struct {
	key int // one of the constants LIST..DEFDATA below
	contents
	children *element // head of the list of child elements, or nil
	next *element // next element of the list this element belongs to
}

// Information (label, URL and title) for a link.
type link struct {
	label *element
	url string
	title string
}

// Union for contents of an Element (string, list, or link).
// It is embedded into element, so its fields are accessed as
// e.contents.str and e.contents.link.
type contents struct {
	str string
	*link
}

// Types of semantic values returned by parsers.
// numVAL is not a key itself; it is the number of keys and is used
// as the length of the keynames table.
const (
	LIST = iota /* A generic list of values. For ordered and bullet lists, see below. */
	RAW /* Raw markdown to be processed further */
	SPACE
	LINEBREAK
	ELLIPSIS
	EMDASH
	ENDASH
	APOSTROPHE
	SINGLEQUOTED
	DOUBLEQUOTED
	STR
	LINK
	IMAGE
	CODE
	HTML
	EMPH
	STRONG
	PLAIN
	PARA
	LISTITEM
	BULLETLIST
	ORDEREDLIST
	H1 /* Code assumes that H1..6 are in order. */
	H2
	H3
	H4
	H5
	H6
	BLOCKQUOTE
	VERBATIM
	HTMLBLOCK
	HRULE
	REFERENCE
	NOTE
	DEFINITIONLIST
	DEFTITLE
	DEFDATA
	numVAL
)

// Doc is the user state handed to the generated parser (see %userstate
// below). The parser actions read the extension flags from it and store
// their results in it.
type Doc struct {
	parser *yyParser
	extension Extensions
	tree *element /* Results of parse. */
	references *element /* List of link references found. */
	notes *element /* List of footnotes found. */
}

%}

%userstate *Doc

%YYSTYPE *element

# Doc collects every Block with cons (which prepends), then stores the
# list, reversed back into document order, in p.tree.
Doc = a:StartList ( Block { a = cons($$, a) } )*
	{ p.tree = reverse(a) }
	commit

# A block is any number of blank lines followed by the first of the
# alternatives below that matches; the order of alternatives matters.
Block = BlankLine*
	( BlockQuote
	| Verbatim
	| Note
	| Reference
	| HorizontalRule
	| Heading
	| DefinitionList
	| OrderedList
	| BulletList
	| HtmlBlock
	| StyleBlock
	| Para
	| Plain )

# Para and Plain both reuse the LIST element built by Inlines and only
# change its key; Para additionally requires at least one trailing blank line.
Para = NonindentSpace a:Inlines BlankLine+
	{ $$ = a; $$.key = PARA }

Plain = a:Inlines
	{ $$ = a; $$.key = PLAIN }

# An inline inside an ATX heading: anything but a newline or the optional
# closing run of '#' characters at the end of the line.
AtxInline = !Newline !(Sp '#'* Sp Newline) Inline

# One to six '#' characters; the heading level is derived from the length
# of the matched text, relying on H1..H6 being consecutive.
AtxStart = &'#' < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
	{ $$ = mk_element(H1 + (len(yytext) - 1)) }

AtxHeading = s:AtxStart Sp a:StartList ( AtxInline { a = cons($$, a) } )+ (Sp '#'* Sp)? Newline
	{ $$ = mk_list(s.key, a)
	  s = nil }

SetextHeading = SetextHeading1 | SetextHeading2

# Underlines: at least three '=' (level 1) or three '-' (level 2).
SetextBottom1 = "===" '='* Newline

SetextBottom2 = "---" '-'* Newline

# The lookahead checks that the line is followed by an underline before
# the inlines of the heading text are parsed.
SetextHeading1 = &(RawLine SetextBottom1)
	a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline
	SetextBottom1 { $$ = mk_list(H1, a) }

SetextHeading2 = &(RawLine SetextBottom2)
	a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline
	SetextBottom2 { $$ = mk_list(H2, a) }

Heading = AtxHeading | SetextHeading

# A block quote wraps a single RAW child holding the unquoted text.
BlockQuote = a:BlockQuoteRaw
	{ $$ = mk_element(BLOCKQUOTE)
	  $$.children = a
	}

# Collects the quoted lines with the leading '>' (and one optional space)
# removed, any directly following non-blank lines that do not start with
# '>', and "\n" for each blank line after them.
BlockQuoteRaw = a:StartList
	(( '>' ' '? Line { a = cons($$, a) } )
	 ( !'>' !BlankLine Line { a = cons($$, a) } )*
	 ( BlankLine { a = cons(mk_str("\n"), a) } )*
	)+
	{ $$ = mk_str_from_list(a, true)
	  $$.key = RAW
	}

NonblankIndentedLine = !BlankLine IndentedLine

# Optional blank lines (each kept as "\n") followed by one or more
# indented, non-blank lines.
VerbatimChunk = a:StartList
	( BlankLine { a = cons(mk_str("\n"), a) } )*
	( NonblankIndentedLine { a = cons($$, a) } )+
	{ $$ = mk_str_from_list(a, false) }

Verbatim = a:StartList ( VerbatimChunk { a = cons($$, a) } )+
	{ $$ = mk_str_from_list(a, false)
	  $$.key = VERBATIM
	}

# Three or more '*', '-' or '_' (all of the same kind), optionally
# separated by spaces, alone on a line and followed by a blank line.
HorizontalRule = NonindentSpace
	( '*' Sp '*' Sp '*' (Sp '*')*
	| '-' Sp '-' Sp '-' (Sp '-')*
	| '_' Sp '_' Sp '_' (Sp '_')*)
	Sp Newline BlankLine+
	{ $$ = mk_element(HRULE) }

# A bullet must not be the start of a horizontal rule.
Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+

# ListTight/ListLoose return a LIST element; only its key is changed here.
BulletList = &Bullet (ListTight | ListLoose)
	{ $$.key = BULLETLIST }

# A tight list must not be followed by another list marker after its
# trailing blank lines; otherwise the ListLoose alternative is tried.
ListTight = a:StartList
	( ListItem { a = cons($$, a) } )+
	BlankLine* !(Bullet | Enumerator | DefMarker)
	{ $$ = mk_list(LIST, a) }

# In a loose list two newlines are appended to the raw text of every item.
ListLoose = a:StartList
	( b:ListItem BlankLine*
	  { li := b.children
	    li.contents.str += "\n\n"
	    a = cons(b, a)
	  } )+
	{ $$ = mk_list(LIST, a) }

# A list item is a LISTITEM element with a single RAW child containing
# the concatenated text of its first block and all continuation blocks.
ListItem = ( Bullet | Enumerator | DefMarker )
	a:StartList
	ListBlock { a = cons($$, a) }
	( ListContinuationBlock { a = cons($$, a) } )*
	{ raw := mk_str_from_list(a, false)
	  raw.key = RAW
	  $$ = mk_element(LISTITEM)
	  $$.children = raw
	}

ListBlock = a:StartList
	Line { a = cons($$, a) }
	( ListBlockLine { a = cons($$, a) } )*
	{ $$ = mk_str_from_list(a, false) }

# An indented continuation of a list item. The blank lines in front of it
# are kept verbatim; if there are none, "\001" is inserted instead.
ListContinuationBlock = a:StartList
	( < BlankLine* >
	  { if len(yytext) == 0 {
	        a = cons(mk_str("\001"), a) // block separator
	    } else {
	        a = cons(mk_str(yytext), a)
	    }
	  } )
	( Indent ListBlock { a = cons($$, a) } )+
	{ $$ = mk_str_from_list(a, false) }

Enumerator = NonindentSpace [0-9]+ '.' Spacechar+

OrderedList = &Enumerator (ListTight | ListLoose)
	{ $$.key = ORDEREDLIST }

# A further line of a list block: not the start of another list item or
# definition, not blank and not a horizontal rule.
ListBlockLine = !( (Indent? (Bullet | Enumerator)) | DefMarker )
	!BlankLine
	!HorizontalRule
	OptionallyIndentedLine

# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.
#
# For every tag name X there is a pair of rules:
#   HtmlBlockOpenX   matches an opening tag '<x ...>' with optional attributes,
#   HtmlBlockCloseX  matches the closing tag '</x>'.
# Tag names are accepted in all-lowercase or all-uppercase spelling only.

HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'

HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'

HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'

HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'

HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>'

HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>'

HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'

HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>'

HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>'

HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>'

HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>'

HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>'

HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>'

HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>'

HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>'

HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'

HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'

HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'

HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'

HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'

HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'

HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'

HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'

HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'

HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'

HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'

HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'

HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'

HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'

HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'
HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'

HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'

HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'

# A block-level HTML element from its opening to its matching closing
# tag. The rule recurses into itself so that nested block-level elements
# are consumed as a unit; anything else is skipped character by character.
HtmlBlockInTags = HtmlBlockOpenAddress (HtmlBlockInTags | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress
	| HtmlBlockOpenBlockquote (HtmlBlockInTags | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote
	| HtmlBlockOpenCenter (HtmlBlockInTags | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter
	| HtmlBlockOpenDir (HtmlBlockInTags | !HtmlBlockCloseDir .)* HtmlBlockCloseDir
	| HtmlBlockOpenDiv (HtmlBlockInTags | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv
	| HtmlBlockOpenDl (HtmlBlockInTags | !HtmlBlockCloseDl .)* HtmlBlockCloseDl
	| HtmlBlockOpenFieldset (HtmlBlockInTags | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset
	| HtmlBlockOpenForm (HtmlBlockInTags | !HtmlBlockCloseForm .)* HtmlBlockCloseForm
	| HtmlBlockOpenH1 (HtmlBlockInTags | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1
	| HtmlBlockOpenH2 (HtmlBlockInTags | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2
	| HtmlBlockOpenH3 (HtmlBlockInTags | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3
	| HtmlBlockOpenH4 (HtmlBlockInTags | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4
	| HtmlBlockOpenH5 (HtmlBlockInTags | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5
	| HtmlBlockOpenH6 (HtmlBlockInTags | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6
	| HtmlBlockOpenMenu (HtmlBlockInTags | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu
	| HtmlBlockOpenNoframes (HtmlBlockInTags | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes
	| HtmlBlockOpenNoscript (HtmlBlockInTags | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript
	| HtmlBlockOpenOl (HtmlBlockInTags | !HtmlBlockCloseOl .)* HtmlBlockCloseOl
	| HtmlBlockOpenP (HtmlBlockInTags | !HtmlBlockCloseP .)* HtmlBlockCloseP
	| HtmlBlockOpenPre (HtmlBlockInTags | !HtmlBlockClosePre .)* HtmlBlockClosePre
	| HtmlBlockOpenTable (HtmlBlockInTags | !HtmlBlockCloseTable .)* HtmlBlockCloseTable
	| HtmlBlockOpenUl (HtmlBlockInTags | !HtmlBlockCloseUl .)* HtmlBlockCloseUl
	| HtmlBlockOpenDd (HtmlBlockInTags | !HtmlBlockCloseDd .)* HtmlBlockCloseDd
	| HtmlBlockOpenDt (HtmlBlockInTags | !HtmlBlockCloseDt .)* HtmlBlockCloseDt
	| HtmlBlockOpenFrameset (HtmlBlockInTags | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset
	| HtmlBlockOpenLi (HtmlBlockInTags | !HtmlBlockCloseLi .)* HtmlBlockCloseLi
	| HtmlBlockOpenTbody (HtmlBlockInTags | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody
	| HtmlBlockOpenTd (HtmlBlockInTags | !HtmlBlockCloseTd .)* HtmlBlockCloseTd
	| HtmlBlockOpenTfoot (HtmlBlockInTags | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot
	| HtmlBlockOpenTh (HtmlBlockInTags | !HtmlBlockCloseTh .)* HtmlBlockCloseTh
	| HtmlBlockOpenThead (HtmlBlockInTags | !HtmlBlockCloseThead .)* HtmlBlockCloseThead
	| HtmlBlockOpenTr (HtmlBlockInTags | !HtmlBlockCloseTr .)* HtmlBlockCloseTr
	| HtmlBlockOpenScript (HtmlBlockInTags | !HtmlBlockCloseScript .)* HtmlBlockCloseScript

# A block of raw HTML followed by at least one blank line. With the
# FilterHTML extension the block is replaced by an empty LIST; otherwise
# the captured text is passed through as an HTMLBLOCK.
HtmlBlock = &'<' < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
	BlankLine+
	{
	    if p.extension.FilterHTML {
	        $$ = mk_list(LIST, nil)
	    } else {
	        $$ = mk_str(yytext)
	        $$.key = HTMLBLOCK
	    }
	}

HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'

HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
	"h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
	"ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
	"ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
	"H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
	"UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"

StyleOpen = '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>'
StyleClose = '<' Spnl '/' ("style" | "STYLE") Spnl '>'
InStyleTags = StyleOpen (!StyleClose .)* StyleClose

# A <style> element; dropped (empty LIST) when FilterStyles is set,
# otherwise passed through as an HTMLBLOCK.
StyleBlock = < InStyleTags >
	BlankLine*
	{
	    if p.extension.FilterStyles {
	        $$ = mk_list(LIST, nil)
	    } else {
	        $$ = mk_str(yytext)
	        $$.key = HTMLBLOCK
	    }
	}

# A run of inlines. An Endline is only kept when another inline follows
# it; a final Endline is consumed but not added to the list.
Inlines = a:StartList
	( !Endline Inline { a = cons($$, a) }
	| c:Endline &Inline { a = cons(c, a) } )+
	Endline?
	{ $$ = mk_list(LIST, a) }

Inline = Str
	| Endline
	| UlOrStarLine
	| Space
	| Strong
	| Emph
	| Image
	| Link
	| NoteReference
	| InlineNote
	| Code
	| RawHtml
	| Entity
	| EscapedChar
	| Smart
	| Symbol

# Any run of spaces and tabs becomes a single SPACE element.
Space = Spacechar+
	{ $$ = mk_str(" ")
	  $$.key = SPACE }

# Underscores are part of a string only when followed by an alphanumeric.
Str = < NormalChar (NormalChar | '_'+ &Alphanumeric)* >
	{ $$ = mk_str(yytext) }

EscapedChar = '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
	{ $$ = mk_str(yytext) }

Entity = ( HexEntity | DecEntity | CharEntity )
	{ $$ = mk_str(yytext); $$.key = HTML }

Endline = LineBreak | TerminalEndline | NormalEndline

# A newline inside a paragraph: not followed by a blank line, a block
# quote marker, an ATX heading, or a line that is underlined as a
# setext heading.
NormalEndline = Sp Newline !BlankLine !'>' !AtxStart
	!(Line ("===" '='* | "---" '-'*) Newline)
	{ $$ = mk_str("\n")
	  $$.key = SPACE }

TerminalEndline = Sp Newline Eof
	{ $$ = nil }

# A hard line break: two spaces in front of a normal line ending.
LineBreak = "  " NormalEndline
	{ $$ = mk_element(LINEBREAK) }

Symbol = < SpecialChar >
	{ $$ = mk_str(yytext) }

# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
UlOrStarLine = (UlLine | StarLine) { $$ = mk_str(yytext) }
StarLine = < "****" '*'* > | < Spacechar '*'+ &Spacechar >
UlLine = < "____" '_'* > | < Spacechar '_'+ &Spacechar >

Emph = EmphStar | EmphUl

OneStarOpen = !StarLine '*' !Spacechar !Newline
OneStarClose = !Spacechar !Newline a:Inline !StrongStar '*' { $$ = a }

EmphStar = OneStarOpen
	a:StartList
	( !OneStarClose Inline { a = cons($$, a) } )*
	OneStarClose { a = cons($$, a) }
	{ $$ = mk_list(EMPH, a) }

OneUlOpen = !UlLine '_' !Spacechar !Newline
OneUlClose = !Spacechar !Newline a:Inline !StrongUl '_' !Alphanumeric { $$ = a }

EmphUl = OneUlOpen
	a:StartList
	( !OneUlClose Inline { a = cons($$, a) } )*
	OneUlClose { a = cons($$, a) }
	{ $$ = mk_list(EMPH, a) }

Strong = StrongStar | StrongUl

TwoStarOpen = !StarLine "**" !Spacechar !Newline
TwoStarClose = !Spacechar !Newline a:Inline "**" { $$ = a }

StrongStar = TwoStarOpen
	a:StartList
	( !TwoStarClose Inline { a = cons($$, a) } )*
	TwoStarClose { a = cons($$, a) }
	{ $$ = mk_list(STRONG, a) }

TwoUlOpen = !UlLine "__" !Spacechar !Newline
TwoUlClose = !Spacechar !Newline a:Inline "__" !Alphanumeric { $$ = a }

StrongUl = TwoUlOpen
	a:StartList
	( !TwoUlClose Inline { a = cons($$, a) } )*
	TwoUlClose { a = cons($$, a) }
	{ $$ = mk_list(STRONG, a) }

# '!' followed by a link. A resolved LINK is re-keyed to IMAGE; an
# unresolved reference (a LIST of literal text) gets the '!' put back
# in front of it.
Image = '!' ( ExplicitLink | ReferenceLink )
	{
	    if $$.key == LINK {
	        $$.key = IMAGE
	    } else {
	        result := $$
	        $$.children = cons(mk_str("!"), result.children)
	    }
	}

Link = ExplicitLink | ReferenceLink | AutoLink

ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle

# [text][label]: looked up by the second label. If no reference matches,
# the original text is rebuilt as a LIST of its literal pieces.
ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label
	{
	    if match, found := p.findReference(b.children); found {
	        $$ = mk_link(a.children, match.url, match.title)
	        a = nil
	        b = nil
	    } else {
	        result := mk_element(LIST)
	        result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext),
	                               cons(mk_str("["), cons(b, mk_str("]")))))))
	        $$ = result
	    }
	}

# [label] or [label][]: looked up by its own label.
ReferenceLinkSingle = a:Label < (Spnl "[]")? >
	{
	    if match, found := p.findReference(a.children); found {
	        $$ = mk_link(a.children, match.url, match.title)
	        a = nil
	    } else {
	        result := mk_element(LIST)
	        result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext))))
	        $$ = result
	    }
	}

ExplicitLink = l:Label Spnl '(' Sp s:Source Spnl t:Title Sp ')'
	{ $$ = mk_link(l.children, s.contents.str, t.contents.str)
	  s = nil
	  t = nil
	  l = nil }

Source = ( '<' < SourceContents > '>' | < SourceContents > )
	{ $$ = mk_str(yytext) }

# A URL may contain balanced parentheses.
SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*
	| ""

Title = ( TitleSingle | TitleDouble | < "" > )
	{ $$ = mk_str(yytext) }

# A title ends at the quote that is followed by ')' or a newline, so
# the title text itself may contain quote characters.
TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''

TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'

AutoLink = AutoLinkUrl | AutoLinkEmail

AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
	{ $$ = mk_link(mk_str(yytext), yytext, "") }

AutoLinkEmail = '<' < [-A-Za-z0-9+_]+ '@' ( !Newline !'>' . )+ > '>'
	{ $$ = mk_link(mk_str(yytext), "mailto:"+yytext, "") }

# A link reference definition: [label]: url "title"
Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc Spnl t:RefTitle BlankLine*
	{ $$ = mk_link(l.children, s.contents.str, t.contents.str)
	  s = nil
	  t = nil
	  l = nil
	  $$.key = REFERENCE }

# With the Notes extension a label must not start with '^', because
# "[^" introduces a note reference.
Label = '[' ( !'^' &{ p.extension.Notes } | &. &{ !p.extension.Notes } )
	a:StartList
	( !']' Inline { a = cons($$, a) } )*
	']'
	{ $$ = mk_list(LIST, a) }

RefSrc = < Nonspacechar+ >
	{ $$ = mk_str(yytext)
	  $$.key = HTML }

RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
	{ $$ = mk_str(yytext) }

EmptyTitle = < "" >

RefTitleSingle = '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''

RefTitleDouble = '"' < ( !('"' Sp Newline | Newline) . )* > '"'

RefTitleParens = '(' < ( !(')' Sp Newline | Newline) . )* > ')'

# Collects all link reference definitions of a document into
# p.references, skipping every other block.
References = a:StartList
	( b:Reference { a = cons(b, a) } | SkipBlock )*
	{ p.references = reverse(a) }
	commit

# Exactly one to five backticks (not followed by a further backtick).
Ticks1 = "`" !'`'
Ticks2 = "``" !'`'
Ticks3 = "```" !'`'
Ticks4 = "````" !'`'
Ticks5 = "`````" !'`'

# A code span is closed by the same number of backticks that opened it;
# backtick runs of a different length are part of the code.
Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
	| Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
	| Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
	| Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
	| Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
	)
	{ $$ = mk_str(yytext); $$.key = CODE }

# Inline HTML; dropped (empty LIST) when FilterHTML is set.
RawHtml = < (HtmlComment | HtmlTag) >
	{
	    if p.extension.FilterHTML {
	        $$ = mk_list(LIST, nil)
	    } else {
	        $$ = mk_str(yytext)
	        $$.key = HTML
	    }
	}

BlankLine = Sp Newline

Quoted = '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
HtmlAttribute = (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl
# An HTML comment: everything from "<!--" up to the first "-->".
HtmlComment = "<!--" (!"-->" .)* "-->"
HtmlTag = '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>'
Eof = !.
Spacechar = ' ' | '\t'
Nonspacechar = !Spacechar !Newline .
Newline = '\n' | '\r' '\n'?
Sp = Spacechar*
Spnl = Sp (Newline Sp)?
SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '<' | '!' | '\\' | ExtendedSpecialChar
NormalChar = !( SpecialChar | Spacechar | Newline ) .
NonAlphanumeric = [\000-\057\072-\100\133-\140\173-\177]
Alphanumeric = [0-9A-Za-z\200-\377]
AlphanumericAscii = [A-Za-z0-9]
Digit = [0-9]

# Each entity rule captures the complete entity, including '&' and ';'.
HexEntity = < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
DecEntity = < '&' '#' [0-9]+ ';' >
CharEntity = < '&' [A-Za-z0-9]+ ';' >

# Up to three spaces of indentation do not change the meaning of a line;
# the longest alternative is listed first because PEG choice is ordered.
NonindentSpace = "   " | "  " | " " | ""
# One level of indentation: a tab or four spaces.
Indent = "\t" | "    "
IndentedLine = Indent Line
OptionallyIndentedLine = Indent? Line

# StartList starts a list data structure that can be added to with cons:
StartList = &.
	{ $$ = nil }

Line = RawLine
	{ $$ = mk_str(yytext) }
# A line including its newline, or the rest of the input if the last
# line is not terminated by a newline.
RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )

SkipBlock = ( !BlankLine RawLine )+ BlankLine*
	| BlankLine+

# Syntax extensions

# Additional characters that must not be swallowed by Str when the
# corresponding extension is enabled.
ExtendedSpecialChar = &{ p.extension.Smart } ('.' | '-' | '\'' | '"')
	| &{ p.extension.Notes } ( '^' )

Smart = &{ p.extension.Smart }
	( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )

Apostrophe = '\''
	{ $$ = mk_element(APOSTROPHE) }

Ellipsis = ("..." | ". . .")
	{ $$ = mk_element(ELLIPSIS) }

Dash = EmDash | EnDash

EnDash = '-' &Digit
	{ $$ = mk_element(ENDASH) }

EmDash = ("---" | "--")
	{ $$ = mk_element(EMDASH) }

# An opening single quote is not followed by punctuation or white space
# and is not the apostrophe of a contraction such as 's, 't or 've.
SingleQuoteStart = '\'' ![)!\],.;:-? \t\n] !( ( "s" | "t" | "m" | "ve" | "ll" | "re" ) !Alphanumeric )

SingleQuoteEnd = '\'' !Alphanumeric

SingleQuoted = SingleQuoteStart
	a:StartList
	( !SingleQuoteEnd b:Inline { a = cons(b, a) } )+
	SingleQuoteEnd
	{ $$ = mk_list(SINGLEQUOTED, a) }

DoubleQuoteStart = '"'

DoubleQuoteEnd = '"'

DoubleQuoted = DoubleQuoteStart
	a:StartList
	( !DoubleQuoteEnd b:Inline { a = cons(b, a) } )+
	DoubleQuoteEnd
	{ $$ = mk_list(DOUBLEQUOTED, a) }

# [^label]: replaced by a NOTE sharing the children of the note found in
# p.notes, or by the literal text if no such note exists.
NoteReference = &{ p.extension.Notes }
	ref:RawNoteReference
	{
	    if match, ok := p.find_note(ref.contents.str); ok {
	        $$ = mk_element(NOTE)
	        $$.children = match.children
	        $$.contents.str = ""
	    } else {
	        $$ = mk_str("[^"+ref.contents.str+"]")
	    }
	}

RawNoteReference = "[^" < ( !Newline !']' . )+ > ']'
	{ $$ = mk_str(yytext) }

# A footnote definition: [^label]: text, optionally continued by
# indented blocks. The label is stored in contents.str of the NOTE.
Note = &{ p.extension.Notes }
	NonindentSpace ref:RawNoteReference ':' Sp
	a:StartList
	( RawNoteBlock { a = cons($$, a) } )
	( &Indent RawNoteBlock { a = cons($$, a) } )*
	{ $$ = mk_list(NOTE, a)
	  $$.contents.str = ref.contents.str
	}

# ^[text]: a note given in place; it has no label.
InlineNote = &{ p.extension.Notes }
	"^["
	a:StartList
	( !']' Inline { a = cons($$, a) } )+
	']'
	{ $$ = mk_list(NOTE, a)
	  $$.contents.str = "" }

# Collects all footnote definitions of a document into p.notes,
# skipping every other block.
Notes = a:StartList
	( b:Note { a = cons(b, a) } | SkipBlock )*
	{ p.notes = reverse(a) }
	commit

RawNoteBlock = a:StartList
	( !BlankLine OptionallyIndentedLine { a = cons($$, a) } )+
	( < BlankLine* > { a = cons(mk_str(yytext), a) } )
	{ $$ = mk_str_from_list(a, true)
	  $$.key = RAW
	}

DefinitionList = &{ p.extension.Dlists }
	a:StartList
	( Definition { a = cons($$, a) } )+
	{ $$ = mk_list(DEFINITIONLIST, a) }

# One or more titles followed by their definitions. The definitions are
# parsed as list items and then re-keyed to DEFDATA.
Definition = &( (!Defmark RawLine)+ BlankLine? Defmark)
	a:StartList
	( DListTitle { a = cons($$, a) } )+
	( DefTight | DefLoose ) {
	    for e := $$.children; e != nil; e = e.next {
	        e.key = DEFDATA
	    }
	    a = cons($$, a)
	}
	{ $$ = mk_list(LIST, a) }

DListTitle = NonindentSpace !Defmark &Nonspacechar
	a:StartList
	(!Endline Inline { a = cons($$, a) } )+
	Sp Newline
	{ $$ = mk_list(LIST, a)
	  $$.key = DEFTITLE
	}

DefTight = &Defmark ListTight
DefLoose = BlankLine &Defmark ListLoose

Defmark = NonindentSpace (':' | '~') Spacechar+
DefMarker = &{ p.extension.Dlists } Defmark

%%

/*
 * List manipulation functions
 */

/* cons - cons an element onto a list, returning pointer to new head */
func cons(new, list *element) *element {
	new.next = list
	return new
}

/* reverse - reverse a list, returning pointer to new list */
func reverse(list *element) (new *element) {
	for list != nil {
		next := list.next
		new = cons(list, new)
		list = next
	}
	return
}

/* concat_string_list - concatenates string contents of list of STR elements.
 * The pieces are collected first and joined once, so that the cost stays
 * linear in the total length even for long lists of lines.
 */
func concat_string_list(list *element) string {
	var parts []string
	for ; list != nil; list = list.next {
		parts = append(parts, list.contents.str)
	}
	return strings.Join(parts, "")
}

/*
 * Auxiliary functions for parsing actions.
 * These make it easier to build up data structures (including lists)
 * in the parsing actions.
 */

/* mk_element - generic constructor for element
 *
 * Elements are handed out one by one from a shared buffer that is
 * refilled with 1024 zeroed elements whenever it runs empty; access to
 * the buffer is guarded by elock.
 */
var elbuf []element
var elock sync.Mutex

func mk_element(key int) *element {
	elock.Lock()
	if len(elbuf) == 0 {
		elbuf = make([]element, 1024)
	}
	e := &elbuf[0]
	elbuf = elbuf[1:]
	elock.Unlock()
	e.key = key
	return e
}

/* mk_str - constructor for STR element */
func mk_str(s string) (result *element) {
	result = mk_element(STR)
	result.contents.str = s
	return
}

/* mk_str_from_list - makes STR element by concatenating a
 * reversed list of strings, adding optional extra newline
 */
func mk_str_from_list(list *element, extra_newline bool) (result *element) {
	s := concat_string_list(reverse(list))
	if extra_newline {
		s += "\n"
	}
	result = mk_element(STR)
	result.contents.str = s
	return
}

/* mk_list - makes new list with key 'key' and children the reverse of 'lst'.
 * This is designed to be used with cons to build lists in a parser action.
 * The reversing is necessary because cons adds to the head of a list.
 */
func mk_list(key int, lst *element) *element {
	result := mk_element(key)
	result.children = reverse(lst)
	return result
}

/* mk_link - constructor for LINK element */
func mk_link(label *element, url, title string) *element {
	result := mk_element(LINK)
	result.contents.link = &link{label: label, url: url, title: title}
	return result
}

/* match_inlines - returns true if inline lists match (case-insensitive...)
 *
 * Two lists match if they have the same length and their elements match
 * pairwise: same key, and equal text (ignoring case) or matching
 * children where the key carries any. Lists containing links or images
 * never match. Elements of any other kind are logged and treated as
 * not matching, so that unexpected input cannot terminate the program.
 */
func match_inlines(l1, l2 *element) bool {
	for l1 != nil && l2 != nil {
		if l1.key != l2.key {
			return false
		}
		switch l1.key {
		case SPACE, LINEBREAK, ELLIPSIS, EMDASH, ENDASH, APOSTROPHE:
			/* nothing but the key to compare */
		case CODE, STR, HTML:
			if !strings.EqualFold(l1.contents.str, l2.contents.str) {
				return false
			}
		case EMPH, STRONG, LIST, SINGLEQUOTED, DOUBLEQUOTED:
			if !match_inlines(l1.children, l2.children) {
				return false
			}
		case LINK, IMAGE:
			return false /* No links or images within links */
		default:
			log.Printf("match_inlines encountered unknown key = %d\n", l1.key)
			return false
		}
		l1 = l1.next
		l2 = l2.next
	}
	return l1 == nil && l2 == nil /* return true if both lists exhausted */
}

/* findReference - looks up a link reference by label.
 * Returns the link of the first entry of d.references whose label
 * matches 'label' (see match_inlines) and true, or nil and false if
 * there is no such entry.
 */
func (d *Doc) findReference(label *element) (*link, bool) {
	for cur := d.references; cur != nil; cur = cur.next {
		l := cur.contents.link
		if match_inlines(label, l.label) {
			return l, true
		}
	}
	return nil, false
}

/* find_note - looks up a footnote by label.
 * Returns the first element of d.notes whose contents.str equals
 * 'label' and true, or nil and false if there is no such note.
 */
func (d *Doc) find_note(label string) (*element, bool) {
	for el := d.notes; el != nil; el = el.next {
		if label == el.contents.str {
			return el, true
		}
	}
	return nil, false
}

/* print tree of elements, for debugging only.
 * Every element is printed on a line of its own, indented by one tab
 * per nesting level; children are printed below their parent.
 */
func print_tree(elt *element, indent int) {
	var key string

	for elt != nil {
		for i := 0; i < indent; i++ {
			fmt.Print("\t")
		}
		key = keynames[elt.key]
		if key == "" {
			key = "?"
		}
		if elt.key == STR {
			fmt.Printf("%p:\t%s\t'%s'\n", elt, key, elt.contents.str)
		} else {
			fmt.Printf("%p:\t%s %p\n", elt, key, elt.next)
		}
		if elt.children != nil {
			print_tree(elt.children, indent+1)
		}
		elt = elt.next
	}
}

/* keynames maps element keys to their names, for print_tree. */
var keynames = [numVAL]string{
	LIST:           "LIST",
	RAW:            "RAW",
	SPACE:          "SPACE",
	LINEBREAK:      "LINEBREAK",
	ELLIPSIS:       "ELLIPSIS",
	EMDASH:         "EMDASH",
	ENDASH:         "ENDASH",
	APOSTROPHE:     "APOSTROPHE",
	SINGLEQUOTED:   "SINGLEQUOTED",
	DOUBLEQUOTED:   "DOUBLEQUOTED",
	STR:            "STR",
	LINK:           "LINK",
	IMAGE:          "IMAGE",
	CODE:           "CODE",
	HTML:           "HTML",
	EMPH:           "EMPH",
	STRONG:         "STRONG",
	PLAIN:          "PLAIN",
	PARA:           "PARA",
	LISTITEM:       "LISTITEM",
	BULLETLIST:     "BULLETLIST",
	ORDEREDLIST:    "ORDEREDLIST",
	H1:             "H1",
	H2:             "H2",
	H3:             "H3",
	H4:             "H4",
	H5:             "H5",
	H6:             "H6",
	BLOCKQUOTE:     "BLOCKQUOTE",
	VERBATIM:       "VERBATIM",
	HTMLBLOCK:      "HTMLBLOCK",
	HRULE:          "HRULE",
	REFERENCE:      "REFERENCE",
	NOTE:           "NOTE",
	DEFINITIONLIST: "DEFINITIONLIST",
	DEFTITLE:       "DEFTITLE",
	DEFDATA:        "DEFDATA",
}