%{ /* Original C version https://github.com/jgm/peg-markdown/ * Copyright 2008 John MacFarlane (jgm at berkeley dot edu). * * Modifications and translation from C into Go * based on markdown_parser.leg and utility_functions.c * Copyright 2010 Michael Teichgräber (mt at wmipf dot de) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License or the MIT * license. See LICENSE for details. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ package markdown // PEG grammar and parser actions for markdown syntax. import ( "fmt" "strings" "log" "sync" ) // Semantic value of a parsing action. type element struct { key int contents children *element next *element } // Information (label, URL and title) for a link. type link struct { label *element url string title string } // Union for contents of an Element (string, list, or link). type contents struct { str string *link } // Types of semantic values returned by parsers. const ( LIST = iota /* A generic list of values. For ordered and bullet lists, see below. */ RAW /* Raw markdown to be processed further */ SPACE LINEBREAK ELLIPSIS EMDASH ENDASH APOSTROPHE SINGLEQUOTED DOUBLEQUOTED STR LINK IMAGE CODE HTML EMPH STRONG PLAIN PARA LISTITEM BULLETLIST ORDEREDLIST H1 /* Code assumes that H1..6 are in order. */ H2 H3 H4 H5 H6 BLOCKQUOTE VERBATIM HTMLBLOCK HRULE REFERENCE NOTE DEFINITIONLIST DEFTITLE DEFDATA numVAL ) type Doc struct { parser *yyParser extension Options tree *element /* Results of parse. */ references *element /* List of link references found. */ notes *element /* List of footnotes found. */ } %} %userstate *Doc %YYSTYPE *element %switchexcl(HtmlBlock Code Title RefTitle) Doc = a:StartList ( Block { a = cons($$, a) } )* { p.tree = reverse(a) } commit Block = BlankLine* ( BlockQuote | Verbatim | Note | Reference | HorizontalRule | Heading | DefinitionList | OrderedList | BulletList | HtmlBlock | StyleBlock | Para | Plain ) Para = NonindentSpace a:Inlines BlankLine+ { $$ = a; $$.key = PARA } Plain = a:Inlines { $$ = a; $$.key = PLAIN } AtxInline = !Newline !(Sp? '#'* Sp Newline) Inline AtxStart = &'#' < ( "######" | "#####" | "####" | "###" | "##" | "#" ) > { $$ = mk_element(H1 + (len(yytext) - 1)) } AtxHeading = s:AtxStart Sp? a:StartList ( AtxInline { a = cons($$, a) } )+ (Sp? '#'* Sp)? Newline { $$ = mk_list(s.key, a) s = nil } SetextHeading = SetextHeading1 | SetextHeading2 SetextBottom1 = "===" '='* Newline SetextBottom2 = "---" '-'* Newline SetextHeading1 = &(RawLine SetextBottom1) a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline SetextBottom1 { $$ = mk_list(H1, a) } SetextHeading2 = &(RawLine SetextBottom2) a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline SetextBottom2 { $$ = mk_list(H2, a) } Heading = AtxHeading | SetextHeading BlockQuote = a:BlockQuoteRaw { $$ = mk_element(BLOCKQUOTE) $$.children = a } BlockQuoteRaw = a:StartList (( '>' ' '? Line { a = cons($$, a) } ) ( !'>' !BlankLine Line { a = cons($$, a) } )* ( BlankLine { a = cons(mk_str("\n"), a) } )* )+ { $$ = mk_str_from_list(a, true) $$.key = RAW } NonblankIndentedLine = !BlankLine IndentedLine VerbatimChunk = a:StartList ( BlankLine { a = cons(mk_str("\n"), a) } )* ( NonblankIndentedLine { a = cons($$, a) } )+ { $$ = mk_str_from_list(a, false) } Verbatim = a:StartList ( VerbatimChunk { a = cons($$, a) } )+ { $$ = mk_str_from_list(a, false) $$.key = VERBATIM } HorizontalRule = NonindentSpace ( '*' Sp '*' Sp '*' (Sp '*')* | '-' Sp '-' Sp '-' (Sp '-')* | '_' Sp '_' Sp '_' (Sp '_')*) Sp Newline BlankLine+ { $$ = mk_element(HRULE) } Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+ BulletList = &Bullet (ListTight | ListLoose) { $$.key = BULLETLIST } ListTight = a:StartList ( ListItemTight { a = cons($$, a) } )+ BlankLine* !(Bullet | Enumerator | DefMarker) { $$ = mk_list(LIST, a) } ListLoose = a:StartList ( b:ListItem BlankLine* { li := b.children li.contents.str += "\n\n" a = cons(b, a) } )+ { $$ = mk_list(LIST, a) } ListItem = ( Bullet | Enumerator | DefMarker ) a:StartList ListBlock { a = cons($$, a) } ( ListContinuationBlock { a = cons($$, a) } )* { raw := mk_str_from_list(a, false) raw.key = RAW $$ = mk_element(LISTITEM) $$.children = raw } ListItemTight = ( Bullet | Enumerator | DefMarker ) a:StartList ListBlock { a = cons($$, a) } ( !BlankLine ListContinuationBlock { a = cons($$, a) } )* !ListContinuationBlock { raw := mk_str_from_list(a, false) raw.key = RAW $$ = mk_element(LISTITEM) $$.children = raw } ListBlock = a:StartList !BlankLine Line { a = cons($$, a) } ( ListBlockLine { a = cons($$, a) } )* { $$ = mk_str_from_list(a, false) } ListContinuationBlock = a:StartList ( < BlankLine* > { if len(yytext) == 0 { a = cons(mk_str("\001"), a) // block separator } else { a = cons(mk_str(yytext), a) } } ) ( Indent ListBlock { a = cons($$, a) } )+ { $$ = mk_str_from_list(a, false) } Enumerator = NonindentSpace [0-9]+ '.' Spacechar+ OrderedList = &Enumerator (ListTight | ListLoose) { $$.key = ORDEREDLIST } ListBlockLine = !BlankLine !( (Indent? (Bullet | Enumerator)) | DefMarker ) !HorizontalRule OptionallyIndentedLine # Parsers for different kinds of block-level HTML content. # This is repetitive due to constraints of PEG grammar. HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>' HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>' HtmlBlockAddress = HtmlBlockOpenAddress (HtmlBlockAddress | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>' HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>' HtmlBlockBlockquote = HtmlBlockOpenBlockquote (HtmlBlockBlockquote | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>' HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>' HtmlBlockCenter = HtmlBlockOpenCenter (HtmlBlockCenter | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>' HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>' HtmlBlockDir = HtmlBlockOpenDir (HtmlBlockDir | !HtmlBlockCloseDir .)* HtmlBlockCloseDir HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>' HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>' HtmlBlockDiv = HtmlBlockOpenDiv (HtmlBlockDiv | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>' HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>' HtmlBlockDl = HtmlBlockOpenDl (HtmlBlockDl | !HtmlBlockCloseDl .)* HtmlBlockCloseDl HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>' HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>' HtmlBlockFieldset = HtmlBlockOpenFieldset (HtmlBlockFieldset | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>' HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>' HtmlBlockForm = HtmlBlockOpenForm (HtmlBlockForm | !HtmlBlockCloseForm .)* HtmlBlockCloseForm HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>' HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>' HtmlBlockH1 = HtmlBlockOpenH1 (HtmlBlockH1 | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1 HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>' HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>' HtmlBlockH2 = HtmlBlockOpenH2 (HtmlBlockH2 | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2 HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>' HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>' HtmlBlockH3 = HtmlBlockOpenH3 (HtmlBlockH3 | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3 HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>' HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>' HtmlBlockH4 = HtmlBlockOpenH4 (HtmlBlockH4 | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4 HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>' HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>' HtmlBlockH5 = HtmlBlockOpenH5 (HtmlBlockH5 | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5 HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>' HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>' HtmlBlockH6 = HtmlBlockOpenH6 (HtmlBlockH6 | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6 HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>' HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>' HtmlBlockMenu = HtmlBlockOpenMenu (HtmlBlockMenu | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>' HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>' HtmlBlockNoframes = HtmlBlockOpenNoframes (HtmlBlockNoframes | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>' HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>' HtmlBlockNoscript = HtmlBlockOpenNoscript (HtmlBlockNoscript | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>' HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>' HtmlBlockOl = HtmlBlockOpenOl (HtmlBlockOl | !HtmlBlockCloseOl .)* HtmlBlockCloseOl HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>' HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>' HtmlBlockP = HtmlBlockOpenP (HtmlBlockP | !HtmlBlockCloseP .)* HtmlBlockCloseP HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>' HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>' HtmlBlockPre = HtmlBlockOpenPre (HtmlBlockPre | !HtmlBlockClosePre .)* HtmlBlockClosePre HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>' HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>' HtmlBlockTable = HtmlBlockOpenTable (HtmlBlockTable | !HtmlBlockCloseTable .)* HtmlBlockCloseTable HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>' HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>' HtmlBlockUl = HtmlBlockOpenUl (HtmlBlockUl | !HtmlBlockCloseUl .)* HtmlBlockCloseUl HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>' HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>' HtmlBlockDd = HtmlBlockOpenDd (HtmlBlockDd | !HtmlBlockCloseDd .)* HtmlBlockCloseDd HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>' HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>' HtmlBlockDt = HtmlBlockOpenDt (HtmlBlockDt | !HtmlBlockCloseDt .)* HtmlBlockCloseDt HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>' HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>' HtmlBlockFrameset = HtmlBlockOpenFrameset (HtmlBlockFrameset | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>' HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>' HtmlBlockLi = HtmlBlockOpenLi (HtmlBlockLi | !HtmlBlockCloseLi .)* HtmlBlockCloseLi HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>' HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>' HtmlBlockTbody = HtmlBlockOpenTbody (HtmlBlockTbody | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>' HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>' HtmlBlockTd = HtmlBlockOpenTd (HtmlBlockTd | !HtmlBlockCloseTd .)* HtmlBlockCloseTd HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>' HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>' HtmlBlockTfoot = HtmlBlockOpenTfoot (HtmlBlockTfoot | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>' HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>' HtmlBlockTh = HtmlBlockOpenTh (HtmlBlockTh | !HtmlBlockCloseTh .)* HtmlBlockCloseTh HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>' HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>' HtmlBlockThead = HtmlBlockOpenThead (HtmlBlockThead | !HtmlBlockCloseThead .)* HtmlBlockCloseThead HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>' HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>' HtmlBlockTr = HtmlBlockOpenTr (HtmlBlockTr | !HtmlBlockCloseTr .)* HtmlBlockCloseTr HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>' HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>' HtmlBlockScript = HtmlBlockOpenScript (HtmlBlockScript | !HtmlBlockCloseScript .)* HtmlBlockCloseScript HtmlBlockInTags = HtmlBlockAddress | HtmlBlockBlockquote | HtmlBlockCenter | HtmlBlockDir | HtmlBlockDiv | HtmlBlockDl | HtmlBlockFieldset | HtmlBlockForm | HtmlBlockH1 | HtmlBlockH2 | HtmlBlockH3 | HtmlBlockH4 | HtmlBlockH5 | HtmlBlockH6 | HtmlBlockMenu | HtmlBlockNoframes | HtmlBlockNoscript | HtmlBlockOl | HtmlBlockP | HtmlBlockPre | HtmlBlockTable | HtmlBlockUl | HtmlBlockDd | HtmlBlockDt | HtmlBlockFrameset | HtmlBlockLi | HtmlBlockTbody | HtmlBlockTd | HtmlBlockTfoot | HtmlBlockTh | HtmlBlockThead | HtmlBlockTr | HtmlBlockScript HtmlBlock = &'<' < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) > BlankLine+ { if p.extension.FilterHTML { $$ = mk_list(LIST, nil) } else { $$ = mk_str(yytext) $$.key = HTMLBLOCK } } HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>' HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" | "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" | "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" | "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" | "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT" StyleOpen = '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>' StyleClose = '<' Spnl '/' ("style" | "STYLE") Spnl '>' InStyleTags = StyleOpen (!StyleClose .)* StyleClose StyleBlock = < InStyleTags > BlankLine* { if p.extension.FilterStyles { $$ = mk_list(LIST, nil) } else { $$ = mk_str(yytext) $$.key = HTMLBLOCK } } Inlines = a:StartList ( !Endline Inline { a = cons($$, a) } | c:Endline &Inline { a = cons(c, a) } )+ Endline? { $$ = mk_list(LIST, a) } Inline = Str | Endline | UlOrStarLine | Space | Strong | Emph | Image | Link | NoteReference | InlineNote | Code | RawHtml | Entity | EscapedChar | Smart | Symbol Space = Spacechar+ { $$ = mk_str(" ") $$.key = SPACE } Str = < NormalChar (NormalChar | '_'+ &Alphanumeric)* > { $$ = mk_str(yytext) } EscapedChar = '\\' !Newline < [-\\`|*_{}[\]()#+.!><] > { $$ = mk_str(yytext) } Entity = ( HexEntity | DecEntity | CharEntity ) { $$ = mk_str(yytext); $$.key = HTML } Endline = LineBreak | TerminalEndline | NormalEndline NormalEndline = Sp Newline !BlankLine !'>' !AtxStart !(Line ("===" '='* | "---" '-'*) Newline) { $$ = mk_str("\n") $$.key = SPACE } TerminalEndline = Sp Newline Eof { $$ = nil } LineBreak = " " NormalEndline { $$ = mk_element(LINEBREAK) } Symbol = < SpecialChar > { $$ = mk_str(yytext) } # This keeps the parser from getting bogged down on long strings of '*' or '_', # or strings of '*' or '_' with space on each side: UlOrStarLine = (UlLine | StarLine) { $$ = mk_str(yytext) } StarLine = < "****" '*'* > | < Spacechar '*'+ &Spacechar > UlLine = < "____" '_'* > | < Spacechar '_'+ &Spacechar > Emph = EmphStar | EmphUl OneStarOpen = !StarLine '*' !Spacechar !Newline OneStarClose = !Spacechar !Newline a:Inline !StrongStar '*' { $$ = a } EmphStar = OneStarOpen a:StartList ( !OneStarClose Inline { a = cons($$, a) } )* OneStarClose { a = cons($$, a) } { $$ = mk_list(EMPH, a) } OneUlOpen = !UlLine '_' !Spacechar !Newline OneUlClose = !Spacechar !Newline a:Inline !StrongUl '_' !Alphanumeric { $$ = a } EmphUl = OneUlOpen a:StartList ( !OneUlClose Inline { a = cons($$, a) } )* OneUlClose { a = cons($$, a) } { $$ = mk_list(EMPH, a) } Strong = StrongStar | StrongUl TwoStarOpen = !StarLine "**" !Spacechar !Newline TwoStarClose = !Spacechar !Newline a:Inline "**" { $$ = a } StrongStar = TwoStarOpen a:StartList ( !TwoStarClose Inline { a = cons($$, a) } )* TwoStarClose { a = cons($$, a) } { $$ = mk_list(STRONG, a) } TwoUlOpen = !UlLine "__" !Spacechar !Newline TwoUlClose = !Spacechar !Newline a:Inline "__" !Alphanumeric { $$ = a } StrongUl = TwoUlOpen a:StartList ( !TwoUlClose Inline { a = cons($$, a) } )* TwoUlClose { a = cons($$, a) } { $$ = mk_list(STRONG, a) } Image = '!' ( ExplicitLink | ReferenceLink ) { if $$.key == LINK { $$.key = IMAGE } else { result := $$ $$.children = cons(mk_str("!"), result.children) } } Link = ExplicitLink | ReferenceLink | AutoLink ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label { if match, found := p.findReference(b.children); found { $$ = mk_link(a.children, match.url, match.title); a = nil b = nil } else { result := mk_element(LIST) result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext), cons(mk_str("["), cons(b, mk_str("]"))))))) $$ = result } } ReferenceLinkSingle = a:Label < (Spnl "[]")? > { if match, found := p.findReference(a.children); found { $$ = mk_link(a.children, match.url, match.title) a = nil } else { result := mk_element(LIST) result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext)))); $$ = result } } ExplicitLink = l:Label Spnl '(' Sp s:Source Spnl t:Title Sp ')' { $$ = mk_link(l.children, s.contents.str, t.contents.str) s = nil t = nil l = nil } Source = ( '<' < SourceContents > '>' | < SourceContents > ) { $$ = mk_str(yytext) } SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')* | "" Title = ( TitleSingle | TitleDouble | < "" > ) { $$ = mk_str(yytext) } TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\'' TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"' AutoLink = AutoLinkUrl | AutoLinkEmail AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>' { $$ = mk_link(mk_str(yytext), yytext, "") } AutoLinkEmail = '<' < [-A-Za-z0-9+_]+ '@' ( !Newline !'>' . )+ > '>' { $$ = mk_link(mk_str(yytext), "mailto:"+yytext, "") } Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc Spnl t:RefTitle BlankLine* { $$ = mk_link(l.children, s.contents.str, t.contents.str) s = nil t = nil l = nil $$.key = REFERENCE } Label = '[' ( !'^' &{ p.extension.Notes } | &. &{ !p.extension.Notes } ) a:StartList ( !']' Inline { a = cons($$, a) } )* ']' { $$ = mk_list(LIST, a) } RefSrc = < Nonspacechar+ > { $$ = mk_str(yytext) $$.key = HTML } RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle ) { $$ = mk_str(yytext) } EmptyTitle = < "" > RefTitleSingle = '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\'' RefTitleDouble = '"' < ( !('"' Sp Newline | Newline) . )* > '"' RefTitleParens = '(' < ( !(')' Sp Newline | Newline) . )* > ')' References = a:StartList ( b:Reference { a = cons(b, a) } | SkipBlock )* { p.references = reverse(a) } commit Ticks1 = "`" !'`' Ticks2 = "``" !'`' Ticks3 = "```" !'`' Ticks4 = "````" !'`' Ticks5 = "`````" !'`' Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1 | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2 | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3 | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4 | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5 ) { $$ = mk_str(yytext); $$.key = CODE } RawHtml = < (HtmlComment | HtmlTag) > { if p.extension.FilterHTML { $$ = mk_list(LIST, nil) } else { $$ = mk_str(yytext) $$.key = HTML } } BlankLine = Sp Newline Quoted = '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\'' HtmlAttribute = (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl HtmlComment = "" .)* "-->" HtmlTag = '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>' Eof = !. Spacechar = ' ' | '\t' Nonspacechar = !Spacechar !Newline . Newline = '\n' | '\r' '\n'? Sp = Spacechar* Spnl = Sp (Newline Sp)? SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '<' | '!' | '#' | '\\' | ExtendedSpecialChar NormalChar = !( SpecialChar | Spacechar | Newline ) . NonAlphanumeric = [\000-\057\072-\100\133-\140\173-\177] Alphanumeric = [0-9A-Za-z] | '\200' | '\201' | '\202' | '\203' | '\204' | '\205' | '\206' | '\207' | '\210' | '\211' | '\212' | '\213' | '\214' | '\215' | '\216' | '\217' | '\220' | '\221' | '\222' | '\223' | '\224' | '\225' | '\226' | '\227' | '\230' | '\231' | '\232' | '\233' | '\234' | '\235' | '\236' | '\237' | '\240' | '\241' | '\242' | '\243' | '\244' | '\245' | '\246' | '\247' | '\250' | '\251' | '\252' | '\253' | '\254' | '\255' | '\256' | '\257' | '\260' | '\261' | '\262' | '\263' | '\264' | '\265' | '\266' | '\267' | '\270' | '\271' | '\272' | '\273' | '\274' | '\275' | '\276' | '\277' | '\300' | '\301' | '\302' | '\303' | '\304' | '\305' | '\306' | '\307' | '\310' | '\311' | '\312' | '\313' | '\314' | '\315' | '\316' | '\317' | '\320' | '\321' | '\322' | '\323' | '\324' | '\325' | '\326' | '\327' | '\330' | '\331' | '\332' | '\333' | '\334' | '\335' | '\336' | '\337' | '\340' | '\341' | '\342' | '\343' | '\344' | '\345' | '\346' | '\347' | '\350' | '\351' | '\352' | '\353' | '\354' | '\355' | '\356' | '\357' | '\360' | '\361' | '\362' | '\363' | '\364' | '\365' | '\366' | '\367' | '\370' | '\371' | '\372' | '\373' | '\374' | '\375' | '\376' | '\377' AlphanumericAscii = [A-Za-z0-9] Digit = [0-9] HexEntity = < '&' '#' [Xx] [0-9a-fA-F]+ ';' > DecEntity = < '&' '#' [0-9]+ > ';' > CharEntity = < '&' [A-Za-z0-9]+ ';' > NonindentSpace = " " | " " | " " | "" Indent = "\t" | " " IndentedLine = Indent Line OptionallyIndentedLine = Indent? Line # StartList starts a list data structure that can be added to with cons: StartList = &. { $$ = nil } Line = RawLine { $$ = mk_str(yytext) } RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof ) SkipBlock = ( !BlankLine RawLine )+ BlankLine* | BlankLine+ # Syntax extensions ExtendedSpecialChar = &{ p.extension.Smart } ('.' | '-' | '\'' | '"') | &{ p.extension.Notes } ( '^' ) Smart = &{ p.extension.Smart } ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe ) Apostrophe = '\'' { $$ = mk_element(APOSTROPHE) } Ellipsis = ("..." | ". . .") { $$ = mk_element(ELLIPSIS) } Dash = EmDash | EnDash EnDash = '-' &Digit { $$ = mk_element(ENDASH) } EmDash = ("---" | "--") { $$ = mk_element(EMDASH) } SingleQuoteStart = '\'' ![)!\],.;:-? \t\n] !( ( "s" | "t" | "m" | "ve" | "ll" | "re" ) !Alphanumeric ) SingleQuoteEnd = '\'' !Alphanumeric SingleQuoted = SingleQuoteStart a:StartList ( !SingleQuoteEnd b:Inline { a = cons(b, a) } )+ SingleQuoteEnd { $$ = mk_list(SINGLEQUOTED, a) } DoubleQuoteStart = '"' DoubleQuoteEnd = '"' DoubleQuoted = DoubleQuoteStart a:StartList ( !DoubleQuoteEnd b:Inline { a = cons(b, a) } )+ DoubleQuoteEnd { $$ = mk_list(DOUBLEQUOTED, a) } NoteReference = &{ p.extension.Notes } ref:RawNoteReference { if match, ok := p.find_note(ref.contents.str); ok { $$ = mk_element(NOTE) $$.children = match.children $$.contents.str = "" } else { $$ = mk_str("[^"+ref.contents.str+"]") } } RawNoteReference = "[^" < ( !Newline !']' . )+ > ']' { $$ = mk_str(yytext) } Note = &{ p.extension.Notes } NonindentSpace ref:RawNoteReference ':' Sp a:StartList ( RawNoteBlock { a = cons($$, a) } ) ( &Indent RawNoteBlock { a = cons($$, a) } )* { $$ = mk_list(NOTE, a) $$.contents.str = ref.contents.str } InlineNote = &{ p.extension.Notes } "^[" a:StartList ( !']' Inline { a = cons($$, a) } )+ ']' { $$ = mk_list(NOTE, a) $$.contents.str = "" } Notes = a:StartList ( b:Note { a = cons(b, a) } | SkipBlock )* { p.notes = reverse(a) } commit RawNoteBlock = a:StartList ( !BlankLine OptionallyIndentedLine { a = cons($$, a) } )+ ( < BlankLine* > { a = cons(mk_str(yytext), a) } ) { $$ = mk_str_from_list(a, true) $$.key = RAW } DefinitionList = &{ p.extension.Dlists } a:StartList ( Definition { a = cons($$, a) } )+ { $$ = mk_list(DEFINITIONLIST, a) } Definition = &( (!Defmark RawLine)+ BlankLine? Defmark) a:StartList ( DListTitle { a = cons($$, a) } )+ ( DefTight | DefLoose ) { for e := $$.children; e != nil; e = e.next { e.key = DEFDATA } a = cons($$, a) } { $$ = mk_list(LIST, a) } DListTitle = NonindentSpace !Defmark &Nonspacechar a:StartList (!Endline Inline { a = cons($$, a) } )+ Sp Newline { $$ = mk_list(LIST, a) $$.key = DEFTITLE } DefTight = &Defmark ListTight DefLoose = BlankLine &Defmark ListLoose Defmark = NonindentSpace (':' | '~') Spacechar+ DefMarker = &{ p.extension.Dlists } Defmark %% /* * List manipulation functions */ /* cons - cons an element onto a list, returning pointer to new head */ func cons(new, list *element) *element { new.next = list return new } /* reverse - reverse a list, returning pointer to new list */ func reverse(list *element) (new *element) { for list != nil { next := list.next new = cons(list, new) list = next } return } /* concat_string_list - concatenates string contents of list of STR elements. */ func concat_string_list(list *element) string { s := "" for list != nil { s += list.contents.str list = list.next } return s } /* * Auxiliary functions for parsing actions. * These make it easier to build up data structures (including lists) * in the parsing actions. */ /* mk_element - generic constructor for element */ var elbuf []element var elock sync.Mutex func mk_element(key int) *element { elock.Lock() if len(elbuf) == 0 { elbuf = make([]element, 1024) } e := &elbuf[0] elbuf = elbuf[1:] elock.Unlock() e.key = key return e } /* mk_str - constructor for STR element */ func mk_str(s string) (result *element) { result = mk_element(STR) result.contents.str = s return } /* mk_str_from_list - makes STR element by concatenating a * reversed list of strings, adding optional extra newline */ func mk_str_from_list(list *element, extra_newline bool) (result *element) { s := concat_string_list(reverse(list)) if extra_newline { s += "\n" } result = mk_element(STR) result.contents.str = s return } /* mk_list - makes new list with key 'key' and children the reverse of 'lst'. * This is designed to be used with cons to build lists in a parser action. * The reversing is necessary because cons adds to the head of a list. */ func mk_list(key int, lst *element) (el *element) { el = mk_element(key) el.children = reverse(lst) return } /* mk_link - constructor for LINK element */ func mk_link(label *element, url, title string) (el *element) { el = mk_element(LINK) el.contents.link = &link{label: label, url: url, title: title} return } /* match_inlines - returns true if inline lists match (case-insensitive...) */ func match_inlines(l1, l2 *element) bool { for l1 != nil && l2 != nil { if l1.key != l2.key { return false } switch l1.key { case SPACE, LINEBREAK, ELLIPSIS, EMDASH, ENDASH, APOSTROPHE: break case CODE, STR, HTML: if strings.ToUpper(l1.contents.str) != strings.ToUpper(l2.contents.str) { return false } case EMPH, STRONG, LIST, SINGLEQUOTED, DOUBLEQUOTED: if !match_inlines(l1.children, l2.children) { return false } case LINK, IMAGE: return false /* No links or images within links */ default: log.Fatalf("match_inlines encountered unknown key = %d\n", l1.key) } l1 = l1.next l2 = l2.next } return l1 == nil && l2 == nil /* return true if both lists exhausted */ } /* find_reference - return true if link found in references matching label. * 'link' is modified with the matching url and title. */ func (d *Doc) findReference(label *element) (*link, bool) { for cur := d.references; cur != nil; cur = cur.next { l := cur.contents.link if match_inlines(label, l.label) { return l, true } } return nil, false } /* find_note - return true if note found in notes matching label. * if found, 'result' is set to point to matched note. */ func (d *Doc) find_note(label string) (*element, bool) { for el := d.notes; el != nil; el = el.next { if label == el.contents.str { return el, true } } return nil, false } /* print tree of elements, for debugging only. */ func print_tree(elt *element, indent int) { var key string for elt != nil { for i := 0; i < indent; i++ { fmt.Print("\t") } key = keynames[elt.key] if key == "" { key = "?" } if elt.key == STR { fmt.Printf("%p:\t%s\t'%s'\n", elt, key, elt.contents.str) } else { fmt.Printf("%p:\t%s %p\n", elt, key, elt.next) } if elt.children != nil { print_tree(elt.children, indent+1) } elt = elt.next } } var keynames = [numVAL]string{ LIST: "LIST", RAW: "RAW", SPACE: "SPACE", LINEBREAK: "LINEBREAK", ELLIPSIS: "ELLIPSIS", EMDASH: "EMDASH", ENDASH: "ENDASH", APOSTROPHE: "APOSTROPHE", SINGLEQUOTED: "SINGLEQUOTED", DOUBLEQUOTED: "DOUBLEQUOTED", STR: "STR", LINK: "LINK", IMAGE: "IMAGE", CODE: "CODE", HTML: "HTML", EMPH: "EMPH", STRONG: "STRONG", PLAIN: "PLAIN", PARA: "PARA", LISTITEM: "LISTITEM", BULLETLIST: "BULLETLIST", ORDEREDLIST: "ORDEREDLIST", H1: "H1", H2: "H2", H3: "H3", H4: "H4", H5: "H5", H6: "H6", BLOCKQUOTE: "BLOCKQUOTE", VERBATIM: "VERBATIM", HTMLBLOCK: "HTMLBLOCK", HRULE: "HRULE", REFERENCE: "REFERENCE", NOTE: "NOTE", DEFINITIONLIST: "DEFINITIONLIST", DEFTITLE: "DEFTITLE", DEFDATA: "DEFDATA", }