967 lines
31 KiB
Plaintext
967 lines
31 KiB
Plaintext
%{
|
|
/* Original C version https://github.com/jgm/peg-markdown/
|
|
* Copyright 2008 John MacFarlane (jgm at berkeley dot edu).
|
|
*
|
|
* Modifications and translation from C into Go
|
|
* based on markdown_parser.leg and utility_functions.c
|
|
* Copyright 2010 Michael Teichgräber (mt at wmipf dot de)
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License or the MIT
|
|
* license. See LICENSE for details.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*/
|
|
|
|
package markdown
|
|
|
|
// PEG grammar and parser actions for markdown syntax.
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"log"
|
|
"sync"
|
|
)
|
|
|
|
// Semantic value of a parsing action.
|
|
type element struct {
|
|
key int
|
|
contents
|
|
children *element
|
|
next *element
|
|
}
|
|
|
|
// Information (label, URL and title) for a link.
|
|
type link struct {
|
|
label *element
|
|
url string
|
|
title string
|
|
}
|
|
|
|
// Union for contents of an Element (string, list, or link).
|
|
type contents struct {
|
|
str string
|
|
*link
|
|
}
|
|
|
|
// Types of semantic values returned by parsers.
// The values are consecutive (iota), starting with LIST = 0.
const (
	LIST = iota // A generic list of values. For ordered and bullet lists, see below.
	RAW         // Raw markdown to be processed further
	SPACE
	LINEBREAK
	ELLIPSIS
	EMDASH
	ENDASH
	APOSTROPHE
	SINGLEQUOTED
	DOUBLEQUOTED
	STR
	LINK
	IMAGE
	CODE
	HTML
	EMPH
	STRONG
	PLAIN
	PARA
	LISTITEM
	BULLETLIST
	ORDEREDLIST
	H1 // Code assumes that H1..6 are in order.
	H2
	H3
	H4
	H5
	H6
	BLOCKQUOTE
	VERBATIM
	HTMLBLOCK
	HRULE
	REFERENCE
	NOTE
	numVAL // number of keys above; used as the length of the keynames table
)
|
|
|
|
type Doc struct {
|
|
parser *yyParser
|
|
extension Extensions
|
|
|
|
tree *element /* Results of parse. */
|
|
references *element /* List of link references found. */
|
|
notes *element /* List of footnotes found. */
|
|
}
|
|
|
|
%}
|
|
|
|
%userstate *Doc
|
|
|
|
%YYSTYPE *element
|
|
|
|
|
|
# Doc: entry rule for the document body. Every Block is consed onto `a`
# (newest first); the list is put back into document order and stored
# in p.tree.
Doc =       a:StartList ( Block { a = cons($$, a) } )*
            { p.tree = reverse(a) }
            commit
|
|
|
|
# Block: skips leading blank lines, then tries each block-level
# construct in order. PEG choice is ordered, so Para and Plain come
# last as the fallbacks.
Block =     BlankLine*
            ( BlockQuote
            | Verbatim
            | Note
            | Reference
            | HorizontalRule
            | Heading
            | OrderedList
            | BulletList
            | HtmlBlock
            | StyleBlock
            | Para
            | Plain )
|
|
|
|
# Para: inlines terminated by at least one blank line; the resulting
# list is retagged PARA.
Para =      NonindentSpace a:Inlines BlankLine+
            { $$ = a; $$.key = PARA }

# Plain: inlines with no blank-line requirement; retagged PLAIN.
Plain =     a:Inlines
            { $$ = a; $$.key = PLAIN }
|
|
|
|
# AtxInline: one inline inside an ATX heading; stops at the newline and
# before an optional closing run of '#'.
AtxInline = !Newline !(Sp '#'* Sp Newline) Inline

# AtxStart: one to six '#'. The number of '#' selects H1..H6, which is
# why those keys must be consecutive.
AtxStart =  &'#' < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
            { $$ = mk_element(H1 + (len(yytext) - 1)) }

# AtxHeading: "#"-style heading; the key comes from AtxStart.
AtxHeading = s:AtxStart Sp a:StartList ( AtxInline { a = cons($$, a) } )+ (Sp '#'* Sp)? Newline
            { $$ = mk_list(s.key, a)
              s = nil }
|
|
|
|
# Setext headings: a text line underlined with '=' (H1) or '-' (H2).
SetextHeading = SetextHeading1 | SetextHeading2

# Underlines: at least three '=' or '-' followed by a newline.
SetextBottom1 = "===" '='* Newline

SetextBottom2 = "---" '-'* Newline

# The leading lookahead checks for "line + underline" before any
# inline parsing is attempted.
SetextHeading1 =  &(RawLine SetextBottom1)
                  a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline
                  SetextBottom1 { $$ = mk_list(H1, a) }

SetextHeading2 =  &(RawLine SetextBottom2)
                  a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline
                  SetextBottom2 { $$ = mk_list(H2, a) }

Heading = AtxHeading | SetextHeading
|
|
|
|
# BlockQuote: wraps the raw quoted text in a BLOCKQUOTE element. The
# child is a RAW string, i.e. markdown that still has to be parsed.
BlockQuote = a:BlockQuoteRaw
             { $$ = mk_element(BLOCKQUOTE)
               $$.children = a
             }

# BlockQuoteRaw: collects one or more groups consisting of
#   - a line starting with '>' (marker and one optional space dropped),
#   - following non-blank lines that do not start with '>',
#   - trailing blank lines, each recorded as "\n".
# The pieces are joined into one string with an extra trailing newline.
BlockQuoteRaw =  a:StartList
                 (( '>' ' '? Line { a = cons($$, a) } )
                  ( !'>' !BlankLine Line { a = cons($$, a) } )*
                  ( BlankLine { a = cons(mk_str("\n"), a) } )*
                 )+
                 { $$ = mk_str_from_list(a, true)
                   $$.key = RAW
                 }
|
|
|
|
NonblankIndentedLine = !BlankLine IndentedLine

# VerbatimChunk: optional blank lines (each kept as "\n") followed by
# at least one indented, non-blank line.
VerbatimChunk = a:StartList
                ( BlankLine { a = cons(mk_str("\n"), a) } )*
                ( NonblankIndentedLine { a = cons($$, a) } )+
                { $$ = mk_str_from_list(a, false) }

# Verbatim: one or more chunks joined into a single VERBATIM string.
Verbatim =     a:StartList ( VerbatimChunk { a = cons($$, a) } )+
               { $$ = mk_str_from_list(a, false)
                 $$.key = VERBATIM }
|
|
|
|
# HorizontalRule: three or more '*', '-' or '_' (all the same
# character), optionally separated by spaces, alone on a line and
# followed by at least one blank line.
HorizontalRule = NonindentSpace
                 ( '*' Sp '*' Sp '*' (Sp '*')*
                 | '-' Sp '-' Sp '-' (Sp '-')*
                 | '_' Sp '_' Sp '_' (Sp '_')*)
                 Sp Newline BlankLine+
                 { $$ = mk_element(HRULE) }
|
|
|
|
# Bullet: list marker '+', '*' or '-' followed by whitespace. The
# negative lookahead keeps horizontal rules from being read as bullets.
Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+

# BulletList: a tight or loose list that starts with a bullet; the
# LIST produced by ListTight/ListLoose is retagged BULLETLIST.
BulletList = &Bullet (ListTight | ListLoose)
             { $$.key = BULLETLIST }

# ListTight: items with no blank lines between them. After the last
# item (and optional blank lines) no further list marker may follow,
# otherwise the match fails and ListLoose is tried.
ListTight = a:StartList
            ( ListItem { a = cons($$, a) } )+
            BlankLine* !(Bullet | Enumerator)
            { $$ = mk_list(LIST, a) }

# ListLoose: items that may be separated by blank lines. "\n\n" is
# appended to each item's raw text (its first child).
ListLoose = a:StartList
            ( b:ListItem BlankLine*
              {
                  li := b.children
                  li.contents.str += "\n\n"
                  a = cons(b, a)
              } )+
            { $$ = mk_list(LIST, a) }
|
|
|
|
# ListItem: a marker, the first ListBlock and any continuation blocks.
# The text is stored as a single RAW child of a LISTITEM element.
ListItem =  ( Bullet | Enumerator )
            a:StartList
            ListBlock { a = cons($$, a) }
            ( ListContinuationBlock { a = cons($$, a) } )*
            {
                raw := mk_str_from_list(a, false)
                raw.key = RAW
                $$ = mk_element(LISTITEM)
                $$.children = raw
            }

# ListBlock: the rest of the marker line plus following lines that
# still belong to the same item (see ListBlockLine).
ListBlock = a:StartList
            Line { a = cons($$, a) }
            ( ListBlockLine { a = cons($$, a) } )*
            { $$ = mk_str_from_list(a, false) }

# ListContinuationBlock: further indented blocks of the same item.
# The preceding blank lines are kept verbatim; when there are none, a
# "\001" byte is inserted as block separator.
ListContinuationBlock = a:StartList
                        ( < BlankLine* >
                          { if len(yytext) == 0 {
                                a = cons(mk_str("\001"), a) // block separator
                            } else {
                                a = cons(mk_str(yytext), a)
                            }
                          } )
                        ( Indent ListBlock { a = cons($$, a) } )+
                        { $$ = mk_str_from_list(a, false) }
|
|
|
|
# Enumerator: ordered-list marker, digits followed by '.' and whitespace.
Enumerator = NonindentSpace [0-9]+ '.' Spacechar+

# OrderedList: a tight or loose list that starts with an enumerator;
# the resulting LIST is retagged ORDEREDLIST.
OrderedList = &Enumerator (ListTight | ListLoose)
              { $$.key = ORDEREDLIST }

# ListBlockLine: a line continuing the current list block. It must not
# start a new item, be blank, or be a horizontal rule.
ListBlockLine = !( Indent? (Bullet | Enumerator) )
                !BlankLine
                !HorizontalRule
                OptionallyIndentedLine
|
|
|
|
# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.
# For every supported tag there is an Open rule (tag name in all-lower
# or all-upper case, optional attributes) and a matching Close rule.

HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'

HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'

HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'

HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'

HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>'

HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>'

HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'

HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>'

HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>'

HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>'

HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>'

HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>'

HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>'

HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>'

HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>'

HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'

HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'

HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'

HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'

HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'

HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'

HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'

HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'

HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'

HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'

HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'

HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'

HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'

HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'

HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'

HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'

HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'

HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'
|
|
|
|
# HtmlBlockInTags: an opening block tag, arbitrary content up to the
# matching closing tag, and the closing tag. Nested block tags are
# handled by recursing into HtmlBlockInTags before falling back to
# consuming a single character.
HtmlBlockInTags = HtmlBlockOpenAddress (HtmlBlockInTags | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress
                | HtmlBlockOpenBlockquote (HtmlBlockInTags | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote
                | HtmlBlockOpenCenter (HtmlBlockInTags | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter
                | HtmlBlockOpenDir (HtmlBlockInTags | !HtmlBlockCloseDir .)* HtmlBlockCloseDir
                | HtmlBlockOpenDiv (HtmlBlockInTags | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv
                | HtmlBlockOpenDl (HtmlBlockInTags | !HtmlBlockCloseDl .)* HtmlBlockCloseDl
                | HtmlBlockOpenFieldset (HtmlBlockInTags | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset
                | HtmlBlockOpenForm (HtmlBlockInTags | !HtmlBlockCloseForm .)* HtmlBlockCloseForm
                | HtmlBlockOpenH1 (HtmlBlockInTags | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1
                | HtmlBlockOpenH2 (HtmlBlockInTags | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2
                | HtmlBlockOpenH3 (HtmlBlockInTags | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3
                | HtmlBlockOpenH4 (HtmlBlockInTags | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4
                | HtmlBlockOpenH5 (HtmlBlockInTags | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5
                | HtmlBlockOpenH6 (HtmlBlockInTags | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6
                | HtmlBlockOpenMenu (HtmlBlockInTags | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu
                | HtmlBlockOpenNoframes (HtmlBlockInTags | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes
                | HtmlBlockOpenNoscript (HtmlBlockInTags | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript
                | HtmlBlockOpenOl (HtmlBlockInTags | !HtmlBlockCloseOl .)* HtmlBlockCloseOl
                | HtmlBlockOpenP (HtmlBlockInTags | !HtmlBlockCloseP .)* HtmlBlockCloseP
                | HtmlBlockOpenPre (HtmlBlockInTags | !HtmlBlockClosePre .)* HtmlBlockClosePre
                | HtmlBlockOpenTable (HtmlBlockInTags | !HtmlBlockCloseTable .)* HtmlBlockCloseTable
                | HtmlBlockOpenUl (HtmlBlockInTags | !HtmlBlockCloseUl .)* HtmlBlockCloseUl
                | HtmlBlockOpenDd (HtmlBlockInTags | !HtmlBlockCloseDd .)* HtmlBlockCloseDd
                | HtmlBlockOpenDt (HtmlBlockInTags | !HtmlBlockCloseDt .)* HtmlBlockCloseDt
                | HtmlBlockOpenFrameset (HtmlBlockInTags | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset
                | HtmlBlockOpenLi (HtmlBlockInTags | !HtmlBlockCloseLi .)* HtmlBlockCloseLi
                | HtmlBlockOpenTbody (HtmlBlockInTags | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody
                | HtmlBlockOpenTd (HtmlBlockInTags | !HtmlBlockCloseTd .)* HtmlBlockCloseTd
                | HtmlBlockOpenTfoot (HtmlBlockInTags | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot
                | HtmlBlockOpenTh (HtmlBlockInTags | !HtmlBlockCloseTh .)* HtmlBlockCloseTh
                | HtmlBlockOpenThead (HtmlBlockInTags | !HtmlBlockCloseThead .)* HtmlBlockCloseThead
                | HtmlBlockOpenTr (HtmlBlockInTags | !HtmlBlockCloseTr .)* HtmlBlockCloseTr
                | HtmlBlockOpenScript (HtmlBlockInTags | !HtmlBlockCloseScript .)* HtmlBlockCloseScript
|
|
|
|
# HtmlBlock: a block-level HTML construct followed by blank lines.
# With the FilterHTML extension the block is replaced by an empty
# LIST; otherwise the matched text is kept verbatim as HTMLBLOCK.
HtmlBlock = &'<' < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
            BlankLine+
            {   if p.extension.FilterHTML {
                    $$ = mk_list(LIST, nil)
                } else {
                    $$ = mk_str(yytext)
                    $$.key = HTMLBLOCK
                }
            }

# HtmlBlockSelfClosing: a self-closing block tag such as <hr />.
HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'

# HtmlBlockType: tag names treated as block-level, in all-lower and
# all-upper case.
HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
                "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
                "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
                "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
                "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
                "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"
|
|
|
|
# <style> ... </style> handling. Unlike the block tags, the content is
# consumed without recursion.
StyleOpen =     '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>'
StyleClose =    '<' Spnl '/' ("style" | "STYLE") Spnl '>'
InStyleTags =   StyleOpen (!StyleClose .)* StyleClose

# StyleBlock: dropped (empty LIST) when the FilterStyles extension is
# on, otherwise kept verbatim as HTMLBLOCK.
StyleBlock =    < InStyleTags >
                BlankLine*
                {   if p.extension.FilterStyles {
                        $$ = mk_list(LIST, nil)
                    } else {
                        $$ = mk_str(yytext)
                        $$.key = HTMLBLOCK
                    }
                }
|
|
|
|
# Inlines: one or more inline elements. A line ending inside the run is
# kept only when another inline follows it; one optional trailing
# Endline is consumed and discarded.
Inlines  =  a:StartList ( !Endline Inline { a = cons($$, a) }
                        | c:Endline &Inline { a = cons(c, a) } )+ Endline?
            { $$ = mk_list(LIST, a) }

# Inline: ordered choice over all inline constructs. Symbol comes last
# and catches special characters that did not start anything else.
Inline  = Str
        | Endline
        | UlOrStarLine
        | Space
        | Strong
        | Emph
        | Image
        | Link
        | NoteReference
        | InlineNote
        | Code
        | RawHtml
        | Entity
        | EscapedChar
        | Smart
        | Symbol
|
|
|
|
# Space: any run of spaces/tabs collapses to a single " " (key SPACE).
Space = Spacechar+
        { $$ = mk_str(" ")
          $$.key = SPACE }

# Str: a run of normal characters; underscores are allowed inside the
# run as long as a normal character follows them.
Str = < NormalChar (NormalChar | '_'+ &NormalChar)* >
      { $$ = mk_str(yytext) }

# EscapedChar: backslash followed by one of the escapable characters;
# only the escaped character itself is kept.
EscapedChar =   '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
                { $$ = mk_str(yytext) }

# Entity: an HTML entity, passed through unchanged with key HTML.
Entity =    ( HexEntity | DecEntity | CharEntity )
            { $$ = mk_str(yytext); $$.key = HTML }
|
|
|
|
Endline =   LineBreak | TerminalEndline | NormalEndline

# NormalEndline: a newline inside a paragraph. It must not be followed
# by a blank line, a block quote marker, an ATX heading, or a line that
# is underlined like a setext heading. Yields "\n" with key SPACE.
NormalEndline =   Sp Newline !BlankLine !'>' !AtxStart
                  !(Line ("===" '='* | "---" '-'*) Newline)
                  { $$ = mk_str("\n")
                    $$.key = SPACE }

# TerminalEndline: the newline at the very end of input; yields nil.
TerminalEndline = Sp Newline Eof
                  { $$ = nil }
|
|
|
|
# LineBreak: a hard line break, i.e. two spaces before the end of a
# line. (The literal must contain exactly two spaces; Sp inside
# NormalEndline consumes any additional ones.)
LineBreak = "  " NormalEndline
            { $$ = mk_element(LINEBREAK) }
|
|
|
|
# Symbol: a single special character taken literally.
Symbol =    < SpecialChar >
            { $$ = mk_str(yytext) }

# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
UlOrStarLine =  (UlLine | StarLine) { $$ = mk_str(yytext) }
StarLine =      < "****" '*'* > | < Spacechar '*'+ &Spacechar >
UlLine   =      < "____" '_'* > | < Spacechar '_'+ &Spacechar >
|
|
|
|
# Emphasis: *text* or _text_.
Emph =      EmphStar | EmphUl

# Opening '*': not part of a StarLine and not followed by whitespace.
OneStarOpen  =  !StarLine '*' !Spacechar !Newline
# Closing '*': preceded by a non-space inline, which is returned so the
# caller can add it to the emphasized content.
OneStarClose =  !Spacechar !Newline a:Inline !StrongStar '*' { $$ = a }

EmphStar =  OneStarOpen
            a:StartList
            ( !OneStarClose Inline { a = cons($$, a) } )*
            OneStarClose { a = cons($$, a) }
            { $$ = mk_list(EMPH, a) }

OneUlOpen  =    !UlLine '_' !Spacechar !Newline
# The closing '_' must not be followed by an alphanumeric character.
OneUlClose =    !Spacechar !Newline a:Inline !StrongUl '_' !Alphanumeric { $$ = a }

EmphUl =    OneUlOpen
            a:StartList
            ( !OneUlClose Inline { a = cons($$, a) } )*
            OneUlClose { a = cons($$, a) }
            { $$ = mk_list(EMPH, a) }
|
|
|
|
# Strong emphasis: **text** or __text__. Same structure as Emph, with
# doubled delimiters.
Strong = StrongStar | StrongUl

TwoStarOpen =   !StarLine "**" !Spacechar !Newline
TwoStarClose =  !Spacechar !Newline a:Inline "**" { $$ = a }

StrongStar =    TwoStarOpen
                a:StartList
                ( !TwoStarClose Inline { a = cons($$, a) } )*
                TwoStarClose { a = cons($$, a) }
                { $$ = mk_list(STRONG, a) }

TwoUlOpen =     !UlLine "__" !Spacechar !Newline
# The closing "__" must not be followed by an alphanumeric character.
TwoUlClose =    !Spacechar !Newline a:Inline "__" !Alphanumeric { $$ = a }

StrongUl =  TwoUlOpen
            a:StartList
            ( !TwoUlClose Inline { a = cons($$, a) } )*
            TwoUlClose { a = cons($$, a) }
            { $$ = mk_list(STRONG, a) }
|
|
|
|
# Image: '!' followed by a link; the element built by the link rule is
# retagged IMAGE.
Image = '!' ( ExplicitLink | ReferenceLink )
        { $$.key = IMAGE }

Link =  ExplicitLink | ReferenceLink | AutoLink

ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle
|
|
|
|
# ReferenceLinkDouble: [text][ref]. The second label is looked up in
# the collected references. When no reference matches, the source text
# is reconstructed as a LIST of literal brackets, the two labels and
# the whitespace between them (yytext).
ReferenceLinkDouble =  a:Label < Spnl > !"[]" b:Label
                       {
                           if match, found := p.findReference(b.children); found {
                               $$ = mk_link(a.children, match.url, match.title);
                               a = nil
                               b = nil
                           } else {
                               result := mk_element(LIST)
                               result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext),
                                                   cons(mk_str("["), cons(b, mk_str("]")))))))
                               $$ = result
                           }
                       }
|
|
|
|
# ReferenceLinkSingle: [ref] or [ref][]. The label doubles as the
# reference name. When no reference matches, the label is emitted in
# literal brackets, followed by the optional "[]" part (yytext).
ReferenceLinkSingle =  a:Label < (Spnl "[]")? >
                       {
                           if match, found := p.findReference(a.children); found {
                               $$ = mk_link(a.children, match.url, match.title)
                               a = nil
                           } else {
                               result := mk_element(LIST)
                               result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext))));
                               $$ = result
                           }
                       }
|
|
|
|
# ExplicitLink: [label](source "title").
ExplicitLink =  l:Label Spnl '(' Sp s:Source Spnl t:Title Sp ')'
                { $$ = mk_link(l.children, s.contents.str, t.contents.str)
                  s = nil
                  t = nil
                  l = nil }

# Source: the link destination, optionally wrapped in <...>.
Source  = ( '<' < SourceContents > '>' | < SourceContents > )
          { $$ = mk_str(yytext) }

# SourceContents: non-space characters; parentheses are allowed only
# when balanced.
SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*
                 | ""

# Title: optional title in single or double quotes; empty when absent.
Title = ( TitleSingle | TitleDouble | < "" > )
        { $$ = mk_str(yytext) }

# The title ends at a quote that is followed by ')' or a newline, so
# quote characters inside the title are accepted.
TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''

TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'
|
|
|
|
# AutoLink: <scheme://...> or <user@host>.
AutoLink = AutoLinkUrl | AutoLinkEmail

# The URL is used both as label and as destination.
AutoLinkUrl =   '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
                {   $$ = mk_link(mk_str(yytext), yytext, "") }

# The address is the label; the destination gets a "mailto:" prefix.
AutoLinkEmail = '<' < [-A-Za-z0-9+_]+ '@' ( !Newline !'>' . )+ > '>'
                {
                    $$ = mk_link(mk_str(yytext), "mailto:"+yytext, "")
                }
|
|
|
|
# Reference: a link definition, "[label]: source title". Built like a
# link, then retagged REFERENCE.
Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc Spnl t:RefTitle BlankLine*
            { $$ = mk_link(l.children, s.contents.str, t.contents.str)
              s = nil
              t = nil
              l = nil
              $$.key = REFERENCE }

# Label: bracketed inlines. With the Notes extension enabled, a label
# must not start with '^' (that syntax is reserved for notes).
Label = '[' ( !'^' &{ p.extension.Notes } | &. &{ !p.extension.Notes } )
        a:StartList
        ( !']' Inline { a = cons($$, a) } )*
        ']'
        { $$ = mk_list(LIST, a) }

RefSrc = < Nonspacechar+ >
         { $$ = mk_str(yytext)
           $$.key = HTML }

# RefTitle: optional title in '...', "..." or (...); empty when absent.
RefTitle =  ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
            { $$ = mk_str(yytext) }

EmptyTitle = < "" >

# A reference title never spans lines; it ends at a closing delimiter
# that is the last thing on its line.
RefTitleSingle = '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''

RefTitleDouble = '"' < ( !('"' Sp Newline | Newline) . )* > '"'

RefTitleParens = '(' < ( !(')' Sp Newline | Newline) . )* > ')'

# References: entry rule that scans the whole input, collecting every
# Reference and skipping all other blocks; the result is stored in
# p.references in document order.
References = a:StartList
             ( b:Reference { a = cons(b, a) } | SkipBlock )*
             { p.references = reverse(a) }
             commit
|
|
|
|
# TicksN: exactly N backticks (not followed by another backtick).
Ticks1 = "`" !'`'
Ticks2 = "``" !'`'
Ticks3 = "```" !'`'
Ticks4 = "````" !'`'
Ticks5 = "`````" !'`'

# Code: a code span delimited by the same number of backticks on both
# sides. Inside, backtick runs of a different length are allowed, and
# a newline is accepted unless it is followed by a blank line.
# Whitespace next to the delimiters is not part of the captured text.
Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
       | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
       | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
       | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
       | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
       )
       { $$ = mk_str(yytext); $$.key = CODE }
|
|
|
|
# RawHtml: an inline HTML comment or tag. Dropped (empty LIST) when
# the FilterHTML extension is on, otherwise passed through as HTML.
RawHtml =   < (HtmlComment | HtmlTag) >
            {   if p.extension.FilterHTML {
                    $$ = mk_list(LIST, nil)
                } else {
                    $$ = mk_str(yytext)
                    $$.key = HTML
                }
            }
|
|
|
|
# Lexical building blocks shared by the rules of this grammar.

# A line that contains nothing but optional spaces/tabs.
BlankLine =     Sp Newline

Quoted =        '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
# An attribute name with an optional quoted or unquoted value.
HtmlAttribute = (Alphanumeric | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl
HtmlComment =   "<!--" (!"-->" .)* "-->"
# Any opening, closing or self-closing tag.
HtmlTag =       '<' Spnl '/'? Alphanumeric+ Spnl HtmlAttribute* '/'? Spnl '>'
Eof =           !.
Spacechar =     ' ' | '\t'
Nonspacechar =  !Spacechar !Newline .
# Accepts LF, CRLF and a lone CR.
Newline =       '\n' | '\r' '\n'?
Sp =            Spacechar*
# Optional whitespace that may contain at most one newline.
Spnl =          Sp (Newline Sp)?
SpecialChar =   '*' | '_' | '`' | '&' | '[' | ']' | '<' | '!' | '\\' | ExtendedSpecialChar
NormalChar =    !( SpecialChar | Spacechar | Newline ) .
Alphanumeric =  [A-Za-z0-9]
Digit =         [0-9]
|
|
|
|
# HTML entities. Each rule captures the complete entity, from '&'
# through the terminating ';', in yytext.
HexEntity =     < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
DecEntity =     < '&' '#' [0-9]+ ';' >
CharEntity =    < '&' [A-Za-z0-9]+ ';' >
|
|
|
|
# NonindentSpace: zero to three leading spaces, i.e. less than one
# indentation level. Longest alternative first, since PEG choice is
# ordered.
NonindentSpace =    "   " | "  " | " " | ""
# Indent: one indentation level, a tab or four spaces.
Indent =            "\t" | "    "
IndentedLine =      Indent Line
OptionallyIndentedLine = Indent? Line
|
|
|
|
# StartList starts a list data structure that can be added to with cons:
# it consumes nothing (but requires at least one character of input)
# and yields nil, the empty list.
StartList = &.
            { $$ = nil }

# Line: the text of one raw line as a STR element.
Line =  RawLine
        { $$ = mk_str(yytext) }
# RawLine: everything up to and including the newline, or a last line
# without newline at the end of input.
RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )

# SkipBlock: consumes a run of non-blank lines plus trailing blank
# lines, or a run of blank lines, without producing a value.
SkipBlock = ( !BlankLine RawLine )+ BlankLine*
          | BlankLine+
|
|
|
|
# Syntax extensions
|
|
|
|
# ExtendedSpecialChar: characters that become special only when the
# corresponding extension is enabled.
ExtendedSpecialChar = &{ p.extension.Smart } ('.' | '-' | '\'' | '"')
                    | &{ p.extension.Notes } ( '^' )

# Smart: typographic punctuation; active only with the Smart extension.
Smart = &{ p.extension.Smart }
        ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )

Apostrophe = '\''
             { $$ = mk_element(APOSTROPHE) }

Ellipsis = ("..." | ". . .")
           { $$ = mk_element(ELLIPSIS) }

Dash = EmDash | EnDash

# EnDash: a hyphen directly before a digit.
EnDash = '-' &Digit
         { $$ = mk_element(ENDASH) }

EmDash = ("---" | "--")
         { $$ = mk_element(EMDASH) }

# SingleQuoteStart: an opening single quote. It must not be followed
# by closing punctuation or whitespace, and must not look like the
# apostrophe of a contraction ('s, 't, 'm, 've, 'll, 're).
SingleQuoteStart = '\'' ![)!\],.;:-? \t\n] !( ( "s" | "t" | "m" | "ve" | "ll" | "re" ) !Alphanumeric )

SingleQuoteEnd = '\'' !Alphanumeric

SingleQuoted = SingleQuoteStart
               a:StartList
               ( !SingleQuoteEnd b:Inline { a = cons(b, a) } )+
               SingleQuoteEnd
               { $$ = mk_list(SINGLEQUOTED, a) }

DoubleQuoteStart = '"'

DoubleQuoteEnd = '"'

DoubleQuoted =  DoubleQuoteStart
                a:StartList
                ( !DoubleQuoteEnd b:Inline { a = cons(b, a) } )+
                DoubleQuoteEnd
                { $$ = mk_list(DOUBLEQUOTED, a) }
|
|
|
|
# Footnotes; all rules below are active only with the Notes extension.

# NoteReference: [^label]. When a note with that label was collected,
# a NOTE element sharing the note's children is produced; otherwise the
# original text is emitted unchanged.
NoteReference = &{ p.extension.Notes }
                ref:RawNoteReference
                {
                    if match, ok := p.find_note(ref.contents.str); ok {
                        $$ = mk_element(NOTE)
                        $$.children = match.children
                        $$.contents.str = ""
                    } else {
                        $$ = mk_str("[^"+ref.contents.str+"]")
                    }
                }

# RawNoteReference: yields the label between "[^" and "]".
RawNoteReference = "[^" < ( !Newline !']' . )+ > ']'
                   { $$ = mk_str(yytext) }

# Note: a note definition, "[^label]: text", with optional indented
# continuation blocks. The label is stored in contents.str, where
# find_note looks for it.
Note =          &{ p.extension.Notes }
                NonindentSpace ref:RawNoteReference ':' Sp
                a:StartList
                ( RawNoteBlock { a = cons($$, a) } )
                ( &Indent RawNoteBlock { a = cons($$, a) } )*
                {   $$ = mk_list(NOTE, a)
                    $$.contents.str = ref.contents.str
                }

# InlineNote: ^[text], a note given in place; it has no label.
InlineNote =    &{ p.extension.Notes }
                "^["
                a:StartList
                ( !']' Inline { a = cons($$, a) } )+
                ']'
                { $$ = mk_list(NOTE, a)
                  $$.contents.str = "" }

# Notes: entry rule that scans the whole input, collecting every Note
# and skipping all other blocks; the result is stored in p.notes.
Notes =         a:StartList
                ( b:Note { a = cons(b, a) } | SkipBlock )*
                { p.notes = reverse(a) }
                commit

# RawNoteBlock: non-blank lines (one level of indentation removed if
# present) plus the blank lines after them, joined into one RAW string
# with an extra trailing newline.
RawNoteBlock =  a:StartList
                ( !BlankLine OptionallyIndentedLine { a = cons($$, a) } )+
                ( < BlankLine* > { a = cons(mk_str(yytext), a) } )
                {   $$ = mk_str_from_list(a, true)
                    $$.key = RAW
                }
|
|
|
|
%%
|
|
|
|
|
|
/*
|
|
* List manipulation functions
|
|
*/
|
|
|
|
|
|
/* cons - cons an element onto a list, returning pointer to new head
|
|
*/
|
|
func cons(new, list *element) *element {
|
|
new.next = list
|
|
return new
|
|
}
|
|
|
|
/* reverse - reverse a list, returning pointer to new list
|
|
*/
|
|
func reverse(list *element) (new *element) {
|
|
for list != nil {
|
|
next := list.next
|
|
new = cons(list, new)
|
|
list = next
|
|
}
|
|
return
|
|
}
|
|
|
|
/* concat_string_list - concatenates string contents of list of STR elements.
|
|
*/
|
|
func concat_string_list(list *element) string {
|
|
s := ""
|
|
for list != nil {
|
|
s += list.contents.str
|
|
list = list.next
|
|
}
|
|
return s
|
|
}
|
|
|
|
|
|
/*
|
|
* Auxiliary functions for parsing actions.
|
|
* These make it easier to build up data structures (including lists)
|
|
* in the parsing actions.
|
|
*/
|
|
|
|
|
|
/* mk_element - generic constructor for element
|
|
*/
|
|
var elbuf []element
|
|
var elock sync.Mutex
|
|
|
|
func mk_element(key int) *element {
|
|
elock.Lock()
|
|
if len(elbuf) == 0 {
|
|
elbuf = make([]element, 1024)
|
|
}
|
|
e := &elbuf[0]
|
|
elbuf = elbuf[1:]
|
|
elock.Unlock()
|
|
e.key = key
|
|
return e
|
|
}
|
|
|
|
/* mk_str - constructor for STR element
|
|
*/
|
|
func mk_str(s string) (result *element) {
|
|
result = mk_element(STR)
|
|
result.contents.str = s
|
|
return
|
|
}
|
|
|
|
/* mk_str_from_list - makes STR element by concatenating a
|
|
* reversed list of strings, adding optional extra newline
|
|
*/
|
|
func mk_str_from_list(list *element, extra_newline bool) (result *element) {
|
|
s := concat_string_list(reverse(list))
|
|
if extra_newline {
|
|
s += "\n"
|
|
}
|
|
result = mk_element(STR)
|
|
result.contents.str = s
|
|
return
|
|
}
|
|
|
|
/* mk_list - makes new list with key 'key' and children the reverse of 'lst'.
|
|
* This is designed to be used with cons to build lists in a parser action.
|
|
* The reversing is necessary because cons adds to the head of a list.
|
|
*/
|
|
func mk_list(key int, lst *element) *element {
|
|
result := mk_element(key)
|
|
result.children = reverse(lst)
|
|
return result
|
|
}
|
|
|
|
/* mk_link - constructor for LINK element
|
|
*/
|
|
func mk_link(label *element, url, title string) *element {
|
|
result := mk_element(LINK)
|
|
result.contents.link = &link{label: label, url: url, title: title}
|
|
return result
|
|
}
|
|
|
|
|
|
/* match_inlines - returns true if inline lists match (case-insensitive...)
|
|
*/
|
|
func match_inlines(l1, l2 *element) bool {
|
|
for l1 != nil && l2 != nil {
|
|
if l1.key != l2.key {
|
|
return false
|
|
}
|
|
switch l1.key {
|
|
case SPACE, LINEBREAK, ELLIPSIS, EMDASH, ENDASH, APOSTROPHE:
|
|
break
|
|
case CODE, STR, HTML:
|
|
if strings.ToUpper(l1.contents.str) != strings.ToUpper(l2.contents.str) {
|
|
return false
|
|
}
|
|
case EMPH, STRONG, LIST, SINGLEQUOTED, DOUBLEQUOTED:
|
|
if !match_inlines(l1.children, l2.children) {
|
|
return false
|
|
}
|
|
case LINK, IMAGE:
|
|
return false /* No links or images within links */
|
|
default:
|
|
log.Exitf("match_inlines encountered unknown key = %d\n", l1.key)
|
|
}
|
|
l1 = l1.next
|
|
l2 = l2.next
|
|
}
|
|
return l1 == nil && l2 == nil /* return true if both lists exhausted */
|
|
}
|
|
|
|
|
|
/* find_reference - return true if link found in references matching label.
|
|
* 'link' is modified with the matching url and title.
|
|
*/
|
|
func (d *Doc) findReference(label *element) (*link, bool) {
|
|
for cur := d.references; cur != nil; cur = cur.next {
|
|
l := cur.contents.link
|
|
if match_inlines(label, l.label) {
|
|
return l, true
|
|
}
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
|
|
/* find_note - return true if note found in notes matching label.
|
|
* if found, 'result' is set to point to matched note.
|
|
*/
|
|
func (d *Doc) find_note(label string) (*element, bool) {
|
|
for el := d.notes; el != nil; el = el.next {
|
|
if label == el.contents.str {
|
|
return el, true
|
|
}
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
|
|
/* print tree of elements, for debugging only.
|
|
*/
|
|
func print_tree(elt *element, indent int) {
|
|
var key string
|
|
|
|
for elt != nil {
|
|
for i := 0; i < indent; i++ {
|
|
fmt.Print("\t")
|
|
}
|
|
key = keynames[elt.key]
|
|
if key == "" {
|
|
key = "?"
|
|
}
|
|
if elt.key == STR {
|
|
fmt.Printf("%p:\t%s\t'%s'\n", elt, key, elt.contents.str)
|
|
} else {
|
|
fmt.Printf("%p:\t%s %p\n", elt, key, elt.next)
|
|
}
|
|
if elt.children != nil {
|
|
print_tree(elt.children, indent+1)
|
|
}
|
|
elt = elt.next
|
|
}
|
|
}
|
|
|
|
var keynames = [numVAL]string{
|
|
LIST: "LIST",
|
|
RAW: "RAW",
|
|
SPACE: "SPACE",
|
|
LINEBREAK: "LINEBREAK",
|
|
ELLIPSIS: "ELLIPSIS",
|
|
EMDASH: "EMDASH",
|
|
ENDASH: "ENDASH",
|
|
APOSTROPHE: "APOSTROPHE",
|
|
SINGLEQUOTED: "SINGLEQUOTED",
|
|
DOUBLEQUOTED: "DOUBLEQUOTED",
|
|
STR: "STR",
|
|
LINK: "LINK",
|
|
IMAGE: "IMAGE",
|
|
CODE: "CODE",
|
|
HTML: "HTML",
|
|
EMPH: "EMPH",
|
|
STRONG: "STRONG",
|
|
PLAIN: "PLAIN",
|
|
PARA: "PARA",
|
|
LISTITEM: "LISTITEM",
|
|
BULLETLIST: "BULLETLIST",
|
|
ORDEREDLIST: "ORDEREDLIST",
|
|
H1: "H1",
|
|
H2: "H2",
|
|
H3: "H3",
|
|
H4: "H4",
|
|
H5: "H5",
|
|
H6: "H6",
|
|
BLOCKQUOTE: "BLOCKQUOTE",
|
|
VERBATIM: "VERBATIM",
|
|
HTMLBLOCK: "HTMLBLOCK",
|
|
HRULE: "HRULE",
|
|
REFERENCE: "REFERENCE",
|
|
NOTE: "NOTE",
|
|
}
|