From cca2b1a3c2c4a92213e90d620be94a4927fabdde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Teichgr=C3=A4ber?= Date: Sun, 29 Apr 2012 23:54:10 +0200 Subject: [PATCH] mk_element: turn global allocation into a per-parser, reusable allocation. This reduces memory usage within a single document, because elements allocated during parsing of one Docblock can be reused during parsing of the next Docblock. --- elemheap.go | 49 +++++++++++++ markdown.go | 3 + parser.leg | 195 ++++++++++++++++++++++++++-------------------------- 3 files changed, 148 insertions(+), 99 deletions(-) create mode 100644 elemheap.go diff --git a/elemheap.go b/elemheap.go new file mode 100644 index 0000000..e165554 --- /dev/null +++ b/elemheap.go @@ -0,0 +1,49 @@ +package markdown + +/* +Elements are not allocated one at a time, but in rows of +elemHeap.rowSize elements. After rowSize elements have been +requested, a row is exhausted, and the next one will +be allocated. Previously allocated rows are tracked in +elemHeap.rows. + +The Pos() and setPos() methods query and reset the +current position (row, and position within the row), which +allows reusing elements. Callers must make sure that previous +users of such storage no longer access it once setPos has +been called.
+*/ + +type elemHeap struct { + rows [][]element + heapPos + rowSize int +} + +type heapPos struct { + iRow int + row []element +} + +func (h *elemHeap) nextRow() []element { + h.iRow++ + if h.iRow == len(h.rows) { + h.rows = append(h.rows, make([]element, h.rowSize)) + } + h.row = h.rows[h.iRow] + return h.row +} + +func (h *elemHeap) init(size int) { + h.rowSize = size + h.rows = [][]element{make([]element, size)} + h.row = h.rows[h.iRow] +} + +func (h *elemHeap) Pos() heapPos { + return h.heapPos +} + +func (h *elemHeap) setPos(i heapPos) { + h.heapPos = i +} diff --git a/markdown.go b/markdown.go index 094b2e4..2380d38 100644 --- a/markdown.go +++ b/markdown.go @@ -47,6 +47,7 @@ func NewParser(opt *Options) (p *Parser) { p.yy.state.extension = *opt } p.yy.Init() + p.yy.state.heap.init(1024) p.preformatBuf = bytes.NewBuffer(make([]byte, 0, 32768)) return } @@ -65,6 +66,7 @@ func (p *Parser) Markdown(src io.Reader, f Formatter) { if p.yy.extension.Notes { p.parseRule(ruleNotes, s) } + savedPos := p.yy.state.heap.Pos() L: for { @@ -76,6 +78,7 @@ L: case "", "\n", "\r\n", "\n\n", "\r\n\n", "\n\n\n", "\r\n\n\n": break L } + p.yy.state.heap.setPos(savedPos) } f.Finish() } diff --git a/parser.leg b/parser.leg index 26eed6f..c79d8a5 100644 --- a/parser.leg +++ b/parser.leg @@ -25,7 +25,6 @@ import ( "io" "strings" "log" - "sync" ) // Semantic value of a parsing action. @@ -93,6 +92,7 @@ const ( type state struct { extension Options + heap elemHeap tree *element /* Results of parse. */ references *element /* List of link references found. */ notes *element /* List of footnotes found. */ @@ -136,10 +136,10 @@ Plain = a:Inlines AtxInline = !Newline !(Sp? '#'* Sp Newline) Inline AtxStart = &'#' < ( "######" | "#####" | "####" | "###" | "##" | "#" ) > - { $$ = mk_element(H1 + (len(yytext) - 1)) } + { $$ = p.mkElem(H1 + (len(yytext) - 1)) } AtxHeading = s:AtxStart Sp? a:StartList ( AtxInline { a = cons($$, a) } )+ (Sp? '#'* Sp)? 
Newline - { $$ = mk_list(s.key, a) + { $$ = p.mkList(s.key, a) s = nil } SetextHeading = SetextHeading1 | SetextHeading2 @@ -150,37 +150,37 @@ SetextBottom2 = "---" '-'* Newline SetextHeading1 = &(RawLine SetextBottom1) a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline - SetextBottom1 { $$ = mk_list(H1, a) } + SetextBottom1 { $$ = p.mkList(H1, a) } SetextHeading2 = &(RawLine SetextBottom2) a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline - SetextBottom2 { $$ = mk_list(H2, a) } + SetextBottom2 { $$ = p.mkList(H2, a) } Heading = AtxHeading | SetextHeading BlockQuote = a:BlockQuoteRaw - { $$ = mk_element(BLOCKQUOTE) + { $$ = p.mkElem(BLOCKQUOTE) $$.children = a } BlockQuoteRaw = a:StartList (( '>' ' '? Line { a = cons($$, a) } ) ( !'>' !BlankLine Line { a = cons($$, a) } )* - ( BlankLine { a = cons(mk_str("\n"), a) } )* + ( BlankLine { a = cons(p.mkString("\n"), a) } )* )+ - { $$ = mk_str_from_list(a, true) + { $$ = p.mkStringFromList(a, true) $$.key = RAW } NonblankIndentedLine = !BlankLine IndentedLine VerbatimChunk = a:StartList - ( BlankLine { a = cons(mk_str("\n"), a) } )* + ( BlankLine { a = cons(p.mkString("\n"), a) } )* ( NonblankIndentedLine { a = cons($$, a) } )+ - { $$ = mk_str_from_list(a, false) } + { $$ = p.mkStringFromList(a, false) } Verbatim = a:StartList ( VerbatimChunk { a = cons($$, a) } )+ - { $$ = mk_str_from_list(a, false) + { $$ = p.mkStringFromList(a, false) $$.key = VERBATIM } HorizontalRule = NonindentSpace @@ -188,7 +188,7 @@ HorizontalRule = NonindentSpace | '-' Sp '-' Sp '-' (Sp '-')* | '_' Sp '_' Sp '_' (Sp '_')*) Sp Newline BlankLine+ - { $$ = mk_element(HRULE) } + { $$ = p.mkElem(HRULE) } Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+ @@ -198,7 +198,7 @@ BulletList = &Bullet (ListTight | ListLoose) ListTight = a:StartList ( ListItemTight { a = cons($$, a) } )+ BlankLine* !(Bullet | Enumerator | DefMarker) - { $$ = mk_list(LIST, a) } + { $$ = p.mkList(LIST, a) } ListLoose = a:StartList ( 
b:ListItem BlankLine* @@ -207,16 +207,16 @@ ListLoose = a:StartList li.contents.str += "\n\n" a = cons(b, a) } )+ - { $$ = mk_list(LIST, a) } + { $$ = p.mkList(LIST, a) } ListItem = ( Bullet | Enumerator | DefMarker ) a:StartList ListBlock { a = cons($$, a) } ( ListContinuationBlock { a = cons($$, a) } )* { - raw := mk_str_from_list(a, false) + raw := p.mkStringFromList(a, false) raw.key = RAW - $$ = mk_element(LISTITEM) + $$ = p.mkElem(LISTITEM) $$.children = raw } @@ -228,27 +228,27 @@ ListItemTight = ListContinuationBlock { a = cons($$, a) } )* !ListContinuationBlock { - raw := mk_str_from_list(a, false) + raw := p.mkStringFromList(a, false) raw.key = RAW - $$ = mk_element(LISTITEM) + $$ = p.mkElem(LISTITEM) $$.children = raw } ListBlock = a:StartList !BlankLine Line { a = cons($$, a) } ( ListBlockLine { a = cons($$, a) } )* - { $$ = mk_str_from_list(a, false) } + { $$ = p.mkStringFromList(a, false) } ListContinuationBlock = a:StartList ( < BlankLine* > { if len(yytext) == 0 { - a = cons(mk_str("\001"), a) // block separator + a = cons(p.mkString("\001"), a) // block separator } else { - a = cons(mk_str(yytext), a) + a = cons(p.mkString(yytext), a) } } ) ( Indent ListBlock { a = cons($$, a) } )+ - { $$ = mk_str_from_list(a, false) } + { $$ = p.mkStringFromList(a, false) } Enumerator = NonindentSpace [0-9]+ '.' 
Spacechar+ @@ -433,9 +433,9 @@ HtmlBlockInTags = HtmlBlockAddress HtmlBlock = &'<' < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) > BlankLine+ { if p.extension.FilterHTML { - $$ = mk_list(LIST, nil) + $$ = p.mkList(LIST, nil) } else { - $$ = mk_str(yytext) + $$ = p.mkString(yytext) $$.key = HTMLBLOCK } } @@ -455,16 +455,16 @@ InStyleTags = StyleOpen (!StyleClose .)* StyleClose StyleBlock = < InStyleTags > BlankLine* { if p.extension.FilterStyles { - $$ = mk_list(LIST, nil) + $$ = p.mkList(LIST, nil) } else { - $$ = mk_str(yytext) + $$ = p.mkString(yytext) $$.key = HTMLBLOCK } } Inlines = a:StartList ( !Endline Inline { a = cons($$, a) } | c:Endline &Inline { a = cons(c, a) } )+ Endline? - { $$ = mk_list(LIST, a) } + { $$ = p.mkList(LIST, a) } Inline = Str | Endline @@ -484,37 +484,37 @@ Inline = Str | Symbol Space = Spacechar+ - { $$ = mk_str(" ") + { $$ = p.mkString(" ") $$.key = SPACE } Str = < NormalChar (NormalChar | '_'+ &Alphanumeric)* > - { $$ = mk_str(yytext) } + { $$ = p.mkString(yytext) } EscapedChar = '\\' !Newline < [-\\`|*_{}[\]()#+.!><] > - { $$ = mk_str(yytext) } + { $$ = p.mkString(yytext) } Entity = ( HexEntity | DecEntity | CharEntity ) - { $$ = mk_str(yytext); $$.key = HTML } + { $$ = p.mkString(yytext); $$.key = HTML } Endline = LineBreak | TerminalEndline | NormalEndline NormalEndline = Sp Newline !BlankLine !'>' !AtxStart !(Line ("===" '='* | "---" '-'*) Newline) - { $$ = mk_str("\n") + { $$ = p.mkString("\n") $$.key = SPACE } TerminalEndline = Sp Newline Eof { $$ = nil } LineBreak = " " NormalEndline - { $$ = mk_element(LINEBREAK) } + { $$ = p.mkElem(LINEBREAK) } Symbol = < SpecialChar > - { $$ = mk_str(yytext) } + { $$ = p.mkString(yytext) } # This keeps the parser from getting bogged down on long strings of '*' or '_', # or strings of '*' or '_' with space on each side: -UlOrStarLine = (UlLine | StarLine) { $$ = mk_str(yytext) } +UlOrStarLine = (UlLine | StarLine) { $$ = p.mkString(yytext) } StarLine = < "****" '*'* > | < 
Spacechar '*'+ &Spacechar > UlLine = < "____" '_'* > | < Spacechar '_'+ &Spacechar > @@ -527,7 +527,7 @@ EmphStar = OneStarOpen a:StartList ( !OneStarClose Inline { a = cons($$, a) } )* OneStarClose { a = cons($$, a) } - { $$ = mk_list(EMPH, a) } + { $$ = p.mkList(EMPH, a) } OneUlOpen = !UlLine '_' !Spacechar !Newline OneUlClose = !Spacechar !Newline a:Inline !StrongUl '_' !Alphanumeric { $$ = a } @@ -536,7 +536,7 @@ EmphUl = OneUlOpen a:StartList ( !OneUlClose Inline { a = cons($$, a) } )* OneUlClose { a = cons($$, a) } - { $$ = mk_list(EMPH, a) } + { $$ = p.mkList(EMPH, a) } Strong = StrongStar | StrongUl @@ -547,7 +547,7 @@ StrongStar = TwoStarOpen a:StartList ( !TwoStarClose Inline { a = cons($$, a) } )* TwoStarClose { a = cons($$, a) } - { $$ = mk_list(STRONG, a) } + { $$ = p.mkList(STRONG, a) } TwoUlOpen = !UlLine "__" !Spacechar !Newline TwoUlClose = !Spacechar !Newline a:Inline "__" !Alphanumeric { $$ = a } @@ -556,14 +556,14 @@ StrongUl = TwoUlOpen a:StartList ( !TwoUlClose Inline { a = cons($$, a) } )* TwoUlClose { a = cons($$, a) } - { $$ = mk_list(STRONG, a) } + { $$ = p.mkList(STRONG, a) } Image = '!' 
( ExplicitLink | ReferenceLink ) { if $$.key == LINK { $$.key = IMAGE } else { result := $$ - $$.children = cons(mk_str("!"), result.children) + $$.children = cons(p.mkString("!"), result.children) } } @@ -574,13 +574,13 @@ ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label { if match, found := p.findReference(b.children); found { - $$ = mk_link(a.children, match.url, match.title); + $$ = p.mkLink(a.children, match.url, match.title); a = nil b = nil } else { - result := mk_element(LIST) - result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext), - cons(mk_str("["), cons(b, mk_str("]"))))))) + result := p.mkElem(LIST) + result.children = cons(p.mkString("["), cons(a, cons(p.mkString("]"), cons(p.mkString(yytext), + cons(p.mkString("["), cons(b, p.mkString("]"))))))) $$ = result } } @@ -588,29 +588,29 @@ ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label ReferenceLinkSingle = a:Label < (Spnl "[]")? > { if match, found := p.findReference(a.children); found { - $$ = mk_link(a.children, match.url, match.title) + $$ = p.mkLink(a.children, match.url, match.title) a = nil } else { - result := mk_element(LIST) - result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext)))); + result := p.mkElem(LIST) + result.children = cons(p.mkString("["), cons(a, cons(p.mkString("]"), p.mkString(yytext)))); $$ = result } } ExplicitLink = l:Label Spnl '(' Sp s:Source Spnl t:Title Sp ')' - { $$ = mk_link(l.children, s.contents.str, t.contents.str) + { $$ = p.mkLink(l.children, s.contents.str, t.contents.str) s = nil t = nil l = nil } Source = ( '<' < SourceContents > '>' | < SourceContents > ) - { $$ = mk_str(yytext) } + { $$ = p.mkString(yytext) } SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')* | "" Title = ( TitleSingle | TitleDouble | < "" > ) - { $$ = mk_str(yytext) } + { $$ = p.mkString(yytext) } TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . 
)* > '\'' @@ -619,15 +619,15 @@ TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"' AutoLink = AutoLinkUrl | AutoLinkEmail AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>' - { $$ = mk_link(mk_str(yytext), yytext, "") } + { $$ = p.mkLink(p.mkString(yytext), yytext, "") } AutoLinkEmail = '<' < [-A-Za-z0-9+_]+ '@' ( !Newline !'>' . )+ > '>' { - $$ = mk_link(mk_str(yytext), "mailto:"+yytext, "") + $$ = p.mkLink(p.mkString(yytext), "mailto:"+yytext, "") } Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc Spnl t:RefTitle BlankLine* - { $$ = mk_link(l.children, s.contents.str, t.contents.str) + { $$ = p.mkLink(l.children, s.contents.str, t.contents.str) s = nil t = nil l = nil @@ -637,14 +637,14 @@ Label = '[' ( !'^' &{ p.extension.Notes } | &. &{ !p.extension.Notes } ) a:StartList ( !']' Inline { a = cons($$, a) } )* ']' - { $$ = mk_list(LIST, a) } + { $$ = p.mkList(LIST, a) } RefSrc = < Nonspacechar+ > - { $$ = mk_str(yytext) + { $$ = p.mkString(yytext) $$.key = HTML } RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle ) - { $$ = mk_str(yytext) } + { $$ = p.mkString(yytext) } EmptyTitle = < "" > @@ -671,13 +671,13 @@ Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4 | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5 ) - { $$ = mk_str(yytext); $$.key = CODE } + { $$ = p.mkString(yytext); $$.key = CODE } RawHtml = < (HtmlComment | HtmlTag) > { if p.extension.FilterHTML { - $$ = mk_list(LIST, nil) + $$ = p.mkList(LIST, nil) } else { - $$ = mk_str(yytext) + $$ = p.mkString(yytext) $$.key = HTML } } @@ -715,7 +715,7 @@ StartList = &. 
{ $$ = nil } Line = RawLine - { $$ = mk_str(yytext) } + { $$ = p.mkString(yytext) } RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof ) SkipBlock = ( !BlankLine RawLine )+ BlankLine* @@ -730,18 +730,18 @@ Smart = &{ p.extension.Smart } ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe ) Apostrophe = '\'' - { $$ = mk_element(APOSTROPHE) } + { $$ = p.mkElem(APOSTROPHE) } Ellipsis = ("..." | ". . .") - { $$ = mk_element(ELLIPSIS) } + { $$ = p.mkElem(ELLIPSIS) } Dash = EmDash | EnDash EnDash = '-' &Digit - { $$ = mk_element(ENDASH) } + { $$ = p.mkElem(ENDASH) } EmDash = ("---" | "--") - { $$ = mk_element(EMDASH) } + { $$ = p.mkElem(EMDASH) } SingleQuoteStart = '\'' ![)!\],.;:-? \t\n] !( ( "s" | "t" | "m" | "ve" | "ll" | "re" ) !Alphanumeric ) @@ -751,7 +751,7 @@ SingleQuoted = SingleQuoteStart a:StartList ( !SingleQuoteEnd b:Inline { a = cons(b, a) } )+ SingleQuoteEnd - { $$ = mk_list(SINGLEQUOTED, a) } + { $$ = p.mkList(SINGLEQUOTED, a) } DoubleQuoteStart = '"' @@ -761,29 +761,29 @@ DoubleQuoted = DoubleQuoteStart a:StartList ( !DoubleQuoteEnd b:Inline { a = cons(b, a) } )+ DoubleQuoteEnd - { $$ = mk_list(DOUBLEQUOTED, a) } + { $$ = p.mkList(DOUBLEQUOTED, a) } NoteReference = &{ p.extension.Notes } ref:RawNoteReference { if match, ok := p.find_note(ref.contents.str); ok { - $$ = mk_element(NOTE) + $$ = p.mkElem(NOTE) $$.children = match.children $$.contents.str = "" } else { - $$ = mk_str("[^"+ref.contents.str+"]") + $$ = p.mkString("[^"+ref.contents.str+"]") } } RawNoteReference = "[^" < ( !Newline !']' . 
)+ > ']' - { $$ = mk_str(yytext) } + { $$ = p.mkString(yytext) } Note = &{ p.extension.Notes } NonindentSpace ref:RawNoteReference ':' Sp a:StartList ( RawNoteBlock { a = cons($$, a) } ) ( &Indent RawNoteBlock { a = cons($$, a) } )* - { $$ = mk_list(NOTE, a) + { $$ = p.mkList(NOTE, a) $$.contents.str = ref.contents.str } @@ -792,7 +792,7 @@ InlineNote = &{ p.extension.Notes } a:StartList ( !']' Inline { a = cons($$, a) } )+ ']' - { $$ = mk_list(NOTE, a) + { $$ = p.mkList(NOTE, a) $$.contents.str = "" } Notes = a:StartList @@ -802,8 +802,8 @@ Notes = a:StartList RawNoteBlock = a:StartList ( !BlankLine OptionallyIndentedLine { a = cons($$, a) } )+ - ( < BlankLine* > { a = cons(mk_str(yytext), a) } ) - { $$ = mk_str_from_list(a, true) + ( < BlankLine* > { a = cons(p.mkString(yytext), a) } ) + { $$ = p.mkStringFromList(a, true) $$.key = RAW } @@ -811,7 +811,7 @@ RawNoteBlock = a:StartList DefinitionList = &{ p.extension.Dlists } a:StartList ( Definition { a = cons($$, a) } )+ - { $$ = mk_list(DEFINITIONLIST, a) } + { $$ = p.mkList(DEFINITIONLIST, a) } Definition = &( (NonindentSpace !Defmark Nonspacechar RawLine) BlankLine? Defmark) a:StartList @@ -822,13 +822,13 @@ Definition = &( (NonindentSpace !Defmark Nonspacechar RawLine) BlankLine? 
Defmar } a = cons($$, a) } - { $$ = mk_list(LIST, a) } + { $$ = p.mkList(LIST, a) } DListTitle = NonindentSpace !Defmark &Nonspacechar a:StartList (!Endline Inline { a = cons($$, a) } )+ Sp Newline - { $$ = mk_list(LIST, a) + { $$ = p.mkList(LIST, a) $$.key = DEFTITLE } @@ -873,35 +873,32 @@ func reverse(list *element) (new *element) { */ -/* mk_element - generic constructor for element +/* p.mkElem - generic constructor for element */ -var elbuf []element -var elock sync.Mutex - -func mk_element(key int) *element { - elock.Lock() - if len(elbuf) == 0 { - elbuf = make([]element, 1024) +func (p *yyParser) mkElem(key int) *element { + r := p.state.heap.row + if len(r) == 0 { + r = p.state.heap.nextRow() } - e := &elbuf[0] - elbuf = elbuf[1:] - elock.Unlock() + e := &r[0] + *e = element{} + p.state.heap.row = r[1:] e.key = key return e } -/* mk_str - constructor for STR element +/* p.mkString - constructor for STR element */ -func mk_str(s string) (result *element) { - result = mk_element(STR) +func (p *yyParser) mkString(s string) (result *element) { + result = p.mkElem(STR) result.contents.str = s return } -/* mk_str_from_list - makes STR element by concatenating a +/* p.mkStringFromList - makes STR element by concatenating a * reversed list of strings, adding optional extra newline */ -func mk_str_from_list(list *element, extra_newline bool) (result *element) { +func (p *yyParser) mkStringFromList(list *element, extra_newline bool) (result *element) { s := "" for list = reverse(list); list != nil; list = list.next { s += list.contents.str @@ -910,25 +907,25 @@ func mk_str_from_list(list *element, extra_newline bool) (result *element) { if extra_newline { s += "\n" } - result = mk_element(STR) + result = p.mkElem(STR) result.contents.str = s return } -/* mk_list - makes new list with key 'key' and children the reverse of 'lst'. +/* p.mkList - makes new list with key 'key' and children the reverse of 'lst'. 
* This is designed to be used with cons to build lists in a parser action. * The reversing is necessary because cons adds to the head of a list. */ -func mk_list(key int, lst *element) (el *element) { - el = mk_element(key) +func (p *yyParser) mkList(key int, lst *element) (el *element) { + el = p.mkElem(key) el.children = reverse(lst) return } -/* mk_link - constructor for LINK element +/* p.mkLink - constructor for LINK element */ -func mk_link(label *element, url, title string) (el *element) { - el = mk_element(LINK) +func (p *yyParser) mkLink(label *element, url, title string) (el *element) { + el = p.mkElem(LINK) el.contents.link = &link{label: label, url: url, title: title} return }