markdown/markdown.go

/*  Original C version https://github.com/jgm/peg-markdown/
 *	Copyright 2008 John MacFarlane (jgm at berkeley dot edu).
 *
 *  Modifications and translation from C into Go
 *  based on markdown_lib.c and parsing_functions.c
 *	Copyright 2010 Michael Teichgräber (mt at wmipf dot de)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License or the MIT
 *  license.  See LICENSE for details.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 */

package markdown

import (
	"bytes"
	"io"
	"log"
	"strings"
)

// needParserIfaceVersion pins this file to one revision of the parser
// interface exported by parser.leg.go.
//
// A build error saying that parserIfaceVersion_N is undefined means that
// parser.leg.go is either missing or out of date. Rebuild it with
//
//	make nuke
//	make parser
const needParserIfaceVersion = parserIfaceVersion_17

// Extensions selects the optional Markdown extensions a Parser uses.
// NewParser copies the value into the parser state, so later changes to
// the caller's struct do not affect an existing Parser.
type Extensions struct {
	// Smart: presumably enables "smart" typographic substitutions
	// (quotes, dashes, ellipses) - TODO confirm against the grammar.
	Smart        bool
	// Notes makes Markdown run an additional pass over the input with
	// the notes rule before the document blocks are parsed.
	Notes        bool
	// FilterHTML: presumably suppresses raw HTML found in the input -
	// TODO confirm against the grammar.
	FilterHTML   bool
	// FilterStyles: presumably suppresses HTML style blocks found in the
	// input - TODO confirm against the grammar.
	FilterStyles bool
	// Strike enables the strike-through extension.
	Strike       bool
	// Dlists: presumably enables definition lists - TODO confirm against
	// the grammar.
	Dlists       bool
}

// Parser bundles the parser state with a scratch buffer so that both can
// be reused across calls to Markdown. Create one with NewParser.
type Parser struct {
	// yy is the underlying parser (type yyParser, declared outside this
	// file); its state carries the selected extensions, the element heap
	// and the tree produced by the last parse.
	yy           yyParser
	// preformatBuf is reset and refilled by preformat on every call.
	preformatBuf *bytes.Buffer
}

// NewParser returns a Parser that is ready for use. When x is non-nil the
// extensions it selects are copied into the parser; a nil x leaves every
// extension flag false. The returned Parser can serve any number of
// Markdown calls, so its stacks and buffers are allocated only once.
func NewParser(x *Extensions) (p *Parser) {
	p = &Parser{
		preformatBuf: bytes.NewBuffer(make([]byte, 0, 32768)),
	}
	// The extensions are stored before Init runs, exactly as the parser
	// expects them to be ordered.
	if x != nil {
		p.yy.state.extension = *x
	}
	p.yy.Init()
	p.yy.state.heap.init(1024)
	return p
}

// A Formatter is called repeatedly, one Markdown block at a time,
// while the document is parsed. At the end of a document the Finish
// method is called, which may, for example, print footnotes.
// A Formatter can be reused.
type Formatter interface {
	// FormatBlock receives the element tree of one parsed block, after
	// any RAW elements in it have been replaced by their parsed content.
	FormatBlock(*element)
	// Finish is called once per Markdown call, after the last block has
	// been passed to FormatBlock.
	Finish()
}

// Markdown reads src, parses it block by block, and passes every parsed
// block to f.FormatBlock. Once no further block can be parsed, f.Finish
// is called.
//
// Before the first block is parsed, the complete input is scanned with
// the references rule and, if the Notes extension is set, with the notes
// rule as well.
func (p *Parser) Markdown(src io.Reader, f Formatter) {
	text := p.preformat(src)

	p.parseRule(ruleReferences, text)
	if p.yy.extension.Notes {
		p.parseRule(ruleNotes, text)
	}

	// Remember the heap position reached after the preliminary passes;
	// it is restored after each block so element storage gets reused.
	mark := p.yy.state.heap.Pos()

	for block := p.parseRule(ruleDocblock, text); block != nil; block = p.parseRule(ruleDocblock, text) {
		// Take back what the parser still holds in its buffer; that is
		// the input for the next iteration.
		text = p.yy.ResetBuffer("")
		f.FormatBlock(p.processRawBlocks(block))
		p.yy.state.heap.setPos(mark)
	}
	f.Finish()
}

// parseRule loads s into the parser and runs the grammar rule identified
// by rule.
//
// For ruleDoc and ruleDocblock the resulting tree is returned (nil if the
// parse reported an error) and the tree stored in the parser state is
// cleared. For every other rule the result is nil and the stored tree is
// left alone.
//
// If the buffer being replaced still holds anything other than carriage
// returns, newlines and spaces, the process is terminated via
// log.Fatalln.
func (p *Parser) parseRule(rule int, s string) (tree *element) {
	// Trimming an empty string yields an empty string, so this single
	// comparison also covers the empty-buffer case.
	if leftover := p.yy.ResetBuffer(s); strings.Trim(leftover, "\r\n ") != "" {
		log.Fatalln("Buffer not empty", "["+leftover+"]")
	}

	err := p.yy.Parse(rule)
	if rule != ruleDoc && rule != ruleDocblock {
		return nil
	}

	// A parse error is a valid outcome: it simply produces no tree.
	if err == nil {
		tree = p.yy.state.tree
	}
	p.yy.state.tree = nil
	return tree
}

// processRawBlocks walks the sibling list starting at input and turns
// every RAW element into a LIST element whose children are the result of
// parsing the RAW text as Markdown. It then descends into the children of
// each element, so the finished tree should no longer contain any RAW
// elements. The list is modified in place and input is returned.
func (p *Parser) processRawBlocks(input *element) *element {
	for node := input; node != nil; node = node.next {
		if node.key == RAW {
			node.key = LIST
			node.children = nil

			// \001 marks the boundary between nested lists that are not
			// separated by a blank line. Each chunk is parsed on its own
			// and the resulting lists are chained together through tail.
			tail := &node.children
			chunks := strings.Split(node.contents.str, "\001")
			for _, chunk := range chunks {
				parsed := p.parseRule(ruleDoc, chunk)
				if parsed == nil {
					continue
				}
				*tail = parsed
				last := parsed
				for last.next != nil {
					last = last.next
				}
				tail = &last.next
			}
			node.contents.str = ""
		}
		if node.children != nil {
			node.children = p.processRawBlocks(node.children)
		}
	}
	return input
}

// TABSTOP is the distance between tab stops, in columns, that preformat
// uses when it expands tabs into spaces.
const TABSTOP = 4

// preformat reads r until it reports an error (normally io.EOF) and
// returns the text with every tab expanded to spaces up to the next
// multiple of TABSTOP columns. Two newlines are appended to the result.
//
// The parser's preformatBuf is reset and reused as the output buffer, so
// the method must not be called concurrently on the same Parser.
//
// Columns are counted per byte, not per character: every byte other than
// '\t' and '\n' advances the column by one, and '\n' starts a new line at
// column zero. The column position carries over from one Read call to the
// next.
//
// Read errors cannot be reported through this signature. Any error ends
// the input; the bytes delivered up to and including the failing Read are
// kept.
func (p *Parser) preformat(r io.Reader) (s string) {
	charstotab := TABSTOP
	buf := make([]byte, 32768)

	b := p.preformatBuf
	b.Reset()
	for {
		n, err := r.Read(buf)

		// A Reader may return n > 0 together with an error (for example
		// the final chunk along with io.EOF), so the data is consumed
		// before the error is looked at.
		i0 := 0 // start of the pending run that has not been copied yet
		for i, c := range buf[:n] {
			switch c {
			case '\t':
				b.Write(buf[i0:i])
				for ; charstotab > 0; charstotab-- {
					b.WriteByte(' ')
				}
				i0 = i + 1
			case '\n':
				b.Write(buf[i0 : i+1])
				i0 = i + 1
				charstotab = TABSTOP
			default:
				charstotab--
			}
			if charstotab == 0 {
				charstotab = TABSTOP
			}
		}
		b.Write(buf[i0:n])

		if err != nil {
			break
		}
	}

	b.WriteString("\n\n")
	return b.String()
}
initial import 2010-11-21 22:04:39 +00:00			`/* Original C version https://github.com/jgm/peg-markdown/`
			`* Copyright 2008 John MacFarlane (jgm at berkeley dot edu).`
			`*`
			`* Modifications and translation from C into Go`
			`* based on markdown_lib.c and parsing_functions.c`
			`* Copyright 2010 Michael Teichgräber (mt at wmipf dot de)`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License or the MIT`
			`* license. See LICENSE for details.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*/`

			`package markdown`

			`import (`
preformat: use bytes.Buffer instead of concatenating strings 2010-11-24 17:54:12 +00:00			`"bytes"`
Parse: take an io.Reader argument instead of a string 2012-04-20 23:19:46 +00:00			`"io"`
use a single yyParser instance only, and reuse initialized parser 2010-11-24 18:36:10 +00:00			`"log"`
go fmt 2012-04-20 11:36:59 +00:00			`"strings"`
initial import 2010-11-21 22:04:39 +00:00			`)`

add a mechanism that makes sure only an up-to-date parser.leg.go can be used 2012-04-29 22:55:37 +00:00			`const (`
			`// If you get a build error message saying that`
			`// parserIfaceVersion_N is undefined, parser.leg.go`
			`// either is not present or it is out of date. You should`
			`// rebuild it using`
			`// make nuke`
			`// make parser`
update parser interface 2013-06-12 12:17:16 +00:00			`needParserIfaceVersion = parserIfaceVersion_17`
add a mechanism that makes sure only an up-to-date parser.leg.go can be used 2012-04-29 22:55:37 +00:00			`)`

rename: Options -> Extensions 2012-05-04 14:20:29 +00:00			`// Markdown Extensions.`
			`type Extensions struct {`
go fmt 2012-04-20 11:36:59 +00:00			`Smart bool`
			`Notes bool`
			`FilterHTML bool`
			`FilterStyles bool`
disable strike-through extension by default [jgm/peg-markdown] See https://github.com/jgm/peg-markdown/commit/9556c54 2013-06-11 00:00:44 +00:00			`Strike bool`
go fmt 2012-04-20 11:36:59 +00:00			`Dlists bool`
replace extension flags by Extension, a struct of boolean values 2010-11-24 18:56:25 +00:00			`}`

new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`type Parser struct {`
			`yy yyParser`
			`preformatBuf *bytes.Buffer`
			`}`
initial import 2010-11-21 22:04:39 +00:00
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`// NewParser creates an instance of a parser. It can be reused`
			`// so that stacks and buffers need not be allocated anew for`
			`// each Markdown call.`
rename: Options -> Extensions 2012-05-04 14:20:29 +00:00			`func NewParser(x Extensions) (p Parser) {`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`p = new(Parser)`
rename: Options -> Extensions 2012-05-04 14:20:29 +00:00			`if x != nil {`
			`p.yy.state.extension = *x`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`}`
			`p.yy.Init()`
mk_element: turn global allocation into a per-parser, reusable allocation. This reduces memory usage within a single document, because elements allocated during parsing of one Docblock can be reused during parsing of the next Docblock. 2012-04-29 21:54:10 +00:00			`p.yy.state.heap.init(1024)`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`p.preformatBuf = bytes.NewBuffer(make([]byte, 0, 32768))`
			`return`
			`}`
use a single yyParser instance only, and reuse initialized parser 2010-11-24 18:36:10 +00:00
markdown.go: document Formatter 2012-08-31 22:25:43 +00:00			`// A Formatter is called repeatedly, one Markdown block at a time,`
			`// while the document is parsed. At the end of a document the Finish`
			`// method is called, which may, for example, print footnotes.`
			`// A Formatter can be reused.`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`type Formatter interface {`
			`FormatBlock(*element)`
			`Finish()`
			`}`

			`// Markdown parses input from an io.Reader into a tree, and sends`
			`// parsed blocks to a Formatter`
			`func (p *Parser) Markdown(src io.Reader, f Formatter) {`
			`s := p.preformat(src)`
initial import 2010-11-21 22:04:39 +00:00
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`p.parseRule(ruleReferences, s)`
			`if p.yy.extension.Notes {`
			`p.parseRule(ruleNotes, s)`
initial import 2010-11-21 22:04:39 +00:00			`}`
mk_element: turn global allocation into a per-parser, reusable allocation. This reduces memory usage within a single document, because elements allocated during parsing of one Docblock can be reused during parsing of the next Docblock. 2012-04-29 21:54:10 +00:00			`savedPos := p.yy.state.heap.Pos()`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00
			`for {`
			`tree := p.parseRule(ruleDocblock, s)`
parseRule: treat the occurrence of a parse error as a valid condition The previous implementation was too strict. A parse error can occur if the input is a single line consisting of spaces and tabs only. 2012-05-29 23:29:37 +00:00			`if tree == nil {`
			`break`
			`}`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`s = p.yy.ResetBuffer("")`
			`tree = p.processRawBlocks(tree)`
			`f.FormatBlock(tree)`
mk_element: turn global allocation into a per-parser, reusable allocation. This reduces memory usage within a single document, because elements allocated during parsing of one Docblock can be reused during parsing of the next Docblock. 2012-04-29 21:54:10 +00:00			`p.yy.state.heap.setPos(savedPos)`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`}`
			`f.Finish()`
initial import 2010-11-21 22:04:39 +00:00			`}`

new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`func (p Parser) parseRule(rule int, s string) (tree element) {`
markdown:parseRule: fix extra `if', don't trim '\t' 2013-01-08 20:28:57 +00:00			`old := p.yy.ResetBuffer(s)`
			`if old != "" && strings.Trim(old, "\r\n ") != "" {`
			`log.Fatalln("Buffer not empty", "["+old+"]")`
use a single yyParser instance only, and reuse initialized parser 2010-11-24 18:36:10 +00:00			`}`
parseRule: treat the occurrence of a parse error as a valid condition The previous implementation was too strict. A parse error can occur if the input is a single line consisting of spaces and tabs only. 2012-05-29 23:29:37 +00:00			`err := p.yy.Parse(rule)`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`switch rule {`
			`case ruleDoc, ruleDocblock:`
parseRule: treat the occurrence of a parse error as a valid condition The previous implementation was too strict. A parse error can occur if the input is a single line consisting of spaces and tabs only. 2012-05-29 23:29:37 +00:00			`if err == nil {`
			`tree = p.yy.state.tree`
			`}`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`p.yy.state.tree = nil`
			`}`
			`return`
initial import 2010-11-21 22:04:39 +00:00			`}`

			`/* process_raw_blocks - traverses an element list, replacing any RAW elements with`
			`* the result of parsing them as markdown text, and recursing into the children`
			`* of parent elements. The result should be a tree of elements without any RAWs.`
			`*/`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`func (p Parser) processRawBlocks(input element) *element {`
initial import 2010-11-21 22:04:39 +00:00
			`for current := input; current != nil; current = current.next {`
			`if current.key == RAW {`
			`/* \001 is used to indicate boundaries between nested lists when there`
			`* is no blank line. We split the string by \001 and parse`
			`* each chunk separately.`
			`*/`
			`current.key = LIST`
			`current.children = nil`
processRawBlocks: simplify appending of lists 2010-11-24 17:59:08 +00:00			`listEnd := &current.children`
markdown.go: adjust strings.Split call 2011-06-30 18:52:13 +00:00			`for _, contents := range strings.Split(current.contents.str, "\001") {`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`if list := p.parseRule(ruleDoc, contents); list != nil {`
processRawBlocks: cope with empty lists 2010-11-25 13:33:36 +00:00			`*listEnd = list`
			`for list.next != nil {`
			`list = list.next`
			`}`
			`listEnd = &list.next`
initial import 2010-11-21 22:04:39 +00:00			`}`
			`}`
			`current.contents.str = ""`
			`}`
			`if current.children != nil {`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`current.children = p.processRawBlocks(current.children)`
initial import 2010-11-21 22:04:39 +00:00			`}`
			`}`
			`return input`
			`}`

			`const (`
			`TABSTOP = 4`
			`)`

			`/* preformat - allocate and copy text buffer while`
			`* performing tab expansion.`
			`*/`
new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`func (p *Parser) preformat(r io.Reader) (s string) {`
initial import 2010-11-21 22:04:39 +00:00			`charstotab := TABSTOP`
Parse: take an io.Reader argument instead of a string 2012-04-20 23:19:46 +00:00			`buf := make([]byte, 32768)`

new API: p := NewParser(&opts), p.Markdown(r, f) replaces d := Parse(r, opts), d.WriteHtml(w) The new api allows to create a parser instance that can be reused between calls to Markdown. Also, an interface `Formatter' replaces the tight coupling between former Doc type and HTML generation. 2012-04-28 20:57:55 +00:00			`b := p.preformatBuf`
			`b.Reset()`
Parse: take an io.Reader argument instead of a string 2012-04-20 23:19:46 +00:00			`for {`
			`n, err := r.Read(buf)`
			`if err != nil {`
			`break`
initial import 2010-11-21 22:04:39 +00:00			`}`
Parse: take an io.Reader argument instead of a string 2012-04-20 23:19:46 +00:00			`i0 := 0`
cosmetics 2012-04-29 22:58:06 +00:00			`for i, c := range buf[:n] {`
			`switch c {`
Parse: take an io.Reader argument instead of a string 2012-04-20 23:19:46 +00:00			`case '\t':`
			`b.Write(buf[i0:i])`
			`for ; charstotab > 0; charstotab-- {`
			`b.WriteByte(' ')`
			`}`
			`i0 = i + 1`
			`case '\n':`
			`b.Write(buf[i0 : i+1])`
			`i0 = i + 1`
			`charstotab = TABSTOP`
			`default:`
			`charstotab--`
			`}`
			`if charstotab == 0 {`
			`charstotab = TABSTOP`
			`}`
initial import 2010-11-21 22:04:39 +00:00			`}`
Parse: take an io.Reader argument instead of a string 2012-04-20 23:19:46 +00:00			`b.Write(buf[i0:n])`
initial import 2010-11-21 22:04:39 +00:00			`}`
Parse: take an io.Reader argument instead of a string 2012-04-20 23:19:46 +00:00
preformat: one \n is not enough. Fix parsing of input containing \r\n line endings. 2012-04-29 21:47:53 +00:00			`b.WriteString("\n\n")`
preformat: use bytes.Buffer instead of concatenating strings 2010-11-24 17:54:12 +00:00			`return b.String()`
initial import 2010-11-21 22:04:39 +00:00			`}`