From 7a083c5daec58164ab95316a53013dd2521225cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Teichgr=C3=A4ber?= Date: Sun, 21 Nov 2010 23:04:39 +0100 Subject: [PATCH] initial import --- .hgignore | 14 + LICENSE | 78 ++++ Makefile | 69 ++++ README.peg-markdown | 214 ++++++++++ cmd/Makefile | 15 + cmd/main.go | 41 ++ doc.go | 25 ++ markdown.go | 138 +++++++ misc/c2go.sed | 37 ++ misc/devel.mk | 16 + misc/gofmt.rc | 51 +++ output.go | 229 +++++++++++ parser.leg | 952 ++++++++++++++++++++++++++++++++++++++++++++ 13 files changed, 1879 insertions(+) create mode 100644 .hgignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.peg-markdown create mode 100644 cmd/Makefile create mode 100644 cmd/main.go create mode 100644 doc.go create mode 100644 markdown.go create mode 100644 misc/c2go.sed create mode 100644 misc/devel.mk create mode 100644 misc/gofmt.rc create mode 100644 output.go create mode 100644 parser.leg diff --git a/.hgignore b/.hgignore new file mode 100644 index 0000000..5894256 --- /dev/null +++ b/.hgignore @@ -0,0 +1,14 @@ +# use glob syntax. +syntax: glob + +parser.leg.go +cmd/markdown +orig-c-src +core ++* +*.orig +[58].* +*.[58] +,* +*~ +[#]* diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..15f5262 --- /dev/null +++ b/LICENSE @@ -0,0 +1,78 @@ +markdown in Go, implemented using PEG grammar + +Copyright (c) 2010 Michael Teichgräber + +This is a translation of peg-markdown, written +by John MacFarlane, into Go: + +Copyright (c) 2008 John MacFarlane + +peg-markdown is released under both the GPL and MIT licenses. +You may pick the license that best fits your needs. + +The GPL + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +The MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + +peg (http://github.com/pointlander/peg), + based on http://piumarta.com/software/peg/, +written by Andrew J Snodgrass. + +Modifications to support LE grammars by Michael Teichgräber + +Copyright (c) 2010, Go Authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + * Neither the name of the Go Authors nor the names of its contributors may be used to + endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..522e616 --- /dev/null +++ b/Makefile @@ -0,0 +1,69 @@ +include $(GOROOT)/src/Make.inc + +TARG=markdown +GOFILES=\ + markdown.go\ + output.go\ + parser.leg.go\ + +package: + +include $(GOROOT)/src/Make.pkg + +all: cmd + +# +# mdtest runs MarkdownTests-1.0.3 that come with original C sources +# +mdtest: package cmd orig-c-src + make -C cmd test + +cmd: package + make -C cmd + + +CLEANFILES=\ + parser.leg.go\ + _obj\ + ,,c\ + ,,fmt\ + +distclean: clean clean-sub + rm -rf orig-c-src + +clean-sub: + for dir in cmd peg peg/leg; do make -C $$dir clean; done + + +# +# LEG parser generator stuff +# +LEG = ./peg/leg/leg +%.leg.go: %.leg $(LEG) + $(LEG) $< + +$(LEG): + make -C peg all + make -C peg/leg all + +peg: + + +# +# access to original C source files +# +VCS = git +# also, if hggit extension is available: +# VCS = hg + +orig-c-src: + $(VCS) clone git://github.com/jgm/peg-markdown.git $@ + + + +include misc/devel.mk + +.PHONY: \ + cmd\ + distclean\ + mdtest\ diff --git a/README.peg-markdown b/README.peg-markdown new file mode 100644 index 0000000..d61cdc3 --- /dev/null +++ b/README.peg-markdown @@ -0,0 +1,214 @@ +What is this? +============= + +This is an implementation of John Gruber's [markdown][] in C. It uses a +[parsing expression grammar (PEG)][] to define the syntax. This should +allow easy modification and extension. It currently supports output in +HTML, LaTeX, or groff_mm formats, and adding new formats is relatively +easy. + +[parsing expression grammar (PEG)]: http://en.wikipedia.org/wiki/Parsing_expression_grammar +[markdown]: http://daringfireball.net/projects/markdown/ + +It is pretty fast. A 179K text file that takes 5.7 seconds for +Markdown.pl (v. 1.0.1) to parse takes less than 0.2 seconds for this +markdown. It does, however, use a lot of memory (up to 4M of heap space +while parsing the 179K file, and up to 80K for a 4K file). 
(Note that +the memory leaks in earlier versions of this program have now been +plugged.) + +Both a library and a standalone program are provided. + +peg-markdown is written and maintained by John MacFarlane (jgm on +github), with significant contributions by Ryan Tomayko (rtomayko). +It is released under both the GPL and the MIT license; see LICENSE for +details. + +Installing +========== + +On a linux or unix-based system +------------------------------- + +This program is written in portable ANSI C. It requires +[glib2](http://www.gtk.org/download.html). Most *nix systems will have +this installed already. The build system requires GNU make. + +The other required dependency, [Ian Piumarta's peg/leg PEG parser +generator](http://piumarta.com/software/peg/), is included in the source +directory. It will be built automatically. (However, it is not as portable +as peg-markdown itself, and seems to require gcc.) + +To make the 'markdown' executable: + + make + +(Or, on some systems, `gmake`.) Then, for usage instructions: + + ./markdown --help + +To run John Gruber's Markdown 1.0.3 test suite: + + make test + +The test suite will fail on one of the list tests. Here's why. +Markdown.pl encloses "item one" in the following list in `<p>` tags: + + 1. item one + * subitem + * subitem + + 2. item two + + 3. item three + +peg-markdown does not enclose "item one" in `<p>` tags unless it has a +following blank line. This is consistent with the official markdown +syntax description, and lets the author of the document choose whether +`<p>` tags are desired. + +Cross-compiling for Windows with MinGW on a linux box +----------------------------------------------------- + +Prerequisites: + +* Linux system with MinGW cross compiler For Ubuntu: + + sudo apt-get install mingw32 + +* [Windows glib-2.0 binary & development files](http://www.gtk.org/download-windows.html). + Unzip files into cross-compiler directory tree (e.g., `/usr/i586-mingw32msvc`). + +Steps: + +1. Create the markdown parser using Linux-compiled `leg` from peg-0.1.4: + + ./peg-0.1.4/leg markdown_parser.leg >markdown_parser.c + + (Note: The same thing could be accomplished by cross-compiling leg, + executing it on Windows, and copying the resulting C file to the Linux + cross-compiler host.) + +2. Run the cross compiler with include flag for the Windows glib-2.0 headers: + for example, + + /usr/bin/i586-mingw32msvc-cc -c \ + -I/usr/i586-mingw32msvc/include/glib-2.0 \ + -I/usr/i586-mingw32msvc/lib/glib-2.0/include -Wall -O3 -ansi markdown*.c + +3. Link against Windows glib-2.0 headers: for example, + + /usr/bin/i586-mingw32msvc-cc markdown*.o \ + -Wl,-L/usr/i586-mingw32msvc/lib/glib,--dy,--warn-unresolved-symbols,-lglib-2.0 \ + -o markdown.exe + +The resulting executable depends on the glib dll file, so be sure to +load the glib binary on the Windows host. + +Compiling with MinGW on Windows +------------------------------- + +These directions assume that MinGW is installed in `c:\MinGW` and glib-2.0 +is installed in the MinGW directory hierarchy (with the mingw bin directory +in the system path). + +Unzip peg-markdown in a temp directory. From the directory with the +peg-markdown source, execute: + + cd peg-0.1.4 + for %i in (*.c) do @gcc -g -Wall -O3 -DNDEBUG -c -o %~ni.o %i + gcc -o leg.exe leg.o tree.o compile.o + cd ..
+ peg-0.1.4\leg.exe markdown_parser.leg >markdown_parser.c + @for %i in (markdown*.c) do @gcc -mms-bitfields -Ic:/MinGW/include/glib-2.0 -Ic:/MinGW/lib/glib-2.0/include -c -o %~ni.o %i + gcc -O3 -Lc:/MinGW/lib/glib-2.0 -lglib-2.0 -lintl markdown.o markdown_lib.o markdown_output.o markdown_parser.o -o markdown.exe -Wl,--dy,--warn-unresolved-symbols,-lglib-2.0,-Lc:/MinGW/lib/glib-2.0,-lglib-2.0,-lintl + +(Windows instructions courtesy of Matt Wolf.) + +Extensions +========== + +peg-markdown supports extensions to standard markdown syntax. +These can be turned on using the command line flag `-x` or +`--extensions`. `-x` by itself turns on all extensions. Extensions +can also be turned on selectively, using individual command-line +options. To see the available extensions: + + ./markdown --help-extensions + +The `--smart` extension provides "smart quotes", dashes, and ellipses. + +The `--notes` extension provides a footnote syntax like that of +Pandoc or PHP Markdown Extra. + +Using the library +================= + +The library exports two functions: + + GString * markdown_to_g_string(char *text, int extensions, int output_format); + char * markdown_to_string(char *text, int extensions, int output_format); + +The only difference between these is that `markdown_to_g_string` returns a +`GString` (glib's automatically resizable string), while `markdown_to_string` +returns a regular character pointer. The memory allocated for these must be +freed by the calling program, using `g_string_free()` or `free()`. + +`text` is the markdown-formatted text to be converted. Note that tabs will +be converted to spaces, using a four-space tab stop. Character encodings are +ignored. + +`extensions` is a bit-field specifying which syntax extensions should be used. +If `extensions` is 0, no extensions will be used. If it is `0xFFFFFF`, +all extensions will be used. 
To set extensions selectively, use the +bitwise `|` operator and the following constants: + + - `EXT_SMART` turns on smart quotes, dashes, and ellipses. + - `EXT_NOTES` turns on footnote syntax. [Pandoc's footnote syntax][] is used here. + - `EXT_FILTER_HTML` filters out raw HTML (except for styles). + - `EXT_FILTER_STYLES` filters out styles in HTML. + + [Pandoc's footnote syntax]: http://johnmacfarlane.net/pandoc/README.html#footnotes + +`output_format` is either `HTML_FORMAT`, `LATEX_FORMAT`, or `GROFF_MM_FORMAT`. + +To use the library, include `markdown_lib.h`. See `markdown.c` for an example. + +Hacking +======= + +It should be pretty easy to modify the program to produce other formats +than HTML or LaTeX, and to parse syntax extensions. A quick guide: + + * `markdown_parser.leg` contains the grammar itself. + + * `markdown_output.c` contains functions for printing the `Element` + structure in various output formats. + + * To add an output format, add the format to `markdown_formats` in + `markdown_lib.h`. Then modify `print_element` in `markdown_output.c`, + and add functions `print_XXXX_string`, `print_XXXX_element`, and + `print_XXXX_element_list`. Also add an option in the main program + that selects the new format. Don't forget to add it to the list of + formats in the usage message. + + * To add syntax extensions, define them in the PEG grammar + (`markdown_parser.leg`), using existing extensions as a guide. New + inline elements will need to be added to `Inline =`; new block + elements will need to be added to `Block =`. (Note: the order + of the alternatives does matter in PEG grammars.) + + * If you need to add new types of elements, modify the `keys` + enum in `markdown_peg.h`. + + * By using `&{ }` rules one can selectively disable extensions + depending on command-line options. For example, + `&{ extension(EXT_SMART) }` succeeds only if the `EXT_SMART` bit + of the global `syntax_extensions` is set.
Add your option to + `markdown_extensions` in `markdown_lib.h`, and add an option in + `markdown.c` to turn on your extension. + + * Note: Avoid using `[^abc]` character classes in the grammar, because + they cause problems with non-ascii input. Instead, use: `( !'a' !'b' + !'c' . )` + diff --git a/cmd/Makefile b/cmd/Makefile new file mode 100644 index 0000000..02f1a41 --- /dev/null +++ b/cmd/Makefile @@ -0,0 +1,15 @@ +include $(GOROOT)/src/Make.inc + +TARG=markdown +GOFILES=\ + main.go\ + +R = .. +PREREQ += $(R)/_obj/markdown.a + +include $(GOROOT)/src/Make.cmd + + +test: $(TARG) + cd $(R)/orig-c-src/MarkdownTest_1.0.3; \ + ./MarkdownTest.pl --script=../../cmd/$< --tidy
diff --git a/cmd/main.go b/cmd/main.go
new file mode 100644
index 0000000..7587441
--- /dev/null
+++ b/cmd/main.go
@@ -0,0 +1,62 @@
+package main
+
+import (
+	md "../_obj/markdown"
+	"flag"
+	"fmt"
+	"os"
+	"bufio"
+	"io/ioutil"
+)
+
+// main converts Markdown to HTML. The input is read from the file named by
+// the first non-flag argument, or from standard input when no file is given;
+// the HTML is written to standard output.
+//
+// The -notes and -smart flags switch on the corresponding syntax extensions.
+// A failure to read the input or to flush the output is reported on standard
+// error and makes the program exit with status 1.
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: %s [FILE]\n", os.Args[0])
+		flag.PrintDefaults()
+	}
+	optNotes := flag.Bool("notes", false, "turn on footnote syntax")
+	optSmart := flag.Bool("smart", false, "turn on smart quotes, dashes, and ellipses")
+	flag.Parse()
+
+	var b []byte
+	if flag.NArg() > 0 {
+		data, err := ioutil.ReadFile(flag.Arg(0))
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "%s: %v\n", os.Args[0], err)
+			os.Exit(1)
+		}
+		b = data
+	} else {
+		data, err := ioutil.ReadAll(os.Stdin)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "%s: reading standard input: %v\n", os.Args[0], err)
+			os.Exit(1)
+		}
+		b = data
+	}
+
+	// Build the extension bit mask expected by md.Parse from the flags.
+	e := 0
+	if *optNotes {
+		e |= md.EXT_NOTES
+	}
+	if *optSmart {
+		e |= md.EXT_SMART
+	}
+
+	doc := md.Parse(string(b), e)
+	w := bufio.NewWriter(os.Stdout)
+	doc.WriteHtml(w)
+	// WriteHtml does not report write errors, so Flush is the only place
+	// where a failed write to standard output can be noticed.
+	if err := w.Flush(); err != nil {
+		fmt.Fprintf(os.Stderr, "%s: writing output: %v\n", os.Args[0], err)
+		os.Exit(1)
+	}
+}
diff --git a/doc.go b/doc.go new file mode 100644 index 0000000..bd24556 --- /dev/null +++ b/doc.go @@ -0,0 +1,25 @@ +/* +A translation of peg-markdown [1] into Go.
+ +Usage example: + + import ( + md "markdown" + "os" + "io/ioutil" + "bufio" + ) + + func main() { + b, _ := ioutil.ReadAll(os.Stdin) + + doc := md.Parse(string(b), md.EXT_SMART) + + w := bufio.NewWriter(os.Stdout) + doc.WriteHtml(w) + w.Flush() + } + +[1]: https://github.com/jgm/peg-markdown/ +*/ +package markdown
diff --git a/markdown.go b/markdown.go
new file mode 100644
index 0000000..7027980
--- /dev/null
+++ b/markdown.go
@@ -0,0 +1,138 @@
+/* Original C version https://github.com/jgm/peg-markdown/
+ * Copyright 2008 John MacFarlane (jgm at berkeley dot edu).
+ *
+ * Modifications and translation from C into Go
+ * based on markdown_lib.c and parsing_functions.c
+ * Copyright 2010 Michael Teichgräber (mt at wmipf dot de)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License or the MIT
+ * license. See LICENSE for details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+package markdown
+
+// implements Parse()
+
+import (
+	"strings"
+)
+
+// Markdown Extensions: one bit each, OR-ed together to form the extFlags argument of Parse.
+const (
+	EXT_SMART = 1 << iota // smart quotes, dashes, and ellipses
+	EXT_NOTES             // footnote syntax; Parse runs the Notes rule only when this bit is set
+	EXT_FILTER_HTML       // per README.peg-markdown: filter out raw HTML (except for styles) - TODO confirm the Go port honors it
+	EXT_FILTER_STYLES     // per README.peg-markdown: filter out styles in HTML - TODO confirm the Go port honors it
+)
+
+// Parse converts a Markdown document into a tree for later output processing.
+//
+// text is first run through preformat. The pre-formatted text is then parsed
+// with the References rule and, only if EXT_NOTES is set in extFlags, with
+// the Notes rule, before the document itself is parsed and its RAW blocks
+// are expanded by processRawBlocks. The returned Doc holds the final tree.
+func Parse(text string, extFlags int) *Doc {
+	d := new(Doc)
+	d.syntaxExtensions = extFlags
+
+	s := preformat(text)
+
+	d.parseRule(ruleReferences, s)
+	if extFlags&EXT_NOTES != 0 {
+		d.parseRule(ruleNotes, s)
+	}
+	raw := d.parseMarkdown(s)
+	d.tree = d.processRawBlocks(raw)
+	return d
+}
+
+// parseRule runs a fresh parser over s, starting at the given grammar rule
+// and using d as parser state. If the parse fails, the parser's PrintError
+// method is called; no error is returned to the caller.
+func (d *Doc) parseRule(rule int, s string) {
+	m := new(yyParser)
+	m.Doc = d
+	m.Init()
+	m.Buffer = s
+	if !m.Parse(rule) {
+		m.PrintError()
+	}
+}
+
+// parseMarkdown runs a fresh parser over text, starting at the Doc rule and
+// using d as parser state, and returns d.tree afterwards. The result of
+// m.Parse is not checked.
+// NOTE(review): d.tree is assigned by the Doc rule's action; presumably the
+// returned list is nil when text contains no blocks - confirm against the
+// generated parser.
+func (d *Doc) parseMarkdown(text string) *element {
+	m := new(yyParser)
+	m.Doc = d
+	m.Init()
+	m.Buffer = text
+	m.Parse(ruleDoc)
+	return d.tree
+}
+
+
+/* process_raw_blocks - traverses an element list, replacing any RAW elements with
+ * the result of parsing them as markdown text, and recursing into the children
+ * of parent elements. The result should be a tree of elements without any RAWs.
+ *
+ * The list is modified in place and input is returned unchanged as its head.
+ */
+func (d *Doc) processRawBlocks(input *element) *element {
+	for current := input; current != nil; current = current.next {
+		if current.key == RAW {
+			/* \001 is used to indicate boundaries between nested lists when there
+			 * is no blank line. We split the string by \001 and parse
+			 * each chunk separately.
+			 */
+			current.key = LIST
+			current.children = nil
+
+			// lastChild is the tail of the children collected so far for
+			// this RAW element; nil until the first non-empty chunk.
+			var lastChild *element
+			for _, contents := range strings.Split(current.contents.str, "\001", -1) {
+				list := d.parseMarkdown(contents)
+				if list == nil {
+					// Nothing to append. Skipping the chunk keeps lastChild
+					// from being dereferenced while it is still nil.
+					continue
+				}
+				if lastChild == nil {
+					current.children = list
+				} else {
+					lastChild.next = list
+				}
+				// Advance to the end of the list that was just appended.
+				lastChild = list
+				for lastChild.next != nil {
+					lastChild = lastChild.next
+				}
+			}
+			current.contents.str = ""
+		}
+		if current.children != nil {
+			current.children = d.processRawBlocks(current.children)
+		}
+	}
+	return input
+}
+
+
+const (
+	TABSTOP = 4 // distance between tab stops, in columns, used by preformat
+)
+
+/* preformat - allocate and copy text buffer while
+ * performing tab expansion.
+ */
+// Every tab is replaced by the spaces needed to reach the next multiple of
+// TABSTOP columns. Each character (not byte) counts as one column and a
+// newline restarts the count. Two newlines are appended to the result.
+func preformat(text string) (s string) {
+	// start is the offset of the first byte of text not yet copied into s;
+	// untilStop is the number of columns left before the next tab stop.
+	start, untilStop := 0, TABSTOP
+
+	for pos, r := range text {
+		switch r {
+		case '\n':
+			s += text[start : pos+1]
+			start = pos + 1
+			untilStop = TABSTOP
+		case '\t':
+			// Copy what precedes the tab, then pad up to the tab stop.
+			s += text[start:pos]
+			for n := untilStop; n > 0; n-- {
+				s += " "
+			}
+			start = pos + 1
+			untilStop = TABSTOP
+		default:
+			untilStop--
+			if untilStop == 0 {
+				untilStop = TABSTOP
+			}
+		}
+	}
+	s += text[start:]
+	return s + "\n\n"
+}
diff --git a/misc/c2go.sed b/misc/c2go.sed new file mode 100644 index 0000000..b4ea994 --- /dev/null +++ b/misc/c2go.sed @@ -0,0 +1,37 @@ +# this sed script replaces some bits of the original leg file +# to make it more similar to the Go version, thus avoiding +# too many differences + +/\$\$/ { + s,\$\$->,$$.,g + /\$\$[^}]*$/s,\; *$,,g +} + +s,parse_result,p.tree, +s,references,p.references, +s,notes,p.notes, +s,find_reference,p.findReference,g + +s,->key,.key,g +s,->children,.children,g +s,->contents.str,.contents.str,g + +/{ *if (extens/ { + s,if (,if , + s,)),), +} +/EXT/ s,if extension,if p.extension, +/EXT/ s,{ *extension,{ p.extension,g +/EXT/ s,{ *!extension,{ !p.extension,g + +s,{ *element \*[a-z]*\; *$,{, + +/raw\.key =/ s,;$,, +/result =/ s,;$,, +s,result = mk_el,result := mk_el, + +s,NULL,nil,g + +s, *\; *}, },g + +s,strlen(,len(,g diff --git a/misc/devel.mk b/misc/devel.mk new file mode 100644 index 0000000..770643c --- /dev/null +++ b/misc/devel.mk @@ -0,0 +1,16 @@ +# +# development utilities +# +gofmt: + rc ./misc/gofmt.rc + +diff: ,,c + tkdiff $< parser.leg + +,,c: orig-c-src/markdown_parser.leg + sed -f misc/c2go.sed < $< > $@ + +orig-c-src/markdown_parser.leg: orig-c-src + + +.PHONY: diff gofmt diff --git a/misc/gofmt.rc b/misc/gofmt.rc new file mode 100644 index 0000000..e735b8b --- /dev/null +++ b/misc/gofmt.rc @@ -0,0 +1,51 @@ +fmtopts=('-spaces=false' '-tabwidth=4') + +fn f{ + gofmt $fmtopts -s ../$1 > $1 + diff $1 +} + +fn diff{ + if (! cmp -s ../$1 $1) { + echo tkdiff $1 ,,fmt/$1 + } +} + +if(! 
test -d ,,fmt) + mkdir ,,fmt +cd ,,fmt + + +# split parser.leg into Go and leg parts, gofmt the Go parts, +# and combine pieces to form a parser.leg again +p=../parser.leg +<$p >,,leg awk ' + /^%%/ { copy=0 } + copy { print } + /^%}/ { copy=1 } +' + +<$p >,,p.go awk ' + /^%%/ { copy=1; $0 = "//" $0 } + /^%}/ { copy=0 } + copy { print } + /^%{/ { copy=1 } +' +gofmt $fmtopts -w -s ,,p.go + +>parser.leg { + echo '%{' + cat ,,p.go | sed '/^\/\/%%/,$d' + echo '%}' + cat ,,leg + echo %% + cat ,,p.go | sed '1,/^\/\/%%/d' +} + + +f doc.go +f markdown.go +f output.go +diff parser.leg + +~ 0 0 diff --git a/output.go b/output.go new file mode 100644 index 0000000..e83142f --- /dev/null +++ b/output.go @@ -0,0 +1,229 @@ +/* Original C version https://github.com/jgm/peg-markdown/ + * Copyright 2008 John MacFarlane (jgm at berkeley dot edu). + * + * Modifications and translation from C into Go + * based on markdown_output.c + * Copyright 2010 Michael Teichgräber (mt at wmipf dot de) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License or the MIT + * license. See LICENSE for details. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +package markdown + +// HTML output functions + +import ( + "os" + "fmt" + "log" + "rand" + "strings" +) + +type Writer interface { + WriteString(string) (int, os.Error) + WriteRune(int) (int, os.Error) + WriteByte(byte) os.Error +} + +type htmlOut struct { + Writer + padded int + + notenum int + endNotes []*element /* List of endnotes to print after main content. */ +} + +// WriteHtml prints a document tree in HTML format using the specified Writer. 
+// +func (d *Doc) WriteHtml(w Writer) int { + out := new(htmlOut) + out.Writer = w + + out.padded = 2 + out.elist(d.tree, false) + out.pad(2) + out.printEndnotes() + + return 0 +} + +// pad - add newlines if needed +func (h *htmlOut) pad(n int) *htmlOut { + for ; n > h.padded; n-- { + h.WriteByte('\n') + } + h.padded = n + return h +} + +func (h *htmlOut) pset(n int) *htmlOut { + h.padded = n + return h +} + +// print a string +func (w *htmlOut) s(s string) *htmlOut { + w.WriteString(s) + return w +} + + +/* print string, escaping for HTML + * If obfuscate selected, convert characters to hex or decimal entities at random + */ +func (w *htmlOut) str(hs string, obfuscate bool) *htmlOut { + for _, r := range hs { + switch r { + case '&': + w.s("&") + case '<': + w.s("<") + case '>': + w.s(">") + case '"': + w.s(""") + default: + if obfuscate { + if rand.Intn(1) == 0 { + w.s(fmt.Sprintf("&#%d;", r)) + } else { + w.s(fmt.Sprintf("&#%x;", r)) + } + } else { + w.WriteRune(r) + } + } + } + return w +} + +/* print a list of elements + */ +func (w *htmlOut) elist(list *element, obfuscate bool) *htmlOut { + for list != nil { + w.elem(list, obfuscate) + list = list.next + } + return w +} + +// print an element +func (w *htmlOut) elem(elt *element, obfuscate bool) *htmlOut { + var s string + + switch elt.key { + case SPACE: + s = elt.contents.str + case LINEBREAK: + s = "
\n" + case STR: + w.str(elt.contents.str, obfuscate) + case ELLIPSIS: + s = "…" + case EMDASH: + s = "—" + case ENDASH: + s = "–" + case APOSTROPHE: + s = "’" + case SINGLEQUOTED: + w.s("‘").elist(elt.children, obfuscate).s("’") + case DOUBLEQUOTED: + w.s("“").elist(elt.children, obfuscate).s("”") + case CODE: + w.s("").str(elt.contents.str, obfuscate).s("") + case HTML: + s = elt.contents.str + case LINK: + if strings.Index(elt.contents.link.url, "mailto:") == 0 { + obfuscate = true /* obfuscate mailto: links */ + } + w.s(` 0 { + w.s(` title="`).str(elt.contents.link.title, obfuscate).s(`"`) + } + w.s(">").elist(elt.contents.link.label, obfuscate).s("") + case IMAGE: + w.s(``)
+		w.elist(elt.contents.link.label, obfuscate).s(` 0 { + w.s(` title="`).str(elt.contents.link.title, obfuscate).s(`"`) + } + w.s(" />") + case EMPH: + w.s("").elist(elt.children, obfuscate).s("") + case STRONG: + w.s("").elist(elt.children, obfuscate).s("") + case LIST: + w.elist(elt.children, obfuscate) + case RAW: + /* Shouldn't occur - these are handled by process_raw_blocks() */ + log.Exitf("RAW") + case H1, H2, H3, H4, H5, H6: + h := fmt.Sprintf("h%1d>", elt.key-H1+1) /* assumes H1 ... H6 are in order */ + w.pad(2).s("<").s(h).elist(elt.children, obfuscate).s("").elist(elt.children, obfuscate).s("

").pset(0) + case HRULE: + w.pad(2).s("
").pset(0) + case HTMLBLOCK: + w.pad(2).s(elt.contents.str).pset(0) + case VERBATIM: + w.pad(2).s("
").str(elt.contents.str, obfuscate).s("
").pset(0) + case BULLETLIST: + w.pad(2).s("").pset(0) + case ORDEREDLIST: + w.pad(2).s("
    ").pset(0).elist(elt.children, obfuscate).pad(1).s("
").pset(0) + case LISTITEM: + w.pad(1).s("
  • ").pset(2).elist(elt.children, obfuscate).s("
  • ").pset(0) + case BLOCKQUOTE: + w.pad(2).s("
    \n").pset(2).elist(elt.children, obfuscate).pad(1).s("
    ").pset(0) + case REFERENCE: + /* Nonprinting */ + case NOTE: + /* if contents.str == 0, then print note; else ignore, since this + * is a note block that has been incorporated into the notes list + */ + if elt.contents.str == "" { + w.endNotes = append(w.endNotes, elt) /* add an endnote to global endnotes list */ + w.notenum++ + nn := w.notenum + s = fmt.Sprintf(`[%d]`, + nn, nn, nn, nn) + } + default: + log.Exitf("htmlOut.elem encountered unknown element key = %d\n", elt.key) + } + if s != "" { + w.s(s) + } + return w +} + + +func (w *htmlOut) printEndnotes() { + counter := 0 + + if len(w.endNotes) == 0 { + return + } + w.s("
    \n
      ") + for _, elt := range w.endNotes { + counter++ + w.pad(1).s(fmt.Sprintf("
    1. \n", counter)).pset(2) + w.elist(elt.children, false) + w.s(fmt.Sprintf(" [back]", counter)) + w.pad(1).s("
    2. ") + } + w.pad(1).s("
    ") +} diff --git a/parser.leg b/parser.leg new file mode 100644 index 0000000..3ea586f --- /dev/null +++ b/parser.leg @@ -0,0 +1,952 @@ +%{ +/* Original C version https://github.com/jgm/peg-markdown/ + * Copyright 2008 John MacFarlane (jgm at berkeley dot edu). + * + * Modifications and translation from C into Go + * based on markdown_parser.leg and utility_functions.c + * Copyright 2010 Michael Teichgräber (mt at wmipf dot de) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License or the MIT + * license. See LICENSE for details. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +package markdown + +// PEG grammar and parser actions for markdown syntax. + +import ( + "fmt" + "strings" + "log" +) + +// Semantic value of a parsing action. +type element struct { + key int + contents + children *element + next *element +} + +// Information (label, URL and title) for a link. +type link struct { + label *element + url string + title string +} + +// Union for contents of an Element (string, list, or link). +type contents struct { + str string + *link +} + +// Types of semantic values returned by parsers. +const ( + LIST = iota /* A generic list of values. For ordered and bullet lists, see below. */ + RAW /* Raw markdown to be processed further */ + SPACE + LINEBREAK + ELLIPSIS + EMDASH + ENDASH + APOSTROPHE + SINGLEQUOTED + DOUBLEQUOTED + STR + LINK + IMAGE + CODE + HTML + EMPH + STRONG + PLAIN + PARA + LISTITEM + BULLETLIST + ORDEREDLIST + H1 /* Code assumes that H1..6 are in order. */ + H2 + H3 + H4 + H5 + H6 + BLOCKQUOTE + VERBATIM + HTMLBLOCK + HRULE + REFERENCE + NOTE + numVAL +) + +type Doc struct { + tree *element /* Results of parse. 
*/ + references *element /* List of link references found. */ + notes *element /* List of footnotes found. */ + syntaxExtensions int /* Syntax extensions selected. */ +} + +%} + +%userstate *Doc + +%YYSTYPE *element + + +Doc = a:StartList ( Block { a = cons($$, a) } )* + { p.tree = reverse(a) } + commit + +Block = BlankLine* + ( BlockQuote + | Verbatim + | Note + | Reference + | HorizontalRule + | Heading + | OrderedList + | BulletList + | HtmlBlock + | StyleBlock + | Para + | Plain ) + +Para = NonindentSpace a:Inlines BlankLine+ + { $$ = a; $$.key = PARA } + +Plain = a:Inlines + { $$ = a; $$.key = PLAIN } + +AtxInline = !Newline !(Sp '#'* Sp Newline) Inline + +AtxStart = < ( "######" | "#####" | "####" | "###" | "##" | "#" ) > + { $$ = mk_element(H1 + (len(yytext) - 1)) } + +AtxHeading = s:AtxStart Sp a:StartList ( AtxInline { a = cons($$, a) } )+ (Sp '#'* Sp)? Newline + { $$ = mk_list(s.key, a) + s = nil } + +SetextHeading = SetextHeading1 | SetextHeading2 + +SetextHeading1 = a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline "===" '='* Newline + { $$ = mk_list(H1, a) } + +SetextHeading2 = a:StartList ( !Endline Inline { a = cons($$, a) } )+ Newline "---" '-'* Newline + { $$ = mk_list(H2, a) } + +Heading = AtxHeading | SetextHeading + +BlockQuote = a:BlockQuoteRaw + { $$ = mk_element(BLOCKQUOTE) + $$.children = a + } + +BlockQuoteRaw = a:StartList + (( '>' ' '? 
Line { a = cons($$, a) } ) + ( !'>' !BlankLine Line { a = cons($$, a) } )* + ( BlankLine { a = cons(mk_str("\n"), a) } )* + )+ + { $$ = mk_str_from_list(a, true) + $$.key = RAW + } + +NonblankIndentedLine = !BlankLine IndentedLine + +VerbatimChunk = a:StartList + ( BlankLine { a = cons(mk_str("\n"), a) } )* + ( NonblankIndentedLine { a = cons($$, a) } )+ + { $$ = mk_str_from_list(a, false) } + +Verbatim = a:StartList ( VerbatimChunk { a = cons($$, a) } )+ + { $$ = mk_str_from_list(a, false) + $$.key = VERBATIM } + +HorizontalRule = NonindentSpace + ( '*' Sp '*' Sp '*' (Sp '*')* + | '-' Sp '-' Sp '-' (Sp '-')* + | '_' Sp '_' Sp '_' (Sp '_')*) + Sp Newline BlankLine+ + { $$ = mk_element(HRULE) } + +Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+ + +BulletList = &Bullet (ListTight | ListLoose) + { $$.key = BULLETLIST } + +ListTight = a:StartList + ( ListItem { a = cons($$, a) } )+ + BlankLine* !(Bullet | Enumerator) + { $$ = mk_list(LIST, a) } + +ListLoose = a:StartList + ( b:ListItem BlankLine* + { + li := b.children + li.contents.str += "\n\n" + a = cons(b, a) + } )+ + { $$ = mk_list(LIST, a) } + +ListItem = ( Bullet | Enumerator ) + a:StartList + ListBlock { a = cons($$, a) } + ( ListContinuationBlock { a = cons($$, a) } )* + { + raw := mk_str_from_list(a, false) + raw.key = RAW + $$ = mk_element(LISTITEM) + $$.children = raw + } + +ListBlock = a:StartList + Line { a = cons($$, a) } + ( ListBlockLine { a = cons($$, a) } )* + { $$ = mk_str_from_list(a, false) } + +ListContinuationBlock = a:StartList + ( < BlankLine* > + { if len(yytext) == 0 { + a = cons(mk_str("\001"), a) // block separator + } else { + a = cons(mk_str(yytext), a) + } + } ) + ( Indent ListBlock { a = cons($$, a) } )+ + { $$ = mk_str_from_list(a, false) } + +Enumerator = NonindentSpace [0-9]+ '.' Spacechar+ + +OrderedList = &Enumerator (ListTight | ListLoose) + { $$.key = ORDEREDLIST } + +ListBlockLine = !( Indent? 
(Bullet | Enumerator) )
                !BlankLine
                !HorizontalRule
                OptionallyIndentedLine

# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.
#
# For every tag there is an Open rule (tag name in all-lowercase or
# all-uppercase, optional attributes) and a Close rule (same name
# preceded by '/').

HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'

HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'

HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'

HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'

HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>'

HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>'

HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'

HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>'

HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>'

HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>'

HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>'

HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>'

HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>'

HtmlBlockOpenH6 = '<'
Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>'

HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>'

HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'

HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'

HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'

HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'

HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'

HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'

HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'

HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'

HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'

HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'

HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'

HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'

HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'

HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'

HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'

HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'

HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'

HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'

# HtmlBlockInTags matches an opening block-level tag, then any mix of
# nested HtmlBlockInTags and other characters up to the matching closing
# tag. Because the rule recurses, block tags may be nested.
HtmlBlockInTags = HtmlBlockOpenAddress (HtmlBlockInTags | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress
                | HtmlBlockOpenBlockquote (HtmlBlockInTags | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote
                | HtmlBlockOpenCenter (HtmlBlockInTags | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter
                | HtmlBlockOpenDir (HtmlBlockInTags | !HtmlBlockCloseDir .)* HtmlBlockCloseDir
                | HtmlBlockOpenDiv (HtmlBlockInTags | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv
                | HtmlBlockOpenDl (HtmlBlockInTags | !HtmlBlockCloseDl .)* HtmlBlockCloseDl
                | HtmlBlockOpenFieldset (HtmlBlockInTags | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset
                | HtmlBlockOpenForm (HtmlBlockInTags | !HtmlBlockCloseForm .)* HtmlBlockCloseForm
                | HtmlBlockOpenH1 (HtmlBlockInTags | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1
                | HtmlBlockOpenH2 (HtmlBlockInTags | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2
                | HtmlBlockOpenH3 (HtmlBlockInTags | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3
                | HtmlBlockOpenH4 (HtmlBlockInTags | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4
                | HtmlBlockOpenH5 (HtmlBlockInTags | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5
                | HtmlBlockOpenH6 (HtmlBlockInTags | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6
                | HtmlBlockOpenMenu (HtmlBlockInTags | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu
                | HtmlBlockOpenNoframes (HtmlBlockInTags |
!HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes
                | HtmlBlockOpenNoscript (HtmlBlockInTags | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript
                | HtmlBlockOpenOl (HtmlBlockInTags | !HtmlBlockCloseOl .)* HtmlBlockCloseOl
                | HtmlBlockOpenP (HtmlBlockInTags | !HtmlBlockCloseP .)* HtmlBlockCloseP
                | HtmlBlockOpenPre (HtmlBlockInTags | !HtmlBlockClosePre .)* HtmlBlockClosePre
                | HtmlBlockOpenTable (HtmlBlockInTags | !HtmlBlockCloseTable .)* HtmlBlockCloseTable
                | HtmlBlockOpenUl (HtmlBlockInTags | !HtmlBlockCloseUl .)* HtmlBlockCloseUl
                | HtmlBlockOpenDd (HtmlBlockInTags | !HtmlBlockCloseDd .)* HtmlBlockCloseDd
                | HtmlBlockOpenDt (HtmlBlockInTags | !HtmlBlockCloseDt .)* HtmlBlockCloseDt
                | HtmlBlockOpenFrameset (HtmlBlockInTags | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset
                | HtmlBlockOpenLi (HtmlBlockInTags | !HtmlBlockCloseLi .)* HtmlBlockCloseLi
                | HtmlBlockOpenTbody (HtmlBlockInTags | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody
                | HtmlBlockOpenTd (HtmlBlockInTags | !HtmlBlockCloseTd .)* HtmlBlockCloseTd
                | HtmlBlockOpenTfoot (HtmlBlockInTags | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot
                | HtmlBlockOpenTh (HtmlBlockInTags | !HtmlBlockCloseTh .)* HtmlBlockCloseTh
                | HtmlBlockOpenThead (HtmlBlockInTags | !HtmlBlockCloseThead .)* HtmlBlockCloseThead
                | HtmlBlockOpenTr (HtmlBlockInTags | !HtmlBlockCloseTr .)* HtmlBlockCloseTr
                | HtmlBlockOpenScript (HtmlBlockInTags | !HtmlBlockCloseScript .)* HtmlBlockCloseScript

# HtmlBlock captures a tagged block, an HTML comment or a self-closing
# block tag, followed by at least one blank line. With EXT_FILTER_HTML
# set the result is an empty LIST; otherwise the captured text is kept
# verbatim as an HTMLBLOCK element.
HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
            BlankLine+
            { if p.extension(EXT_FILTER_HTML) {
                  $$ = mk_list(LIST, nil)
              } else {
                  $$ = mk_str(yytext)
                  $$.key = HTMLBLOCK
              }
            }

HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'

# Tag names accepted by HtmlBlockSelfClosing, each in all-lowercase and
# all-uppercase spelling.
HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
                "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
                "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
                "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
                "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
                "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"

StyleOpen =     '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>'
StyleClose =    '<' Spnl '/' ("style" | "STYLE") Spnl '>'
InStyleTags =   StyleOpen (!StyleClose .)* StyleClose

# StyleBlock captures a complete style element. With EXT_FILTER_STYLES set
# the result is an empty LIST; otherwise the text is kept as an HTMLBLOCK.
StyleBlock =    < InStyleTags >
                BlankLine*
                { if p.extension(EXT_FILTER_STYLES) {
                      $$ = mk_list(LIST, nil)
                  } else {
                      $$ = mk_str(yytext)
                      $$.key = HTMLBLOCK
                  }
                }

# Inlines is a non-empty sequence of inline elements. An Endline is only
# added to the list when another inline follows it; one trailing Endline
# is consumed without being added.
Inlines  =  a:StartList ( !Endline Inline { a = cons($$, a) }
                        | c:Endline &Inline { a = cons(c, a) } )+ Endline?
            { $$ = mk_list(LIST, a) }

# Inline tries the inline constructs in the order listed; Symbol, which
# accepts a single special character, is the last resort.
Inline  = Str
        | Endline
        | UlOrStarLine
        | Space
        | Strong
        | Emph
        | Image
        | Link
        | NoteReference
        | InlineNote
        | Code
        | RawHtml
        | Entity
        | EscapedChar
        | Smart
        | Symbol

# Space: any run of spaces and tabs becomes one SPACE element holding " ".
Space = Spacechar+
        { $$ = mk_str(" ")
          $$.key = SPACE }

# Str is a run of normal characters; underscores are allowed inside the
# run as long as a normal character follows them.
Str = < NormalChar (NormalChar | '_'+ &NormalChar)* >
      { $$ = mk_str(yytext) }

# EscapedChar: a backslash followed by one of the listed punctuation
# characters yields that character as a plain string.
EscapedChar =   '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
                { $$ = mk_str(yytext) }

# Entity keeps a character reference unchanged, keyed HTML.
Entity =    ( HexEntity | DecEntity | CharEntity )
            { $$ = mk_str(yytext); $$.key = HTML }

Endline =   LineBreak | TerminalEndline | NormalEndline

# NormalEndline is a newline inside a paragraph. It does not match when
# the next line is blank, starts a block quote or an ATX heading, or when
# the next line is followed by a setext underline. Result: a SPACE
# element holding "\n".
NormalEndline =   Sp Newline !BlankLine !'>' !AtxStart
                  !(Line ("===" '='* | "---" '-'*) Newline)
                  { $$ = mk_str("\n")
                    $$.key = SPACE }

# TerminalEndline is the newline at the very end of the input; it
# produces no element.
TerminalEndline = Sp Newline Eof
                  { $$ = nil }

# LineBreak: two spaces immediately before a normal line ending force a
# LINEBREAK element.
LineBreak = "  " NormalEndline
            { $$ = mk_element(LINEBREAK) }

Symbol =    < SpecialChar >
            { $$ = mk_str(yytext) }

# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
UlOrStarLine =  (UlLine | StarLine) { $$ = mk_str(yytext) }
StarLine =      < "****" '*'* > | < Spacechar '*'+ &Spacechar >
UlLine   =      < "____" '_'* > | < Spacechar '_'+ &Spacechar >

Emph =      EmphStar | EmphUl

OneStarOpen  =  !StarLine '*' !Spacechar !Newline
OneStarClose =  !Spacechar !Newline a:Inline !StrongStar '*' { $$ = a }

# EmphStar: inlines between single '*' delimiters become an EMPH list.
# The inline matched as part of OneStarClose is the last list member.
EmphStar =  OneStarOpen
            a:StartList
            ( !OneStarClose Inline { a = cons($$, a) } )*
            OneStarClose { a = cons($$, a) }
            { $$ = mk_list(EMPH, a) }

OneUlOpen  =  !UlLine '_' !Spacechar !Newline
OneUlClose =  !Spacechar !Newline a:Inline !StrongUl '_' !Alphanumeric { $$ = a }

# EmphUl: same as EmphStar with '_' delimiters; the closing '_' must not
# be followed by an alphanumeric character.
EmphUl =    OneUlOpen
            a:StartList
            ( !OneUlClose Inline { a = cons($$, a) } )*
            OneUlClose { a = cons($$, a) }
            { $$ = mk_list(EMPH, a) }

Strong = StrongStar | StrongUl

TwoStarOpen =   !StarLine "**" !Spacechar !Newline
TwoStarClose =  !Spacechar !Newline a:Inline "**" { $$ = a }

# StrongStar: inlines between "**" delimiters become a STRONG list.
StrongStar =    TwoStarOpen
                a:StartList
                ( !TwoStarClose Inline { a = cons($$, a) } )*
                TwoStarClose { a = cons($$, a) }
                { $$ = mk_list(STRONG, a) }

TwoUlOpen =     !UlLine "__" !Spacechar !Newline
TwoUlClose =    !Spacechar !Newline a:Inline "__" !Alphanumeric { $$ = a }

# StrongUl: inlines between "__" delimiters become a STRONG list.
StrongUl =  TwoUlOpen
            a:StartList
            ( !TwoUlClose Inline { a = cons($$, a) } )*
            TwoUlClose { a = cons($$, a) }
            { $$ = mk_list(STRONG, a) }

# Image is '!' followed by an explicit or reference link; the element
# produced by the link rule is re-keyed as IMAGE.
Image = '!' ( ExplicitLink | ReferenceLink )
        { $$.key = IMAGE }

Link =  ExplicitLink | ReferenceLink | AutoLink

ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle

# ReferenceLinkDouble: [text] [label]. When the second label is a known
# reference, a link with the first label as text is produced. Otherwise
# the source text is rebuilt as a LIST: "[", a, "]", the captured
# whitespace, "[", b, "]".
ReferenceLinkDouble =  a:Label < Spnl > !"[]" b:Label
                       {
                           if match, found := p.findReference(b.children); found {
                               $$ = mk_link(a.children, match.url, match.title);
                               a = nil
                               b = nil
                           } else {
                               result := mk_element(LIST)
                               result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext),
                                                   cons(mk_str("["), cons(b, mk_str("]")))))))
                               $$ = result
                           }
                       }

# ReferenceLinkSingle: [label] optionally followed by an empty "[]". When
# the label is a known reference a link is produced; otherwise the
# source text is rebuilt as a LIST: "[", a, "]" and the captured suffix.
ReferenceLinkSingle =  a:Label < (Spnl "[]")? >
                       {
                           if match, found := p.findReference(a.children); found {
                               $$ = mk_link(a.children, match.url, match.title)
                               a = nil
                           } else {
                               result := mk_element(LIST)
                               result.children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext))));
                               $$ = result
                           }
                       }

# ExplicitLink: [label](source title) with the title being optional.
ExplicitLink =  l:Label Spnl '(' Sp s:Source Spnl t:Title Sp ')'
                { $$ = mk_link(l.children, s.contents.str, t.contents.str)
                  s = nil
                  t = nil
                  l = nil }

# Source is the link target, optionally enclosed in angle brackets; the
# brackets are not part of the captured string.
Source  = ( '<' < SourceContents > '>' | < SourceContents > )
          { $$ = mk_str(yytext) }

# SourceContents allows balanced parentheses inside the target and may
# be empty.
SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*
                 | ""

# Title is a quoted link title, or the empty string if there is none.
Title = ( TitleSingle | TitleDouble | < "" > )
        { $$ = mk_str(yytext) }

TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''

TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'

AutoLink = AutoLinkUrl | AutoLinkEmail

# AutoLinkUrl: <scheme://...>; the captured text is both label and URL.
AutoLinkUrl =   '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
                {   $$ = mk_link(mk_str(yytext), yytext, "") }

# AutoLinkEmail: <user@host>; the URL is the address prefixed with "mailto:".
AutoLinkEmail = '<' < [-A-Za-z0-9+_]+ '@' ( !Newline !'>' . )+ > '>'
                {
                    $$ = mk_link(mk_str(yytext), "mailto:"+yytext, "")
                }

# Reference is a link definition of the form "[label]: source title";
# it is stored as a link element re-keyed REFERENCE.
Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc Spnl t:RefTitle BlankLine*
            { $$ = mk_link(l.children, s.contents.str, t.contents.str)
              s = nil
              t = nil
              l = nil
              $$.key = REFERENCE }

# Label is a bracketed list of inlines. With EXT_NOTES selected, the
# opening bracket must not be followed by '^'.
Label = '[' ( !'^' &{ p.extension(EXT_NOTES) } | &. &{ !p.extension(EXT_NOTES) } )
        a:StartList
        ( !']' Inline { a = cons($$, a) } )*
        ']'
        { $$ = mk_list(LIST, a) }

RefSrc = < Nonspacechar+ >
         { $$ = mk_str(yytext)
           $$.key = HTML }

# RefTitle is the optional title of a reference, quoted with single
# quotes, double quotes or parentheses; empty string if absent.
RefTitle =  ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
            { $$ = mk_str(yytext) }

EmptyTitle = < "" >

RefTitleSingle = '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''

RefTitleDouble = '"' < ( !('"' Sp Newline | Newline) . )* > '"'

RefTitleParens = '(' < ( !(')' Sp Newline | Newline) .
)* > ')'

# References scans the input for link definitions, skipping every other
# block, and stores the definitions found (in source order) in p.references.
References = a:StartList
             ( b:Reference { a = cons(b, a) } | SkipBlock )*
             { p.references = reverse(a) }
             commit

# TicksN matches exactly N backticks (not followed by a further one).
Ticks1 = "`" !'`'
Ticks2 = "``" !'`'
Ticks3 = "```" !'`'
Ticks4 = "````" !'`'
Ticks5 = "`````" !'`'

# Code is a code span delimited by one to five backticks on each side.
# The content may contain backtick runs of a different length and single
# newlines (not blank lines); whitespace next to the delimiters is not
# part of the captured text.
Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
       | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
       | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
       | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
       | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
       )
       { $$ = mk_str(yytext); $$.key = CODE }

# RawHtml is an inline HTML comment or tag. With EXT_FILTER_HTML set the
# result is an empty LIST; otherwise the text is kept, keyed HTML.
RawHtml =   < (HtmlComment | HtmlTag) >
            {   if p.extension(EXT_FILTER_HTML) {
                    $$ = mk_list(LIST, nil)
                } else {
                    $$ = mk_str(yytext)
                    $$.key = HTML
                }
            }

BlankLine =     Sp Newline

Quoted =        '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
HtmlAttribute = (Alphanumeric | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl
HtmlComment =   "<!--" (!"-->" .)* "-->"
HtmlTag =       '<' Spnl '/'? Alphanumeric+ Spnl HtmlAttribute* '/'? Spnl '>'
Eof =           !.
Spacechar =     ' ' | '\t'
Nonspacechar =  !Spacechar !Newline .
Newline =       '\n' | '\r' '\n'?
Sp =            Spacechar*
Spnl =          Sp (Newline Sp)?
SpecialChar =   '*' | '_' | '`' | '&' | '[' | ']' | '<' | '!' | '\\' | ExtendedSpecialChar
NormalChar =    !( SpecialChar | Spacechar | Newline ) .
Alphanumeric =  [A-Za-z0-9]
Digit =         [0-9]

# Character references; the capture covers the whole reference,
# including the terminating ';'.
HexEntity =     < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
DecEntity =     < '&' '#' [0-9]+ ';' >
CharEntity =    < '&' [A-Za-z0-9]+ ';' >

# NonindentSpace: at most three leading spaces (longest alternative first).
# Indent: one tab or four spaces.
NonindentSpace =    "   " | "  " | " " | ""
Indent =            "\t" | "    "
IndentedLine =      Indent Line
OptionallyIndentedLine = Indent?
Line

# StartList starts a list data structure that can be added to with cons:
StartList = &.
            { $$ = nil }

# Line yields the text of one raw line as a string element.
Line =  RawLine
        { $$ = mk_str(yytext) }

# RawLine captures either a line including its newline, or a non-empty
# rest of input that ends without a newline.
RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )

# SkipBlock consumes one block without producing anything: a run of
# non-blank lines plus trailing blank lines, or a run of blank lines.
SkipBlock = ( !BlankLine RawLine )+ BlankLine*
          | BlankLine+

# Syntax extensions

# Additional special characters, active only while the corresponding
# extension is selected.
ExtendedSpecialChar = &{ p.extension(EXT_SMART) } ('.' | '-' | '\'' | '"')
                    | &{ p.extension(EXT_NOTES) } ( '^' )

# Smart punctuation; only tried when EXT_SMART is selected.
Smart = &{ p.extension(EXT_SMART) }
        ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )

Apostrophe = '\''
             { $$ = mk_element(APOSTROPHE) }

Ellipsis = ("..." | ". . .")
           { $$ = mk_element(ELLIPSIS) }

Dash = EmDash | EnDash

# EnDash: a single '-' directly before a digit.
EnDash = '-' &Digit
         { $$ = mk_element(ENDASH) }

# EmDash: three or two consecutive '-'.
EmDash = ("---" | "--")
         { $$ = mk_element(EMDASH) }

# SingleQuoteStart: a "'" that is followed neither by one of the
# characters in the class nor by a contraction suffix (s, t, m, ve, ll,
# re) ending at a non-alphanumeric character.
SingleQuoteStart = '\'' ![)!\],.;:-? \t\n] !( ( "s" | "t" | "m" | "ve" | "ll" | "re" ) !Alphanumeric )

SingleQuoteEnd = '\'' !Alphanumeric

# SingleQuoted: one or more inlines between single quotes, as a
# SINGLEQUOTED list.
SingleQuoted = SingleQuoteStart
               a:StartList
               ( !SingleQuoteEnd b:Inline { a = cons(b, a) } )+
               SingleQuoteEnd
               { $$ = mk_list(SINGLEQUOTED, a) }

DoubleQuoteStart = '"'

DoubleQuoteEnd = '"'

# DoubleQuoted: one or more inlines between double quotes, as a
# DOUBLEQUOTED list.
DoubleQuoted =  DoubleQuoteStart
                a:StartList
                ( !DoubleQuoteEnd b:Inline { a = cons(b, a) } )+
                DoubleQuoteEnd
                { $$ = mk_list(DOUBLEQUOTED, a) }

# NoteReference ([^label], EXT_NOTES only): when a note with this label
# is known, a NOTE element sharing that note's children is produced;
# otherwise the reference is emitted as literal text.
NoteReference = &{ p.extension(EXT_NOTES) }
                ref:RawNoteReference
                {
                    if match, ok := p.find_note(ref.contents.str); ok {
                        $$ = mk_element(NOTE)
                        $$.children = match.children
                        $$.contents.str = ""
                    } else {
                        $$ = mk_str("[^"+ref.contents.str+"]")
                    }
                }

# RawNoteReference yields the label text between "[^" and "]".
RawNoteReference = "[^" < ( !Newline !']' .
)+ > ']'
                   { $$ = mk_str(yytext) }

# Note is a footnote definition (EXT_NOTES only): "[^label]:" followed by
# a raw block and any number of indented raw blocks. The blocks become
# the children of a NOTE list whose contents.str holds the label.
Note =          &{ p.extension(EXT_NOTES) }
                NonindentSpace ref:RawNoteReference ':' Sp
                a:StartList
                ( RawNoteBlock { a = cons($$, a) } )
                ( &Indent RawNoteBlock { a = cons($$, a) } )*
                {   $$ = mk_list(NOTE, a)
                    $$.contents.str = ref.contents.str
                }

# InlineNote (EXT_NOTES only): ^[inlines], a NOTE list with an empty label.
InlineNote =    &{ p.extension(EXT_NOTES) }
                "^["
                a:StartList
                ( !']' Inline { a = cons($$, a) } )+
                ']'
                { $$ = mk_list(NOTE, a)
                  $$.contents.str = "" }

# Notes scans the input for footnote definitions, skipping every other
# block, and stores the notes found (in source order) in p.notes.
Notes =         a:StartList
                ( b:Note { a = cons(b, a) } | SkipBlock )*
                { p.notes = reverse(a) }
                commit

# RawNoteBlock joins one or more non-blank lines and the blank lines
# following them into one string (plus an extra newline), keyed RAW.
RawNoteBlock =  a:StartList
                ( !BlankLine OptionallyIndentedLine { a = cons($$, a) } )+
                ( < BlankLine* > { a = cons(mk_str(yytext), a) } )
                {   $$ = mk_str_from_list(a, true)
                    $$.key = RAW
                }

%%


/*
 * List manipulation functions
 */


/* cons - cons an element onto a list, returning pointer to new head.
 * The next pointer of new is overwritten with list; new must not be nil.
 */
func cons(new, list *element) *element {
	new.next = list
	return new
}

/* reverse - reverse a list, returning pointer to new list.
 * The list is reversed in place: the next pointers of its elements are
 * rewritten, so the argument ends up as the last element of the result.
 * Reversing a nil list yields nil.
 */
func reverse(list *element) (new *element) {
	for list != nil {
		/* remember the rest before cons overwrites list.next */
		rest := list.next
		new = cons(list, new)
		list = rest
	}
	return
}

/* concat_string_list - concatenates string contents of list of STR elements.
 * The contents.str fields are appended in list order; the keys of the
 * elements are not examined.
 */
func concat_string_list(list *element) string {
	s := ""
	for el := list; el != nil; el = el.next {
		s += el.contents.str
	}
	return s
}


/*
 * Auxiliary functions for parsing actions.
 * These make it easier to build up data structures (including lists)
 * in the parsing actions.
+ */ + + +/* mk_element - generic constructor for element + */ +func mk_element(key int) *element { + return &element{key: key} +} + +/* mk_str - constructor for STR element + */ +func mk_str(s string) (result *element) { + result = mk_element(STR) + result.contents.str = s + return +} + +/* mk_str_from_list - makes STR element by concatenating a + * reversed list of strings, adding optional extra newline + */ +func mk_str_from_list(list *element, extra_newline bool) (result *element) { + s := concat_string_list(reverse(list)) + if extra_newline { + s += "\n" + } + result = mk_element(STR) + result.contents.str = s + return +} + +/* mk_list - makes new list with key 'key' and children the reverse of 'lst'. + * This is designed to be used with cons to build lists in a parser action. + * The reversing is necessary because cons adds to the head of a list. + */ +func mk_list(key int, lst *element) *element { + result := mk_element(key) + result.children = reverse(lst) + return result +} + +/* mk_link - constructor for LINK element + */ +func mk_link(label *element, url, title string) *element { + result := mk_element(LINK) + result.contents.link = &link{label: label, url: url, title: title} + return result +} + + +/* extension = returns true if extension is selected + */ +func (d *Doc) extension(ext int) bool { + return d.syntaxExtensions&ext != 0 +} + +/* match_inlines - returns true if inline lists match (case-insensitive...) 
+ */ +func match_inlines(l1, l2 *element) bool { + for l1 != nil && l2 != nil { + if l1.key != l2.key { + return false + } + switch l1.key { + case SPACE, LINEBREAK, ELLIPSIS, EMDASH, ENDASH, APOSTROPHE: + break + case CODE, STR, HTML: + if strings.ToUpper(l1.contents.str) != strings.ToUpper(l2.contents.str) { + return false + } + case EMPH, STRONG, LIST, SINGLEQUOTED, DOUBLEQUOTED: + if !match_inlines(l1.children, l2.children) { + return false + } + case LINK, IMAGE: + return false /* No links or images within links */ + default: + log.Exitf("match_inlines encountered unknown key = %d\n", l1.key) + } + l1 = l1.next + l2 = l2.next + } + return l1 == nil && l2 == nil /* return true if both lists exhausted */ +} + + +/* find_reference - return true if link found in references matching label. + * 'link' is modified with the matching url and title. + */ +func (d *Doc) findReference(label *element) (*link, bool) { + for cur := d.references; cur != nil; cur = cur.next { + l := cur.contents.link + if match_inlines(label, l.label) { + return l, true + } + } + return nil, false +} + + +/* find_note - return true if note found in notes matching label. + * if found, 'result' is set to point to matched note. + */ +func (d *Doc) find_note(label string) (*element, bool) { + for el := d.notes; el != nil; el = el.next { + if label == el.contents.str { + return el, true + } + } + return nil, false +} + + +/* print tree of elements, for debugging only. + */ +func print_tree(elt *element, indent int) { + var key string + + for elt != nil { + for i := 0; i < indent; i++ { + fmt.Print("\t") + } + key = keynames[elt.key] + if key == "" { + key = "?" 
+ } + if elt.key == STR { + fmt.Printf("%p:\t%s\t'%s'\n", elt, key, elt.contents.str) + } else { + fmt.Printf("%p:\t%s %p\n", elt, key, elt.next) + } + if elt.children != nil { + print_tree(elt.children, indent+1) + } + elt = elt.next + } +} + +var keynames = [numVAL]string{ + LIST: "LIST", + RAW: "RAW", + SPACE: "SPACE", + LINEBREAK: "LINEBREAK", + ELLIPSIS: "ELLIPSIS", + EMDASH: "EMDASH", + ENDASH: "ENDASH", + APOSTROPHE: "APOSTROPHE", + SINGLEQUOTED: "SINGLEQUOTED", + DOUBLEQUOTED: "DOUBLEQUOTED", + STR: "STR", + LINK: "LINK", + IMAGE: "IMAGE", + CODE: "CODE", + HTML: "HTML", + EMPH: "EMPH", + STRONG: "STRONG", + PLAIN: "PLAIN", + PARA: "PARA", + LISTITEM: "LISTITEM", + BULLETLIST: "BULLETLIST", + ORDEREDLIST: "ORDEREDLIST", + H1: "H1", + H2: "H2", + H3: "H3", + H4: "H4", + H5: "H5", + H6: "H6", + BLOCKQUOTE: "BLOCKQUOTE", + VERBATIM: "VERBATIM", + HTMLBLOCK: "HTMLBLOCK", + HRULE: "HRULE", + REFERENCE: "REFERENCE", + NOTE: "NOTE", +}