diff --git a/adoc/adoc.go b/adoc/adoc.go new file mode 100644 index 0000000..bea671a --- /dev/null +++ b/adoc/adoc.go @@ -0,0 +1,39 @@ +package adoc + +import ( + "strings" + "time" +) + +const ( + LANGUAGE_EN = Language("en") + LANGUAGE_NL = Language("nl") + LANGUAGE_UNKNOWN = Language("unknown") +) + +type Language string + +func NewLanguage(ln string) Language { + switch strings.ToLower(ln) { + case "nl": + return LANGUAGE_NL + case "en": + return LANGUAGE_EN + default: + return LANGUAGE_UNKNOWN + } +} + +type Tag string + +type ADoc struct { + Title string + Author string + Language Language + Public bool + Path string + Date time.Time + Updated time.Time + Tags []Tag + Content []BlockElement +} diff --git a/adoc/adoc_test.go b/adoc/adoc_test.go new file mode 100644 index 0000000..070e718 --- /dev/null +++ b/adoc/adoc_test.go @@ -0,0 +1,52 @@ +package adoc_test + +import ( + "testing" + + "ewintr.nl/go-kit/adoc" + "ewintr.nl/go-kit/test" +) + +func TestNewLanguage(t *testing.T) { + for _, tc := range []struct { + name string + input string + exp adoc.Language + }{ + { + name: "empty", + exp: adoc.LANGUAGE_UNKNOWN, + }, + { + name: "dutch lower", + input: "nl", + exp: adoc.LANGUAGE_NL, + }, + { + name: "dutch upper", + input: "NL", + exp: adoc.LANGUAGE_NL, + }, + { + name: "english lower", + input: "en", + exp: adoc.LANGUAGE_EN, + }, + { + name: "english upper", + input: "EN", + exp: adoc.LANGUAGE_EN, + }, + { + name: "unknown", + input: "something", + exp: adoc.LANGUAGE_UNKNOWN, + }, + } { + t.Run(tc.name, func(t *testing.T) { + act := adoc.NewLanguage(tc.input) + test.Equals(t, tc.exp, act) + }) + + } +} diff --git a/adoc/block.go b/adoc/block.go new file mode 100644 index 0000000..1e4ca83 --- /dev/null +++ b/adoc/block.go @@ -0,0 +1,64 @@ +package adoc + +import ( + "fmt" + "strings" +) + +type BlockElement interface { + Text() string +} + +type Paragraph []InlineElement + +func (p Paragraph) Text() string { + var text []string + for _, ie := range p { + text = append(text, ie.Text()) + } + + return strings.Join(text, " ") +} + +type SubTitle string + +func (st SubTitle) Text() string { return string(st) } + +type SubSubTitle string + +func (st SubSubTitle) Text() string { return string(st) } + +type CodeBlock string + +func (cb CodeBlock) Text() string { return string(cb) } + +type Image struct { + Src string + Alt string +} + +func (i Image) Text() string { + return i.Alt +} + +type ListItem []InlineElement + +func (li ListItem) Text() string { + var text []string + for _, ie := range li { + text = append(text, ie.Text()) + } + + return fmt.Sprintf("%s%s", LISTITEM_PREFIX, strings.Join(text, "")) +} + +type List []ListItem + +func (l List) Text() string { + var items []string + for _, item := range l { + items = append(items, item.Text()) + } + + return strings.Join(items, "\n") +} diff --git a/adoc/block_test.go b/adoc/block_test.go new file mode 100644 index 0000000..3a19638 --- /dev/null +++ b/adoc/block_test.go @@ -0,0 +1,134 @@ +package adoc_test + +import ( + "testing" + + "ewintr.nl/go-kit/adoc" + "ewintr.nl/go-kit/test" +) + +func TestParagraph(t *testing.T) { + for _, tc := range []struct { + name string + elements []adoc.InlineElement + exp string + }{ + { + name: "empty", + elements: []adoc.InlineElement{}, + }, + { + name: "one", + elements: []adoc.InlineElement{ + adoc.PlainText("one"), + }, + exp: "one", + }, + { + name: "many", + elements: []adoc.InlineElement{ + adoc.PlainText("one"), + adoc.PlainText("two"), + adoc.PlainText("three"), + }, + exp: "one two three", + }, + } { + t.Run(tc.name, func(t *testing.T) { + p := adoc.Paragraph(tc.elements) + test.Equals(t, tc.exp, p.Text()) + }) + } +} + +func TestBlockSimple(t *testing.T) { + text := "text" + for _, tc := range []struct { + name string + element adoc.BlockElement + }{ + { + name: "subtitle", + element: adoc.SubTitle(text), + }, + { + name: "subsubtitle", + element: adoc.SubSubTitle(text), + }, + { + name: "code block", + element: adoc.CodeBlock(text), + }, + } { + t.Run(tc.name, func(t *testing.T) { + test.Equals(t, text, tc.element.Text()) + }) + } +} + +func TestListItem(t *testing.T) { + for _, tc := range []struct { + name string + elements []adoc.InlineElement + exp string + }{ + { + name: "empty", + exp: "* ", + }, + { + name: "one", + elements: []adoc.InlineElement{ + adoc.PlainText("one"), + }, + exp: "* one", + }, + { + name: "many", + elements: []adoc.InlineElement{ + adoc.PlainText("one"), + adoc.PlainText("two"), + adoc.PlainText("three"), + }, + exp: "* onetwothree", + }, + } { + t.Run(tc.name, func(t *testing.T) { + li := adoc.ListItem(tc.elements) + test.Equals(t, tc.exp, li.Text()) + }) + } +} + +func TestList(t *testing.T) { + for _, tc := range []struct { + name string + elements []adoc.ListItem + exp string + }{ + { + name: "empty", + }, + { + name: "one", + elements: []adoc.ListItem{ + {adoc.PlainText("one")}, + }, + exp: "* one", + }, + { + name: "many", + elements: []adoc.ListItem{ + {adoc.PlainText("one")}, + {adoc.PlainText("two")}, + {adoc.PlainText("three")}, + }, + exp: "* one\n* two\n* three", + }, + } { + t.Run(tc.name, func(t *testing.T) { + l := adoc.List(tc.elements) + test.Equals(t, tc.exp, l.Text()) + }) + } +} diff --git a/adoc/inline.go b/adoc/inline.go new file mode 100644 index 0000000..c6b6b9d --- /dev/null +++ b/adoc/inline.go @@ -0,0 +1,40 @@ +package adoc + +type InlineElement interface { + Text() string +} + +type PlainText string + +func (pt PlainText) Text() string { return string(pt) } + +type StrongText string + +func (st StrongText) Text() string { return string(st) } + +type EmpText string + +func (et EmpText) Text() string { return string(et) } + +type StrongEmpText string + +func (set StrongEmpText) Text() string { return string(set) } + +type Link struct { + url string + title string +} + +func NewLink(url, title string) Link { + return Link{ + url: url, + title: title, + } +} + +func (l Link) URL() string { return l.url } +func (l Link) Text() string { return l.title } + +type CodeText string + +func (ct CodeText) Text() string { return string(ct) } diff --git a/adoc/inline_test.go b/adoc/inline_test.go new file mode 100644 index 0000000..8b05371 --- /dev/null +++ b/adoc/inline_test.go @@ -0,0 +1,50 @@ +package adoc_test + +import ( + "testing" + + "ewintr.nl/go-kit/adoc" + "ewintr.nl/go-kit/test" +) + +func TestInlineSimple(t *testing.T) { + text := "text" + for _, tc := range []struct { + name string + element adoc.InlineElement + }{ + { + name: "plain text", + element: adoc.PlainText(text), + }, + { + name: "strong", + element: adoc.StrongText(text), + }, + { + name: "emphasis", + element: adoc.EmpText(text), + }, + { + name: "strong emphasis", + element: adoc.StrongEmpText(text), + }, + { + name: "code", + element: adoc.CodeText(text), + }, + } { + t.Run(tc.name, func(t *testing.T) { + test.Equals(t, text, tc.element.Text()) + }) + } +} + +func TextLink(t *testing.T) { + url := "url" + title := "title" + l := adoc.NewLink(url, title) + + test.Equals(t, url, l.URL()) + test.Equals(t, title, l.Text()) +} diff --git a/adoc/parser.go b/adoc/parser.go new file mode 100644 index 0000000..e139cff --- /dev/null +++ b/adoc/parser.go @@ -0,0 +1,276 @@ +package adoc + +import ( + "strings" + "time" +) + +const ( + FIELD_DELIMITER = ":" + TITLE_PREFIX = "= " + SUBTITLE_PREFIX = "== " + SUBSUBTITLE_PREFIX = "=== " + PARAGRAPH_SEPARATOR = "\n\n" + LINE_SEPARATOR = "\n" + CODE_PREFIX = "----\n" + CODE_SUFFIX = "\n----" + LISTITEM_PREFIX = "* " + IMAGE_PREFIX = "image::" // end with [alt text] +) + +func New(text string) *ADoc { + doc := &ADoc{ + Language: LANGUAGE_EN, + Tags: []Tag{}, + } + + // split up blocks + var pars []string + for _, s := range strings.Split(text, PARAGRAPH_SEPARATOR) { + if s == "" { + + continue + } + + pars = append(pars, s) + } + + // actually, code blocks are allowed to have empty lines + var blocks []string + var inCode bool + var currentBlock string + for _, par := range pars { + switch { + case strings.HasPrefix(par, CODE_PREFIX) && strings.HasSuffix(par, CODE_SUFFIX): + blocks = append(blocks, par) + case !inCode && strings.HasPrefix(par, CODE_PREFIX): + inCode = true + currentBlock = par + case inCode && !strings.HasSuffix(par, CODE_SUFFIX): + currentBlock += PARAGRAPH_SEPARATOR + par + case inCode && strings.HasSuffix(par, CODE_SUFFIX): + currentBlock += PARAGRAPH_SEPARATOR + par + blocks = append(blocks, currentBlock) + inCode = false + currentBlock = "" + default: + blocks = append(blocks, par) + } + } + + // interpret the blocks + for i, p := range blocks { + switch { + case i == 0 && strings.HasPrefix(p, TITLE_PREFIX): + ParseHeader(p, doc) + case strings.HasPrefix(p, SUBTITLE_PREFIX): + p = strings.TrimSpace(p) + s := strings.Split(p, SUBTITLE_PREFIX) + if len(s) == 1 || s[1] == "" { + + continue + } + doc.Content = append(doc.Content, SubTitle(s[1])) + case strings.HasPrefix(p, SUBSUBTITLE_PREFIX): + p = strings.TrimSpace(p) + s := strings.Split(p, SUBSUBTITLE_PREFIX) + if len(s) == 1 || s[1] == "" { + + continue + } + doc.Content = append(doc.Content, SubSubTitle(s[1])) + case isCodeBlock(p): + doc.Content = append(doc.Content, parseCodeBlock(p)) + case strings.HasPrefix(p, LISTITEM_PREFIX): + p = strings.TrimSpace(p) + var items []ListItem + for i, ti := range strings.Split(p, LISTITEM_PREFIX) { + if i > 0 { + inline := ParseInline(strings.TrimSpace(ti)) + items = append(items, ListItem(inline)) + } + } + doc.Content = append(doc.Content, List(items)) + case strings.HasPrefix(p, IMAGE_PREFIX): + doc.Content = append(doc.Content, parseImage(p)) + default: + p = strings.TrimSpace(p) + doc.Content = append(doc.Content, Paragraph(ParseInline(p))) + } + } + + return doc +} + +func isCodeBlock(par string) bool { + return strings.HasPrefix(par, CODE_PREFIX) && strings.HasSuffix(par, CODE_SUFFIX) +} + +func parseCodeBlock(par string) CodeBlock { + ss := strings.Split(par, "\n") + ss = ss[1 : len(ss)-1] + content := strings.Join(ss, "\n") + + return CodeBlock(content) +} + +func parseImage(img string) Image { + ss := strings.TrimPrefix(img, IMAGE_PREFIX) + if strings.Index(ss, "[") >= 0 && strings.HasSuffix(ss, "]") { + parts := strings.Split(ss, "[") + return Image{ + Src: parts[0], + Alt: strings.TrimSuffix(parts[1], "]"), + } + } + + return Image{ + Src: ss, + } +} + +func ParseHeader(text string, doc *ADoc) { + text = strings.TrimSpace(text) + lines := strings.Split(text, LINE_SEPARATOR) + for i, l := range lines { + switch { + case i == 0: + s := strings.Split(l, TITLE_PREFIX) + doc.Title = s[1] + case isDate(l): + date, _ := time.Parse("2006-01-02", l) + doc.Date = date + case isField(l): + name, value := parseField(l) + switch name { + case "language": + doc.Language = NewLanguage(value) + case "updated": + updateDate, _ := time.Parse("2006-01-02", value) + doc.Updated = updateDate + case "tags": + for _, tag := range strings.Split(value, ",") { + doc.Tags = append(doc.Tags, Tag(strings.TrimSpace(tag))) + } + } + default: + doc.Author = l + } + } +} + +func isField(line string) bool { + parts := strings.Split(line, FIELD_DELIMITER) + if len(parts) != 3 || parts[0] != "" { + return false + } + + return true +} + +func parseField(line string) (string, string) { + parts := strings.Split(line, FIELD_DELIMITER) + if len(parts) != 3 { + return "", "" + } + trimmedParts := []string{} + for _, part := range parts { + trimmedParts = append(trimmedParts, strings.TrimSpace(part)) + } + if trimmedParts[0] != "" { + return "", "" + } + + return trimmedParts[1], trimmedParts[2] +} + +func isDate(text string) bool { + if _, err := time.Parse("2006-01-02", text); err == nil { + return true + } + + return false +} + +func ParseInline(text string) []InlineElement { + var e []InlineElement + + ss := strings.Split(text, "") + var buffer, curWord, prevChar string + var strong, emp, code, linkTitle bool + wordStart := true + for _, s := range ss { + switch { + case (s == "_" && wordStart) || (s == "_" && emp): + e = addElement(e, buffer+curWord, strong, emp, code) + emp = !emp + buffer = "" + curWord = "" + case s == "*": + e = addElement(e, buffer+curWord, strong, emp, code) + buffer = "" + curWord = "" + strong = !strong + case s == "`": + e = addElement(e, buffer+curWord, strong, emp, code) + code = !code + buffer = "" + curWord = "" + case s == "[" && prevChar != "": + e = addElement(e, buffer, strong, emp, code) + linkTitle = true + curWord += s + case s == "]" && linkTitle: + e = addLink(e, curWord) + buffer = "" + curWord = "" + linkTitle = false + case s == " " && !linkTitle: + buffer += curWord + " " + curWord = "" + default: + curWord += s + } + prevChar = s + wordStart = false + if prevChar == " " { + wordStart = true + } + } + if len(buffer+curWord) > 0 { + e = addElement(e, buffer+curWord, false, false, false) + } + + return e +} + +func addLink(ies []InlineElement, linkText string) []InlineElement { + ss := strings.Split(linkText, "[") + if len(ss) < 2 { + ss = append(ss, "ERROR") + } + + return append(ies, Link{url: ss[0], title: ss[1]}) +} + +func addElement(ies []InlineElement, text string, strong, emp, code bool) []InlineElement { + if len(text) == 0 { + return ies + } + + var ne InlineElement + switch { + case code: + ne = CodeText(text) + case strong && emp: + ne = StrongEmpText(text) + case strong && !emp: + ne = StrongText(text) + case !strong && emp: + ne = EmpText(text) + default: + ne = PlainText(text) + } + + return append(ies, ne) +} diff --git a/adoc/parser_test.go b/adoc/parser_test.go new file mode 100644 index 0000000..d54744f --- /dev/null +++ b/adoc/parser_test.go @@ -0,0 +1,215 @@ +package adoc_test + +import ( + "fmt" + "testing" + "time" + + "ewintr.nl/go-kit/adoc" + "ewintr.nl/go-kit/test" +) + +func TestNew(t *testing.T) { + one := "one" + two := "two" + three := "three" + ptOne := adoc.PlainText(one) + ptTwo := adoc.PlainText(two) + ptThree := adoc.PlainText(three) + for _, tc := range []struct { + name string + input string + exp *adoc.ADoc + }{ + { + name: "empty", + exp: &adoc.ADoc{ + Tags: []adoc.Tag{}, + Language: adoc.LANGUAGE_EN, + }, + }, + { + name: "title", + input: "= Title", + exp: &adoc.ADoc{ + Title: "Title", + Tags: []adoc.Tag{}, + Language: adoc.LANGUAGE_EN, + }, + }, + { + name: "header", + input: "= Title\nT. Test\n2020-10-27\n:language:\tnl", + exp: &adoc.ADoc{ + Title: "Title", + Author: "T. Test", + Language: adoc.LANGUAGE_NL, + Tags: []adoc.Tag{}, + Date: time.Date(2020, time.October, 27, 0, 0, 0, 0, time.UTC), + }, + }, + { + name: "paragraphs", + input: fmt.Sprintf("%s\n\n%s\n\n%s", one, two, three), + exp: &adoc.ADoc{ + Tags: []adoc.Tag{}, + Language: adoc.LANGUAGE_EN, + Content: []adoc.BlockElement{ + adoc.Paragraph([]adoc.InlineElement{ptOne}), + adoc.Paragraph([]adoc.InlineElement{ptTwo}), + adoc.Paragraph([]adoc.InlineElement{ptThree}), + }, + }, + }, + { + name: "subtitle", + input: "== Subtitle", + exp: &adoc.ADoc{ + Tags: []adoc.Tag{}, + Language: adoc.LANGUAGE_EN, + Content: []adoc.BlockElement{ + adoc.SubTitle("Subtitle"), + }, + }, + }, + { + name: "code block", + input: "----\nsome code\nmore code\n----", + exp: &adoc.ADoc{ + Tags: []adoc.Tag{}, + Language: adoc.LANGUAGE_EN, + Content: []adoc.BlockElement{ + adoc.CodeBlock("some code\nmore code"), + }, + }, + }, + { + name: "code block with empty lines", + input: "----\nsome code\n\nmore code\n----", + exp: &adoc.ADoc{ + Tags: []adoc.Tag{}, + Language: adoc.LANGUAGE_EN, + Content: []adoc.BlockElement{ + adoc.CodeBlock("some code\n\nmore code"), + }, + }, + }, + { + name: "list", + input: "* item 1\n* item 2\n* *item 3*\n", + exp: &adoc.ADoc{ + Tags: []adoc.Tag{}, + Language: adoc.LANGUAGE_EN, + Content: []adoc.BlockElement{ + adoc.List{ + adoc.ListItem([]adoc.InlineElement{adoc.PlainText("item 1")}), + adoc.ListItem([]adoc.InlineElement{adoc.PlainText("item 2")}), + adoc.ListItem([]adoc.InlineElement{adoc.StrongText("item 3")}), + }, + }, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + act := adoc.New(tc.input) + + test.Equals(t, tc.exp, act) + }) + } +} + +func TestParseInline(t *testing.T) { + for _, tc := range []struct { + name string + input string + exp []adoc.InlineElement + }{{ + name: "empty", + }, + { + name: "plain", + input: "some test text", + exp: []adoc.InlineElement{ + adoc.PlainText("some test text")}, + }, + { + name: "strong", + input: "*some strong text*", + exp: []adoc.InlineElement{ + adoc.StrongText("some strong text"), + }, + }, + { + name: "strong in plain", + input: "some *strong* text", + exp: []adoc.InlineElement{ + adoc.PlainText("some "), + adoc.StrongText("strong"), + adoc.PlainText(" text"), + }, + }, + { + name: "emphasis", + input: "_some emphasized text_", + exp: []adoc.InlineElement{ + adoc.EmpText("some emphasized text"), + }, + }, + { + name: "emphasis in plain", + input: "some _emphasized_ text", + exp: []adoc.InlineElement{ + adoc.PlainText("some "), + adoc.EmpText("emphasized"), + adoc.PlainText(" text"), + }, + }, + { + name: "emp and strong in plain", + input: "some _*special*_ text", + exp: []adoc.InlineElement{ + adoc.PlainText("some "), + adoc.StrongEmpText("special"), + adoc.PlainText(" text"), + }, + }, + { + name: "link", + input: "a link[title] somewhere", + exp: []adoc.InlineElement{ + adoc.PlainText("a "), + adoc.NewLink("link", "title"), + adoc.PlainText(" somewhere"), + }, + }, + { + name: "code", + input: "`command`", + exp: []adoc.InlineElement{ + adoc.CodeText("command"), + }, + }, + { + name: "code in plain", + input: "some `code` in text", + exp: []adoc.InlineElement{ + adoc.PlainText("some "), + adoc.CodeText("code"), + adoc.PlainText(" in text"), + }, + }, + { + name: "link with underscore", + input: "https://example.com/some_url[some url]", + exp: []adoc.InlineElement{ + adoc.NewLink("https://example.com/some_url", "some url"), + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + act := adoc.ParseInline(tc.input) + + test.Equals(t, tc.exp, act) + }) + } +}