ref: a8853f1c5ace30ae8d256ad374bdb280c95d4228
parent: 4d93aca27dfdebc9e06948ccf37a7922dac09d65
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
date: Mon Dec 17 15:54:06 EST 2018
parser/pageparser: Split the page lexer into some more files See #5534
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -29,18 +29,6 @@
// returns the next state in scanner.
type stateFunc func(*pageLexer) stateFunc
-type lexerShortcodeState struct {- currLeftDelimItem ItemType
- currRightDelimItem ItemType
- isInline bool
- currShortcodeName string // is only set when a shortcode is in opened state
- closingState int // > 0 = on its way to be closed
- elementStepNum int // step number in element
- paramElements int // number of elements (name + value = 2) found first
- openShortcodes map[string]bool // set of shortcodes in open state
-
-}
-
type pageLexer struct {input []byte
stateStart stateFunc
@@ -102,17 +90,6 @@
return l
}
-// Shortcode syntax
-var (
- leftDelimSc = []byte("{{")- leftDelimScNoMarkup = []byte("{{<")- rightDelimScNoMarkup = []byte(">}}")- leftDelimScWithMarkup = []byte("{{%")- rightDelimScWithMarkup = []byte("%}}")- leftComment = []byte("/*") // comments in this context us used to to mark shortcodes as "not really a shortcode"- rightComment = []byte("*/")-)
-
// Page syntax
var (
byteOrderMark = '\ufeff'
@@ -293,11 +270,6 @@
}
-func (l *pageLexer) isShortCodeStart() bool {- return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup)
-
-}
-
func (l *pageLexer) posFirstNonWhiteSpace() int { f := func(c rune) bool {return !unicode.IsSpace(c)
@@ -305,69 +277,6 @@
return bytes.IndexFunc(l.input[l.pos:], f)
}
-func lexIntroSection(l *pageLexer) stateFunc {- l.summaryDivider = summaryDivider
-
-LOOP:
- for {- r := l.next()
- if r == eof {- break
- }
-
- switch {- case r == '+':
- return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML)
- case r == '-':
- return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML)
- case r == '{':- return lexFrontMatterJSON
- case r == '#':
- return lexFrontMatterOrgMode
- case r == byteOrderMark:
- l.emit(TypeIgnore)
- case !isSpace(r) && !isEndOfLine(r):
- if r == '<' {- l.backup()
- if l.hasPrefix(htmlCommentStart) {- // This may be commented out front mattter, which should
- // still be read.
- l.consumeToNextLine()
- l.isInHTMLComment = true
- l.emit(TypeIgnore)
- continue LOOP
- } else {- if l.pos > l.start {- l.emit(tText)
- }
- l.next()
- // This is the start of a plain HTML document with no
- // front matter. I still can contain shortcodes, so we
- // have to keep looking.
- l.emit(TypeHTMLStart)
- }
- }
- break LOOP
- }
- }
-
- // Now move on to the shortcodes.
- return lexMainSection
-}
-
-func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc {- l.isInHTMLComment = false
- right := l.index(htmlCommentEnd)
- if right == -1 {- return l.errorf("starting HTML comment with no end")- }
- l.pos += right + len(htmlCommentEnd)
- l.emit(TypeIgnore)
-
- // Now move on to the shortcodes.
- return lexMainSection
-}
-
func lexDone(l *pageLexer) stateFunc {// Done!
@@ -378,385 +287,10 @@
return nil
}
-func lexFrontMatterJSON(l *pageLexer) stateFunc {- // Include the left delimiter
- l.backup()
-
- var (
- inQuote bool
- level int
- )
-
- for {-
- r := l.next()
-
- switch {- case r == eof:
- return l.errorf("unexpected EOF parsing JSON front matter")- case r == '{':- if !inQuote {- level++
- }
- case r == '}':
- if !inQuote {- level--
- }
- case r == '"':
- inQuote = !inQuote
- case r == '\\':
- // This may be an escaped quote. Make sure it's not marked as a
- // real one.
- l.next()
- }
-
- if level == 0 {- break
- }
- }
-
- l.consumeCRLF()
- l.emit(TypeFrontMatterJSON)
-
- return lexMainSection
-}
-
-func lexFrontMatterOrgMode(l *pageLexer) stateFunc {- /*
- #+TITLE: Test File For chaseadamsio/goorgeous
- #+AUTHOR: Chase Adams
- #+DESCRIPTION: Just another golang parser for org content!
- */
-
- l.summaryDivider = summaryDividerOrg
-
- l.backup()
-
- if !l.hasPrefix(delimOrg) {- return lexMainSection
- }
-
- // Read lines until we no longer see a #+ prefix
-LOOP:
- for {-
- r := l.next()
-
- switch {- case r == '\n':
- if !l.hasPrefix(delimOrg) {- break LOOP
- }
- case r == eof:
- break LOOP
-
- }
- }
-
- l.emit(TypeFrontMatterORG)
-
- return lexMainSection
-
-}
-
func (l *pageLexer) printCurrentInput() { fmt.Printf("input[%d:]: %q", l.pos, string(l.input[l.pos:]))}
-// Handle YAML or TOML front matter.
-func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc {-
- for i := 0; i < 2; i++ {- if r := l.next(); r != delimr {- return l.errorf("invalid %s delimiter", name)- }
- }
-
- // Let front matter start at line 1
- wasEndOfLine := l.consumeCRLF()
- // We don't care about the delimiters.
- l.ignore()
-
- var r rune
-
- for {- if !wasEndOfLine {- r = l.next()
- if r == eof {- return l.errorf("EOF looking for end %s front matter delimiter", name)- }
- }
-
- if wasEndOfLine || isEndOfLine(r) {- if l.hasPrefix(delim) {- l.emit(tp)
- l.pos += 3
- l.consumeCRLF()
- l.ignore()
- break
- }
- }
-
- wasEndOfLine = false
- }
-
- return lexMainSection
-}
-
-func lexShortcodeLeftDelim(l *pageLexer) stateFunc {- l.pos += len(l.currentLeftShortcodeDelim())
- if l.hasPrefix(leftComment) {- return lexShortcodeComment
- }
- l.emit(l.currentLeftShortcodeDelimItem())
- l.elementStepNum = 0
- l.paramElements = 0
- return lexInsideShortcode
-}
-
-func lexShortcodeComment(l *pageLexer) stateFunc {- posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...))
- if posRightComment <= 1 {- return l.errorf("comment must be closed")- }
- // we emit all as text, except the comment markers
- l.emit(tText)
- l.pos += len(leftComment)
- l.ignore()
- l.pos += posRightComment - len(leftComment)
- l.emit(tText)
- l.pos += len(rightComment)
- l.ignore()
- l.pos += len(l.currentRightShortcodeDelim())
- l.emit(tText)
- return lexMainSection
-}
-
-func lexShortcodeRightDelim(l *pageLexer) stateFunc {- l.closingState = 0
- l.pos += len(l.currentRightShortcodeDelim())
- l.emit(l.currentRightShortcodeDelimItem())
- return lexMainSection
-}
-
-// either:
-// 1. param
-// 2. "param" or "param\"
-// 3. param="123" or param="123\"
-// 4. param="Some \"escaped\" text"
-func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc {-
- first := true
- nextEq := false
-
- var r rune
-
- for {- r = l.next()
- if first {- if r == '"' {- // a positional param with quotes
- if l.paramElements == 2 {- return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters")- }
- l.paramElements = 1
- l.backup()
- return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam)
- }
- first = false
- } else if r == '=' {- // a named param
- l.backup()
- nextEq = true
- break
- }
-
- if !isAlphaNumericOrHyphen(r) {- l.backup()
- break
- }
- }
-
- if l.paramElements == 0 {- l.paramElements++
-
- if nextEq {- l.paramElements++
- }
- } else {- if nextEq && l.paramElements == 1 {- return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current())- } else if !nextEq && l.paramElements == 2 {- return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current())- }
- }
-
- l.emit(tScParam)
- return lexInsideShortcode
-
-}
-
-func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc {- openQuoteFound := false
- escapedInnerQuoteFound := false
- escapedQuoteState := 0
-
-Loop:
- for {- switch r := l.next(); {- case r == '\\':
- if l.peek() == '"' {- if openQuoteFound && !escapedQuotedValuesAllowed {- l.backup()
- break Loop
- } else if openQuoteFound {- // the coming quoute is inside
- escapedInnerQuoteFound = true
- escapedQuoteState = 1
- }
- }
- case r == eof, r == '\n':
- return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current())- case r == '"':
- if escapedQuoteState == 0 {- if openQuoteFound {- l.backup()
- break Loop
-
- } else {- openQuoteFound = true
- l.ignore()
- }
- } else {- escapedQuoteState = 0
- }
-
- }
- }
-
- if escapedInnerQuoteFound {- l.ignoreEscapesAndEmit(typ)
- } else {- l.emit(typ)
- }
-
- r := l.next()
-
- if r == '\\' {- if l.peek() == '"' {- // ignore the escaped closing quote
- l.ignore()
- l.next()
- l.ignore()
- }
- } else if r == '"' {- // ignore closing quote
- l.ignore()
- } else {- // handled by next state
- l.backup()
- }
-
- return lexInsideShortcode
-}
-
-// Inline shortcodes has the form {{< myshortcode.inline >}}-var inlineIdentifier = []byte("inline ")-
-// scans an alphanumeric inside shortcode
-func lexIdentifierInShortcode(l *pageLexer) stateFunc {- lookForEnd := false
-Loop:
- for {- switch r := l.next(); {- case isAlphaNumericOrHyphen(r):
- // Allow forward slash inside names to make it possible to create namespaces.
- case r == '/':
- case r == '.':
- l.isInline = l.hasPrefix(inlineIdentifier)
- if !l.isInline {- return l.errorf("period in shortcode name only allowed for inline identifiers")- }
- default:
- l.backup()
- word := string(l.input[l.start:l.pos])
- if l.closingState > 0 && !l.openShortcodes[word] {- return l.errorf("closing tag for shortcode '%s' does not match start tag", word)- } else if l.closingState > 0 {- l.openShortcodes[word] = false
- lookForEnd = true
- }
-
- l.closingState = 0
- l.currShortcodeName = word
- l.openShortcodes[word] = true
- l.elementStepNum++
- if l.isInline {- l.emit(tScNameInline)
- } else {- l.emit(tScName)
- }
- break Loop
- }
- }
-
- if lookForEnd {- return lexEndOfShortcode
- }
- return lexInsideShortcode
-}
-
-func lexEndOfShortcode(l *pageLexer) stateFunc {- l.isInline = false
- if l.hasPrefix(l.currentRightShortcodeDelim()) {- return lexShortcodeRightDelim
- }
- switch r := l.next(); {- case isSpace(r):
- l.ignore()
- default:
- return l.errorf("unclosed shortcode")- }
- return lexEndOfShortcode
-}
-
-// scans the elements inside shortcode tags
-func lexInsideShortcode(l *pageLexer) stateFunc {- if l.hasPrefix(l.currentRightShortcodeDelim()) {- return lexShortcodeRightDelim
- }
- switch r := l.next(); {- case r == eof:
- // eol is allowed inside shortcodes; this may go to end of document before it fails
- return l.errorf("unclosed shortcode action")- case isSpace(r), isEndOfLine(r):
- l.ignore()
- case r == '=':
- l.ignore()
- return lexShortcodeQuotedParamVal(l, l.peek() != '\\', tScParamVal)
- case r == '/':
- if l.currShortcodeName == "" {- return l.errorf("got closing shortcode, but none is open")- }
- l.closingState++
- l.emit(tScClose)
- case r == '\\':
- l.ignore()
- if l.peek() == '"' {- return lexShortcodeParam(l, true)
- }
- case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"'): // positional params can have quotes
- l.backup()
- return lexShortcodeParam(l, false)
- case isAlphaNumeric(r):
- l.backup()
- return lexIdentifierInShortcode
- default:
- return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r)- }
- return lexInsideShortcode
-}
-
// state helpers
func (l *pageLexer) index(sep []byte) int {@@ -765,29 +299,6 @@
func (l *pageLexer) hasPrefix(prefix []byte) bool {return bytes.HasPrefix(l.input[l.pos:], prefix)
-}
-
-func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType {- return l.currLeftDelimItem
-}
-
-func (l *pageLexer) currentRightShortcodeDelimItem() ItemType {- return l.currRightDelimItem
-}
-
-func (l *pageLexer) currentLeftShortcodeDelim() []byte {- if l.currLeftDelimItem == tLeftDelimScWithMarkup {- return leftDelimScWithMarkup
- }
- return leftDelimScNoMarkup
-
-}
-
-func (l *pageLexer) currentRightShortcodeDelim() []byte {- if l.currRightDelimItem == tRightDelimScWithMarkup {- return rightDelimScWithMarkup
- }
- return rightDelimScNoMarkup
}
// helper functions
--- /dev/null
+++ b/parser/pageparser/pagelexer_intro.go
@@ -1,0 +1,202 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.) in Hugo.
+// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go"
+// It's on YouTube, Google it!
+// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
+package pageparser
+
+func lexIntroSection(l *pageLexer) stateFunc {+ l.summaryDivider = summaryDivider // lexIntroSection looks for front matter at the start of the input; lexFrontMatterOrgMode swaps in summaryDividerOrg
+
+LOOP:
+ for {+ r := l.next()
+ if r == eof {+ break // leaves the for loop
+ }
+
+ switch {+ case r == '+':
+ return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML) // the remaining delimiter runes are validated there
+ case r == '-':
+ return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML)
+ case r == '{':+ return lexFrontMatterJSON
+ case r == '#':
+ return lexFrontMatterOrgMode // falls back to lexMainSection when delimOrg does not follow
+ case r == byteOrderMark:
+ l.emit(TypeIgnore) // emit the BOM as TypeIgnore and keep scanning
+ case !isSpace(r) && !isEndOfLine(r):
+ if r == '<' {+ l.backup()
+ if l.hasPrefix(htmlCommentStart) {+ // This may be commented-out front matter, which should
+ // still be read.
+ l.consumeToNextLine()
+ l.isInHTMLComment = true
+ l.emit(TypeIgnore)
+ continue LOOP
+ } else {+ if l.pos > l.start {+ l.emit(tText)
+ }
+ l.next()
+ // This is the start of a plain HTML document with no
+ // front matter. It can still contain shortcodes, so we
+ // have to keep looking.
+ l.emit(TypeHTMLStart)
+ }
+ }
+ break LOOP // any other non-space rune ends the search for front matter
+ }
+ }
+
+ // No front matter lexer took over; continue with the main content and its shortcodes.
+ return lexMainSection
+}
+
+func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc {+ l.isInHTMLComment = false // clears the flag that lexIntroSection sets when it sees htmlCommentStart
+ right := l.index(htmlCommentEnd)
+ if right == -1 {+ return l.errorf("starting HTML comment with no end")+ }
+ l.pos += right + len(htmlCommentEnd) // presumably right is an offset from l.pos, so this lands just past the terminator - TODO confirm against index
+ l.emit(TypeIgnore)
+
+ // Now move on to the shortcodes.
+ return lexMainSection
+}
+
+func lexFrontMatterJSON(l *pageLexer) stateFunc {+ // Include the left delimiter
+ l.backup()
+
+ var (
+ inQuote bool // toggled on every unescaped double quote; braces are not counted while set
+ level int // brace nesting depth
+ )
+
+ for {+
+ r := l.next()
+
+ switch {+ case r == eof:
+ return l.errorf("unexpected EOF parsing JSON front matter")+ case r == '{':+ if !inQuote {+ level++
+ }
+ case r == '}':
+ if !inQuote {+ level--
+ }
+ case r == '"':
+ inQuote = !inQuote
+ case r == '\\':
+ // This may be an escaped quote. Make sure it's not marked as a
+ // real one.
+ l.next()
+ }
+
+ if level == 0 {+ break // depth is back to zero: the object opened by the first brace is closed
+ }
+ }
+
+ l.consumeCRLF() // presumably pulls a directly following line break into the item - TODO confirm
+ l.emit(TypeFrontMatterJSON)
+
+ return lexMainSection
+}
+
+func lexFrontMatterOrgMode(l *pageLexer) stateFunc {+ /*
+ #+TITLE: Test File For chaseadamsio/goorgeous
+ #+AUTHOR: Chase Adams
+ #+DESCRIPTION: Just another golang parser for org content!
+ */
+
+ l.summaryDivider = summaryDividerOrg // NOTE(review): set before the delimOrg check below, so it also applies when that check fails - confirm intended
+
+ l.backup() // presumably steps back onto the rune consumed by lexIntroSection so hasPrefix can see it
+
+ if !l.hasPrefix(delimOrg) {+ return lexMainSection
+ }
+
+ // Read lines until we no longer see a #+ prefix
+LOOP:
+ for {+
+ r := l.next()
+
+ switch {+ case r == '\n':
+ if !l.hasPrefix(delimOrg) {+ break LOOP
+ }
+ case r == eof:
+ break LOOP
+
+ }
+ }
+
+ l.emit(TypeFrontMatterORG) // everything scanned by the loop above becomes one front matter item
+
+ return lexMainSection
+
+}
+
+// lexFrontMatterSection handles YAML or TOML front matter: it checks the next two opening delimr runes, emits the content up to a closing delim found at the start of a line as tp, and continues with lexMainSection.
+func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc {+
+ for i := 0; i < 2; i++ {+ if r := l.next(); r != delimr {+ return l.errorf("invalid %s delimiter", name)+ }
+ }
+
+ // Let front matter start at line 1
+ wasEndOfLine := l.consumeCRLF()
+ // We don't care about the delimiters.
+ l.ignore()
+
+ var r rune
+
+ for {+ if !wasEndOfLine {+ r = l.next()
+ if r == eof {+ return l.errorf("EOF looking for end %s front matter delimiter", name)+ }
+ }
+
+ if wasEndOfLine || isEndOfLine(r) {+ if l.hasPrefix(delim) {+ l.emit(tp)
+ l.pos += 3 // NOTE(review): hard-coded length, presumably len(delim) - confirm
+ l.consumeCRLF()
+ l.ignore()
+ break
+ }
+ }
+
+ wasEndOfLine = false // only the first iteration may skip reading a rune
+ }
+
+ return lexMainSection
+}
--- /dev/null
+++ b/parser/pageparser/pagelexer_shortcode.go
@@ -1,0 +1,322 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.) in Hugo.
+// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go"
+// It's on YouTube, Google it!
+// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
+package pageparser
+
+type lexerShortcodeState struct {+ currLeftDelimItem ItemType // item type emitted for the left delimiter; currentLeftShortcodeDelim maps it to the delimiter bytes
+ currRightDelimItem ItemType // item type emitted for the right delimiter; currentRightShortcodeDelim maps it to the delimiter bytes
+ isInline bool // set by lexIdentifierInShortcode, cleared by lexEndOfShortcode
+ currShortcodeName string // is only set when a shortcode is in opened state
+ closingState int // > 0 = on its way to be closed
+ elementStepNum int // step number in element; reset to 0 by lexShortcodeLeftDelim
+ paramElements int // number of elements (name + value = 2) found first; 0 = none yet, 1 = positional, 2 = named
+ openShortcodes map[string]bool // set of shortcodes in open state
+
+}
+
+// Shortcode syntax: delimiters and comment markers used by the shortcode lexer states.
+var (
+ leftDelimSc = []byte("{{")+ leftDelimScNoMarkup = []byte("{{<")+ rightDelimScNoMarkup = []byte(">}}")+ leftDelimScWithMarkup = []byte("{{%")+ rightDelimScWithMarkup = []byte("%}}")+ leftComment = []byte("/*") // comments in this context are used to mark shortcodes as "not really a shortcode"+ rightComment = []byte("*/")+)
+
+func (l *pageLexer) isShortCodeStart() bool {+ return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup) // true when either left shortcode delimiter starts at the current position
+}
+
+func lexShortcodeLeftDelim(l *pageLexer) stateFunc {+ l.pos += len(l.currentLeftShortcodeDelim()) // step over the left delimiter
+ if l.hasPrefix(leftComment) {+ return lexShortcodeComment // commented-out shortcode: no delimiter item is emitted here
+ }
+ l.emit(l.currentLeftShortcodeDelimItem())
+ l.elementStepNum = 0 // fresh shortcode: no name or params seen yet
+ l.paramElements = 0
+ return lexInsideShortcode
+}
+
+func lexShortcodeComment(l *pageLexer) stateFunc {+ posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...)) // NOTE(review): append targets the package-level rightComment slice; confirm it cannot write into shared backing storage
+ if posRightComment <= 1 {+ return l.errorf("comment must be closed")+ }
+ // we emit all as text, except the comment markers
+ l.emit(tText) // presumably covers the left delimiter stepped over in lexShortcodeLeftDelim - TODO confirm against emit
+ l.pos += len(leftComment)
+ l.ignore() // drop the opening comment marker
+ l.pos += posRightComment - len(leftComment)
+ l.emit(tText) // the commented-out shortcode body
+ l.pos += len(rightComment)
+ l.ignore() // drop the closing comment marker
+ l.pos += len(l.currentRightShortcodeDelim())
+ l.emit(tText) // the right delimiter, emitted as plain text
+ return lexMainSection
+}
+
+func lexShortcodeRightDelim(l *pageLexer) stateFunc {+ l.closingState = 0
+ l.pos += len(l.currentRightShortcodeDelim()) // step over the right delimiter
+ l.emit(l.currentRightShortcodeDelimItem())
+ return lexMainSection // the shortcode tag is done; back to regular content
+}
+
+// lexShortcodeParam scans one shortcode parameter, which is either:
+// 1. param
+// 2. "param" or "param\"
+// 3. param="123" or param="123\"
+// 4. param="Some \"escaped\" text"
+func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc {+
+ first := true // true until the first rune has been inspected
+ nextEq := false // set when the scanned name is followed by an equals sign, i.e. a named param
+
+ var r rune
+
+ for {+ r = l.next()
+ if first {+ if r == '"' {+ // a positional param with quotes
+ if l.paramElements == 2 {+ return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters")+ }
+ l.paramElements = 1
+ l.backup()
+ return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam)
+ }
+ first = false
+ } else if r == '=' {+ // a named param
+ l.backup()
+ nextEq = true
+ break
+ }
+
+ if !isAlphaNumericOrHyphen(r) {+ l.backup()
+ break
+ }
+ }
+
+ if l.paramElements == 0 {+ l.paramElements++
+
+ if nextEq {+ l.paramElements++
+ }
+ } else {+ if nextEq && l.paramElements == 1 {+ return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current())+ } else if !nextEq && l.paramElements == 2 {+ return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current())+ }
+ }
+
+ l.emit(tScParam) // the param name (named style) or the bare positional value
+ return lexInsideShortcode
+
+}
+
+func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc {+ openQuoteFound := false
+ escapedInnerQuoteFound := false // value contains escaped quotes, so it is emitted via ignoreEscapesAndEmit
+ escapedQuoteState := 0 // 1 right after a backslash that escapes an inner quote, so that quote is not taken as the terminator
+
+Loop:
+ for {+ switch r := l.next(); {+ case r == '\\':
+ if l.peek() == '"' {+ if openQuoteFound && !escapedQuotedValuesAllowed {+ l.backup()
+ break Loop
+ } else if openQuoteFound {+ // the coming quote is inside the value
+ escapedInnerQuoteFound = true
+ escapedQuoteState = 1
+ }
+ }
+ case r == eof, r == '\n':
+ return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current())+ case r == '"':
+ if escapedQuoteState == 0 {+ if openQuoteFound {+ l.backup()
+ break Loop
+
+ } else {+ openQuoteFound = true
+ l.ignore()
+ }
+ } else {+ escapedQuoteState = 0
+ }
+
+ }
+ }
+
+ if escapedInnerQuoteFound {+ l.ignoreEscapesAndEmit(typ)
+ } else {+ l.emit(typ)
+ }
+
+ r := l.next()
+
+ if r == '\\' {+ if l.peek() == '"' {+ // ignore the escaped closing quote
+ l.ignore()
+ l.next()
+ l.ignore()
+ }
+ } else if r == '"' {+ // ignore closing quote
+ l.ignore()
+ } else {+ // handled by next state
+ l.backup()
+ }
+
+ return lexInsideShortcode
+}
+
+// Inline shortcodes have the form {{< myshortcode.inline >}}+var inlineIdentifier = []byte("inline ")+
+// lexIdentifierInShortcode scans a shortcode name (alphanumerics, hyphens, forward slashes, and a period only when inlineIdentifier follows) and emits it as tScName or tScNameInline.
+func lexIdentifierInShortcode(l *pageLexer) stateFunc {+ lookForEnd := false
+Loop:
+ for {+ switch r := l.next(); {+ case isAlphaNumericOrHyphen(r):
+ // Allow forward slash inside names to make it possible to create namespaces.
+ case r == '/':
+ case r == '.':
+ l.isInline = l.hasPrefix(inlineIdentifier)
+ if !l.isInline {+ return l.errorf("period in shortcode name only allowed for inline identifiers")+ }
+ default:
+ l.backup()
+ word := string(l.input[l.start:l.pos])
+ if l.closingState > 0 && !l.openShortcodes[word] {+ return l.errorf("closing tag for shortcode '%s' does not match start tag", word)+ } else if l.closingState > 0 {+ l.openShortcodes[word] = false
+ lookForEnd = true // the name belongs to a closing tag, so only the right delimiter may follow
+ }
+
+ l.closingState = 0
+ l.currShortcodeName = word
+ l.openShortcodes[word] = true // NOTE(review): also runs on the closing path, overwriting the false stored a few lines earlier - confirm intended
+ l.elementStepNum++
+ if l.isInline {+ l.emit(tScNameInline)
+ } else {+ l.emit(tScName)
+ }
+ break Loop
+ }
+ }
+
+ if lookForEnd {+ return lexEndOfShortcode
+ }
+ return lexInsideShortcode
+}
+
+func lexEndOfShortcode(l *pageLexer) stateFunc {+ l.isInline = false
+ if l.hasPrefix(l.currentRightShortcodeDelim()) {+ return lexShortcodeRightDelim
+ }
+ switch r := l.next(); {+ case isSpace(r):
+ l.ignore() // skip spaces before the right delimiter
+ default:
+ return l.errorf("unclosed shortcode")+ }
+ return lexEndOfShortcode // re-enter this state for the next rune
+}
+
+// lexInsideShortcode scans the elements inside shortcode tags, dispatching on the next rune until the right delimiter is reached.
+func lexInsideShortcode(l *pageLexer) stateFunc {+ if l.hasPrefix(l.currentRightShortcodeDelim()) {+ return lexShortcodeRightDelim
+ }
+ switch r := l.next(); {+ case r == eof:
+ // line breaks are allowed inside shortcodes, so an unclosed shortcode may run to the end of the document before it fails
+ return l.errorf("unclosed shortcode action")+ case isSpace(r), isEndOfLine(r):
+ l.ignore()
+ case r == '=':
+ l.ignore()
+ return lexShortcodeQuotedParamVal(l, l.peek() != '\\', tScParamVal) // escaped quotes are allowed in the value unless it starts with a backslash
+ case r == '/':
+ if l.currShortcodeName == "" {+ return l.errorf("got closing shortcode, but none is open")+ }
+ l.closingState++
+ l.emit(tScClose)
+ case r == '\\':
+ l.ignore()
+ if l.peek() == '"' {+ return lexShortcodeParam(l, true)
+ }
+ case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"'): // positional params can have quotes
+ l.backup()
+ return lexShortcodeParam(l, false)
+ case isAlphaNumeric(r):
+ l.backup()
+ return lexIdentifierInShortcode // presumably only reached while elementStepNum is 0, i.e. before the name has been scanned
+ default:
+ return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r)+ }
+ return lexInsideShortcode
+}
+
+func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType {+ return l.currLeftDelimItem // plain accessor for the stored left delimiter item type
+}
+
+func (l *pageLexer) currentRightShortcodeDelimItem() ItemType {+ return l.currRightDelimItem // plain accessor for the stored right delimiter item type
+}
+
+func (l *pageLexer) currentLeftShortcodeDelim() []byte {+ if l.currLeftDelimItem == tLeftDelimScWithMarkup {+ return leftDelimScWithMarkup
+ }
+ return leftDelimScNoMarkup // every other item type maps to the no-markup delimiter
+
+}
+
+func (l *pageLexer) currentRightShortcodeDelim() []byte {+ if l.currRightDelimItem == tRightDelimScWithMarkup {+ return rightDelimScWithMarkup
+ }
+ return rightDelimScNoMarkup // every other item type maps to the no-markup delimiter
+}
--
⑨