From 14bb08c1df8db9ec6c8a05520d4eee67971235d9 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Thu, 27 Sep 2018 20:03:23 +0200 Subject: mod tidy --- vendor/golang.org/x/net/html/parse.go | 2310 --------------------------------- 1 file changed, 2310 deletions(-) delete mode 100644 vendor/golang.org/x/net/html/parse.go (limited to 'vendor/golang.org/x/net/html/parse.go') diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go deleted file mode 100644 index 7e539b1..0000000 --- a/vendor/golang.org/x/net/html/parse.go +++ /dev/null @@ -1,2310 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -import ( - "errors" - "fmt" - "io" - "strings" - - a "golang.org/x/net/html/atom" -) - -// A parser implements the HTML5 parsing algorithm: -// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction -type parser struct { - // tokenizer provides the tokens for the parser. - tokenizer *Tokenizer - // tok is the most recently read token. - tok Token - // Self-closing tags like
are treated as start tags, except that - // hasSelfClosingToken is set while they are being processed. - hasSelfClosingToken bool - // doc is the document root element. - doc *Node - // The stack of open elements (section 12.2.4.2) and active formatting - // elements (section 12.2.4.3). - oe, afe nodeStack - // Element pointers (section 12.2.4.4). - head, form *Node - // Other parsing state flags (section 12.2.4.5). - scripting, framesetOK bool - // The stack of template insertion modes - templateStack insertionModeStack - // im is the current insertion mode. - im insertionMode - // originalIM is the insertion mode to go back to after completing a text - // or inTableText insertion mode. - originalIM insertionMode - // fosterParenting is whether new elements should be inserted according to - // the foster parenting rules (section 12.2.6.1). - fosterParenting bool - // quirks is whether the parser is operating in "quirks mode." - quirks bool - // fragment is whether the parser is parsing an HTML fragment. - fragment bool - // context is the context element when parsing an HTML fragment - // (section 12.4). - context *Node -} - -func (p *parser) top() *Node { - if n := p.oe.top(); n != nil { - return n - } - return p.doc -} - -// Stop tags for use in popUntil. These come from section 12.2.4.2. -var ( - defaultScopeStopTags = map[string][]a.Atom{ - "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template}, - "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext}, - "svg": {a.Desc, a.ForeignObject, a.Title}, - } -) - -type scope int - -const ( - defaultScope scope = iota - listItemScope - buttonScope - tableScope - tableRowScope - tableBodyScope - selectScope -) - -// popUntil pops the stack of open elements at the highest element whose tag -// is in matchTags, provided there is no higher element in the scope's stop -// tags (as defined in section 12.2.4.2). It returns whether or not there was -// such an element. If there was not, popUntil leaves the stack unchanged. -// -// For example, the set of stop tags for table scope is: "html", "table". If -// the stack was: -// ["html", "body", "font", "table", "b", "i", "u"] -// then popUntil(tableScope, "font") would return false, but -// popUntil(tableScope, "i") would return true and the stack would become: -// ["html", "body", "font", "table", "b"] -// -// If an element's tag is in both the stop tags and matchTags, then the stack -// will be popped and the function returns true (provided, of course, there was -// no higher element in the stack that was also in the stop tags). For example, -// popUntil(tableScope, "table") returns true and leaves: -// ["html", "body", "font"] -func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool { - if i := p.indexOfElementInScope(s, matchTags...); i != -1 { - p.oe = p.oe[:i] - return true - } - return false -} - -// indexOfElementInScope returns the index in p.oe of the highest element whose -// tag is in matchTags that is in scope. If no matching element is in scope, it -// returns -1. -func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { - for i := len(p.oe) - 1; i >= 0; i-- { - tagAtom := p.oe[i].DataAtom - if p.oe[i].Namespace == "" { - for _, t := range matchTags { - if t == tagAtom { - return i - } - } - switch s { - case defaultScope: - // No-op. - case listItemScope: - if tagAtom == a.Ol || tagAtom == a.Ul { - return -1 - } - case buttonScope: - if tagAtom == a.Button { - return -1 - } - case tableScope: - if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { - return -1 - } - case selectScope: - if tagAtom != a.Optgroup && tagAtom != a.Option { - return -1 - } - default: - panic("unreachable") - } - } - switch s { - case defaultScope, listItemScope, buttonScope: - for _, t := range defaultScopeStopTags[p.oe[i].Namespace] { - if t == tagAtom { - return -1 - } - } - } - } - return -1 -} - -// elementInScope is like popUntil, except that it doesn't modify the stack of -// open elements. -func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool { - return p.indexOfElementInScope(s, matchTags...) != -1 -} - -// clearStackToContext pops elements off the stack of open elements until a -// scope-defined element is found. -func (p *parser) clearStackToContext(s scope) { - for i := len(p.oe) - 1; i >= 0; i-- { - tagAtom := p.oe[i].DataAtom - switch s { - case tableScope: - if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { - p.oe = p.oe[:i+1] - return - } - case tableRowScope: - if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template { - p.oe = p.oe[:i+1] - return - } - case tableBodyScope: - if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template { - p.oe = p.oe[:i+1] - return - } - default: - panic("unreachable") - } - } -} - -// generateImpliedEndTags pops nodes off the stack of open elements as long as -// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. -// If exceptions are specified, nodes with that name will not be popped off. -func (p *parser) generateImpliedEndTags(exceptions ...string) { - var i int -loop: - for i = len(p.oe) - 1; i >= 0; i-- { - n := p.oe[i] - if n.Type == ElementNode { - switch n.DataAtom { - case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: - for _, except := range exceptions { - if n.Data == except { - break loop - } - } - continue - } - } - break - } - - p.oe = p.oe[:i+1] -} - -// generateAllImpliedEndTags pops nodes off the stack of open elements as long as -// the top node has a tag name of caption, colgroup, dd, div, dt, li, optgroup, option, p, rb, -// rp, rt, rtc, span, tbody, td, tfoot, th, thead or tr. -func (p *parser) generateAllImpliedEndTags() { - var i int - for i = len(p.oe) - 1; i >= 0; i-- { - n := p.oe[i] - if n.Type == ElementNode { - switch n.DataAtom { - // TODO: remove this divergence from the HTML5 spec - case a.Caption, a.Colgroup, a.Dd, a.Div, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, - a.Rp, a.Rt, a.Rtc, a.Span, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: - continue - } - } - break - } - - p.oe = p.oe[:i+1] -} - -// addChild adds a child node n to the top element, and pushes n onto the stack -// of open elements if it is an element node. -func (p *parser) addChild(n *Node) { - if p.shouldFosterParent() { - p.fosterParent(n) - } else { - p.top().AppendChild(n) - } - - if n.Type == ElementNode { - p.oe = append(p.oe, n) - } -} - -// shouldFosterParent returns whether the next node to be added should be -// foster parented. -func (p *parser) shouldFosterParent() bool { - if p.fosterParenting { - switch p.top().DataAtom { - case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: - return true - } - } - return false -} - -// fosterParent adds a child node according to the foster parenting rules. -// Section 12.2.6.1, "foster parenting". -func (p *parser) fosterParent(n *Node) { - var table, parent, prev, template *Node - var i int - for i = len(p.oe) - 1; i >= 0; i-- { - if p.oe[i].DataAtom == a.Table { - table = p.oe[i] - break - } - } - - var j int - for j = len(p.oe) - 1; j >= 0; j-- { - if p.oe[j].DataAtom == a.Template { - template = p.oe[j] - break - } - } - - if template != nil && (table == nil || j < i) { - template.AppendChild(n) - return - } - - if table == nil { - // The foster parent is the html element. - parent = p.oe[0] - } else { - parent = table.Parent - } - if parent == nil { - parent = p.oe[i-1] - } - - if table != nil { - prev = table.PrevSibling - } else { - prev = parent.LastChild - } - if prev != nil && prev.Type == TextNode && n.Type == TextNode { - prev.Data += n.Data - return - } - - parent.InsertBefore(n, table) -} - -// addText adds text to the preceding node if it is a text node, or else it -// calls addChild with a new text node. -func (p *parser) addText(text string) { - if text == "" { - return - } - - if p.shouldFosterParent() { - p.fosterParent(&Node{ - Type: TextNode, - Data: text, - }) - return - } - - t := p.top() - if n := t.LastChild; n != nil && n.Type == TextNode { - n.Data += text - return - } - p.addChild(&Node{ - Type: TextNode, - Data: text, - }) -} - -// addElement adds a child element based on the current token. -func (p *parser) addElement() { - p.addChild(&Node{ - Type: ElementNode, - DataAtom: p.tok.DataAtom, - Data: p.tok.Data, - Attr: p.tok.Attr, - }) -} - -// Section 12.2.4.3. -func (p *parser) addFormattingElement() { - tagAtom, attr := p.tok.DataAtom, p.tok.Attr - p.addElement() - - // Implement the Noah's Ark clause, but with three per family instead of two. - identicalElements := 0 -findIdenticalElements: - for i := len(p.afe) - 1; i >= 0; i-- { - n := p.afe[i] - if n.Type == scopeMarkerNode { - break - } - if n.Type != ElementNode { - continue - } - if n.Namespace != "" { - continue - } - if n.DataAtom != tagAtom { - continue - } - if len(n.Attr) != len(attr) { - continue - } - compareAttributes: - for _, t0 := range n.Attr { - for _, t1 := range attr { - if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val { - // Found a match for this attribute, continue with the next attribute. - continue compareAttributes - } - } - // If we get here, there is no attribute that matches a. - // Therefore the element is not identical to the new one. - continue findIdenticalElements - } - - identicalElements++ - if identicalElements >= 3 { - p.afe.remove(n) - } - } - - p.afe = append(p.afe, p.top()) -} - -// Section 12.2.4.3. -func (p *parser) clearActiveFormattingElements() { - for { - n := p.afe.pop() - if len(p.afe) == 0 || n.Type == scopeMarkerNode { - return - } - } -} - -// Section 12.2.4.3. -func (p *parser) reconstructActiveFormattingElements() { - n := p.afe.top() - if n == nil { - return - } - if n.Type == scopeMarkerNode || p.oe.index(n) != -1 { - return - } - i := len(p.afe) - 1 - for n.Type != scopeMarkerNode && p.oe.index(n) == -1 { - if i == 0 { - i = -1 - break - } - i-- - n = p.afe[i] - } - for { - i++ - clone := p.afe[i].clone() - p.addChild(clone) - p.afe[i] = clone - if i == len(p.afe)-1 { - break - } - } -} - -// Section 12.2.5. -func (p *parser) acknowledgeSelfClosingTag() { - p.hasSelfClosingToken = false -} - -// An insertion mode (section 12.2.4.1) is the state transition function from -// a particular state in the HTML5 parser's state machine. It updates the -// parser's fields depending on parser.tok (where ErrorToken means EOF). -// It returns whether the token was consumed. -type insertionMode func(*parser) bool - -// setOriginalIM sets the insertion mode to return to after completing a text or -// inTableText insertion mode. -// Section 12.2.4.1, "using the rules for". -func (p *parser) setOriginalIM() { - if p.originalIM != nil { - panic("html: bad parser state: originalIM was set twice") - } - p.originalIM = p.im -} - -// Section 12.2.4.1, "reset the insertion mode". -func (p *parser) resetInsertionMode() { - for i := len(p.oe) - 1; i >= 0; i-- { - n := p.oe[i] - last := i == 0 - if last && p.context != nil { - n = p.context - } - - switch n.DataAtom { - case a.Select: - if !last { - for ancestor, first := n, p.oe[0]; ancestor != first; { - if ancestor == first { - break - } - ancestor = p.oe[p.oe.index(ancestor)-1] - switch ancestor.DataAtom { - case a.Template: - p.im = inSelectIM - return - case a.Table: - p.im = inSelectInTableIM - return - } - } - } - p.im = inSelectIM - case a.Td, a.Th: - // TODO: remove this divergence from the HTML5 spec. - // - // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 - p.im = inCellIM - case a.Tr: - p.im = inRowIM - case a.Tbody, a.Thead, a.Tfoot: - p.im = inTableBodyIM - case a.Caption: - p.im = inCaptionIM - case a.Colgroup: - p.im = inColumnGroupIM - case a.Table: - p.im = inTableIM - case a.Template: - p.im = p.templateStack.top() - case a.Head: - // TODO: remove this divergence from the HTML5 spec. - // - // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 - p.im = inHeadIM - case a.Body: - p.im = inBodyIM - case a.Frameset: - p.im = inFramesetIM - case a.Html: - if p.head == nil { - p.im = beforeHeadIM - } else { - p.im = afterHeadIM - } - default: - if last { - p.im = inBodyIM - return - } - continue - } - return - } -} - -const whitespace = " \t\r\n\f" - -// Section 12.2.6.4.1. -func initialIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) - if len(p.tok.Data) == 0 { - // It was all whitespace, so ignore it. - return true - } - case CommentToken: - p.doc.AppendChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - case DoctypeToken: - n, quirks := parseDoctype(p.tok.Data) - p.doc.AppendChild(n) - p.quirks = quirks - p.im = beforeHTMLIM - return true - } - p.quirks = true - p.im = beforeHTMLIM - return false -} - -// Section 12.2.6.4.2. -func beforeHTMLIM(p *parser) bool { - switch p.tok.Type { - case DoctypeToken: - // Ignore the token. - return true - case TextToken: - p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) - if len(p.tok.Data) == 0 { - // It was all whitespace, so ignore it. - return true - } - case StartTagToken: - if p.tok.DataAtom == a.Html { - p.addElement() - p.im = beforeHeadIM - return true - } - case EndTagToken: - switch p.tok.DataAtom { - case a.Head, a.Body, a.Html, a.Br: - p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) - return false - default: - // Ignore the token. - return true - } - case CommentToken: - p.doc.AppendChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - } - p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) - return false -} - -// Section 12.2.6.4.3. -func beforeHeadIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) - if len(p.tok.Data) == 0 { - // It was all whitespace, so ignore it. - return true - } - case StartTagToken: - switch p.tok.DataAtom { - case a.Head: - p.addElement() - p.head = p.top() - p.im = inHeadIM - return true - case a.Html: - return inBodyIM(p) - } - case EndTagToken: - switch p.tok.DataAtom { - case a.Head, a.Body, a.Html, a.Br: - p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) - return false - default: - // Ignore the token. - return true - } - case CommentToken: - p.addChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - case DoctypeToken: - // Ignore the token. - return true - } - - p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) - return false -} - -// Section 12.2.6.4.4. -func inHeadIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - s := strings.TrimLeft(p.tok.Data, whitespace) - if len(s) < len(p.tok.Data) { - // Add the initial whitespace to the current node. - p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) - if s == "" { - return true - } - p.tok.Data = s - } - case StartTagToken: - switch p.tok.DataAtom { - case a.Html: - return inBodyIM(p) - case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: - p.addElement() - p.oe.pop() - p.acknowledgeSelfClosingTag() - return true - case a.Script, a.Title, a.Noscript, a.Noframes, a.Style: - p.addElement() - p.setOriginalIM() - p.im = textIM - return true - case a.Head: - // Ignore the token. - return true - case a.Template: - p.addElement() - p.afe = append(p.afe, &scopeMarker) - p.framesetOK = false - p.im = inTemplateIM - p.templateStack = append(p.templateStack, inTemplateIM) - return true - } - case EndTagToken: - switch p.tok.DataAtom { - case a.Head: - p.oe.pop() - p.im = afterHeadIM - return true - case a.Body, a.Html, a.Br: - p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) - return false - case a.Template: - if !p.oe.contains(a.Template) { - return true - } - p.generateAllImpliedEndTags() - if n := p.oe.top(); n.DataAtom != a.Template { - return true - } - p.popUntil(defaultScope, a.Template) - p.clearActiveFormattingElements() - p.templateStack.pop() - p.resetInsertionMode() - return true - default: - // Ignore the token. - return true - } - case CommentToken: - p.addChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - case DoctypeToken: - // Ignore the token. - return true - } - - p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) - return false -} - -// Section 12.2.6.4.6. -func afterHeadIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - s := strings.TrimLeft(p.tok.Data, whitespace) - if len(s) < len(p.tok.Data) { - // Add the initial whitespace to the current node. - p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) - if s == "" { - return true - } - p.tok.Data = s - } - case StartTagToken: - switch p.tok.DataAtom { - case a.Html: - return inBodyIM(p) - case a.Body: - p.addElement() - p.framesetOK = false - p.im = inBodyIM - return true - case a.Frameset: - p.addElement() - p.im = inFramesetIM - return true - case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: - p.oe = append(p.oe, p.head) - defer p.oe.remove(p.head) - return inHeadIM(p) - case a.Head: - // Ignore the token. - return true - } - case EndTagToken: - switch p.tok.DataAtom { - case a.Body, a.Html, a.Br: - // Drop down to creating an implied tag. - case a.Template: - return inHeadIM(p) - default: - // Ignore the token. - return true - } - case CommentToken: - p.addChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - case DoctypeToken: - // Ignore the token. - return true - } - - p.parseImpliedToken(StartTagToken, a.Body, a.Body.String()) - p.framesetOK = true - return false -} - -// copyAttributes copies attributes of src not found on dst to dst. -func copyAttributes(dst *Node, src Token) { - if len(src.Attr) == 0 { - return - } - attr := map[string]string{} - for _, t := range dst.Attr { - attr[t.Key] = t.Val - } - for _, t := range src.Attr { - if _, ok := attr[t.Key]; !ok { - dst.Attr = append(dst.Attr, t) - attr[t.Key] = t.Val - } - } -} - -// Section 12.2.6.4.7. -func inBodyIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - d := p.tok.Data - switch n := p.oe.top(); n.DataAtom { - case a.Pre, a.Listing: - if n.FirstChild == nil { - // Ignore a newline at the start of a
 block.
-				if d != "" && d[0] == '\r' {
-					d = d[1:]
-				}
-				if d != "" && d[0] == '\n' {
-					d = d[1:]
-				}
-			}
-		}
-		d = strings.Replace(d, "\x00", "", -1)
-		if d == "" {
-			return true
-		}
-		p.reconstructActiveFormattingElements()
-		p.addText(d)
-		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
-			// There were non-whitespace characters inserted.
-			p.framesetOK = false
-		}
-	case StartTagToken:
-		switch p.tok.DataAtom {
-		case a.Html:
-			if p.oe.contains(a.Template) {
-				return true
-			}
-			copyAttributes(p.oe[0], p.tok)
-		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
-			return inHeadIM(p)
-		case a.Body:
-			if p.oe.contains(a.Template) {
-				return true
-			}
-			if len(p.oe) >= 2 {
-				body := p.oe[1]
-				if body.Type == ElementNode && body.DataAtom == a.Body {
-					p.framesetOK = false
-					copyAttributes(body, p.tok)
-				}
-			}
-		case a.Frameset:
-			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
-				// Ignore the token.
-				return true
-			}
-			body := p.oe[1]
-			if body.Parent != nil {
-				body.Parent.RemoveChild(body)
-			}
-			p.oe = p.oe[:1]
-			p.addElement()
-			p.im = inFramesetIM
-			return true
-		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-			p.popUntil(buttonScope, a.P)
-			switch n := p.top(); n.DataAtom {
-			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-				p.oe.pop()
-			}
-			p.addElement()
-		case a.Pre, a.Listing:
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-			// The newline, if any, will be dealt with by the TextToken case.
-			p.framesetOK = false
-		case a.Form:
-			if p.form != nil && !p.oe.contains(a.Template) {
-				// Ignore the token
-				return true
-			}
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-			if !p.oe.contains(a.Template) {
-				p.form = p.top()
-			}
-		case a.Li:
-			p.framesetOK = false
-			for i := len(p.oe) - 1; i >= 0; i-- {
-				node := p.oe[i]
-				switch node.DataAtom {
-				case a.Li:
-					p.oe = p.oe[:i]
-				case a.Address, a.Div, a.P:
-					continue
-				default:
-					if !isSpecialElement(node) {
-						continue
-					}
-				}
-				break
-			}
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-		case a.Dd, a.Dt:
-			p.framesetOK = false
-			for i := len(p.oe) - 1; i >= 0; i-- {
-				node := p.oe[i]
-				switch node.DataAtom {
-				case a.Dd, a.Dt:
-					p.oe = p.oe[:i]
-				case a.Address, a.Div, a.P:
-					continue
-				default:
-					if !isSpecialElement(node) {
-						continue
-					}
-				}
-				break
-			}
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-		case a.Plaintext:
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-		case a.Button:
-			p.popUntil(defaultScope, a.Button)
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			p.framesetOK = false
-		case a.A:
-			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
-				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
-					p.inBodyEndTagFormatting(a.A)
-					p.oe.remove(n)
-					p.afe.remove(n)
-					break
-				}
-			}
-			p.reconstructActiveFormattingElements()
-			p.addFormattingElement()
-		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
-			p.reconstructActiveFormattingElements()
-			p.addFormattingElement()
-		case a.Nobr:
-			p.reconstructActiveFormattingElements()
-			if p.elementInScope(defaultScope, a.Nobr) {
-				p.inBodyEndTagFormatting(a.Nobr)
-				p.reconstructActiveFormattingElements()
-			}
-			p.addFormattingElement()
-		case a.Applet, a.Marquee, a.Object:
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			p.afe = append(p.afe, &scopeMarker)
-			p.framesetOK = false
-		case a.Table:
-			if !p.quirks {
-				p.popUntil(buttonScope, a.P)
-			}
-			p.addElement()
-			p.framesetOK = false
-			p.im = inTableIM
-			return true
-		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			p.oe.pop()
-			p.acknowledgeSelfClosingTag()
-			if p.tok.DataAtom == a.Input {
-				for _, t := range p.tok.Attr {
-					if t.Key == "type" {
-						if strings.ToLower(t.Val) == "hidden" {
-							// Skip setting framesetOK = false
-							return true
-						}
-					}
-				}
-			}
-			p.framesetOK = false
-		case a.Param, a.Source, a.Track:
-			p.addElement()
-			p.oe.pop()
-			p.acknowledgeSelfClosingTag()
-		case a.Hr:
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-			p.oe.pop()
-			p.acknowledgeSelfClosingTag()
-			p.framesetOK = false
-		case a.Image:
-			p.tok.DataAtom = a.Img
-			p.tok.Data = a.Img.String()
-			return false
-		case a.Isindex:
-			if p.form != nil {
-				// Ignore the token.
-				return true
-			}
-			action := ""
-			prompt := "This is a searchable index. Enter search keywords: "
-			attr := []Attribute{{Key: "name", Val: "isindex"}}
-			for _, t := range p.tok.Attr {
-				switch t.Key {
-				case "action":
-					action = t.Val
-				case "name":
-					// Ignore the attribute.
-				case "prompt":
-					prompt = t.Val
-				default:
-					attr = append(attr, t)
-				}
-			}
-			p.acknowledgeSelfClosingTag()
-			p.popUntil(buttonScope, a.P)
-			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
-			if action != "" {
-				p.form.Attr = []Attribute{{Key: "action", Val: action}}
-			}
-			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
-			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
-			p.addText(prompt)
-			p.addChild(&Node{
-				Type:     ElementNode,
-				DataAtom: a.Input,
-				Data:     a.Input.String(),
-				Attr:     attr,
-			})
-			p.oe.pop()
-			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
-			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
-			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
-		case a.Textarea:
-			p.addElement()
-			p.setOriginalIM()
-			p.framesetOK = false
-			p.im = textIM
-		case a.Xmp:
-			p.popUntil(buttonScope, a.P)
-			p.reconstructActiveFormattingElements()
-			p.framesetOK = false
-			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
-		case a.Iframe:
-			p.framesetOK = false
-			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
-		case a.Noembed, a.Noscript:
-			p.addElement()
-			p.setOriginalIM()
-			p.im = textIM
-		case a.Select:
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			p.framesetOK = false
-			p.im = inSelectIM
-			return true
-		case a.Optgroup, a.Option:
-			if p.top().DataAtom == a.Option {
-				p.oe.pop()
-			}
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-		case a.Rb, a.Rtc:
-			if p.elementInScope(defaultScope, a.Ruby) {
-				p.generateImpliedEndTags()
-			}
-			p.addElement()
-		case a.Rp, a.Rt:
-			if p.elementInScope(defaultScope, a.Ruby) {
-				p.generateImpliedEndTags("rtc")
-			}
-			p.addElement()
-		case a.Math, a.Svg:
-			p.reconstructActiveFormattingElements()
-			if p.tok.DataAtom == a.Math {
-				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
-			} else {
-				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
-			}
-			adjustForeignAttributes(p.tok.Attr)
-			p.addElement()
-			p.top().Namespace = p.tok.Data
-			if p.hasSelfClosingToken {
-				p.oe.pop()
-				p.acknowledgeSelfClosingTag()
-			}
-			return true
-		case a.Frame:
-			// TODO: remove this divergence from the HTML5 spec.
-			if p.oe.contains(a.Template) {
-				p.addElement()
-				return true
-			}
-		case a.Caption, a.Col, a.Colgroup, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
-			// Ignore the token.
-		default:
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-		}
-	case EndTagToken:
-		switch p.tok.DataAtom {
-		case a.Body:
-			if p.elementInScope(defaultScope, a.Body) {
-				p.im = afterBodyIM
-			}
-		case a.Html:
-			if p.elementInScope(defaultScope, a.Body) {
-				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
-				return false
-			}
-			return true
-		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
-			p.popUntil(defaultScope, p.tok.DataAtom)
-		case a.Form:
-			if p.oe.contains(a.Template) {
-				i := p.indexOfElementInScope(defaultScope, a.Form)
-				if i == -1 {
-					// Ignore the token.
-					return true
-				}
-				p.generateImpliedEndTags()
-				if p.oe[i].DataAtom != a.Form {
-					// Ignore the token.
-					return true
-				}
-				p.popUntil(defaultScope, a.Form)
-			} else {
-				node := p.form
-				p.form = nil
-				i := p.indexOfElementInScope(defaultScope, a.Form)
-				if node == nil || i == -1 || p.oe[i] != node {
-					// Ignore the token.
-					return true
-				}
-				p.generateImpliedEndTags()
-				p.oe.remove(node)
-			}
-		case a.P:
-			if !p.elementInScope(buttonScope, a.P) {
-				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
-			}
-			p.popUntil(buttonScope, a.P)
-		case a.Li:
-			p.popUntil(listItemScope, a.Li)
-		case a.Dd, a.Dt:
-			p.popUntil(defaultScope, p.tok.DataAtom)
-		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
-		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
-			p.inBodyEndTagFormatting(p.tok.DataAtom)
-		case a.Applet, a.Marquee, a.Object:
-			if p.popUntil(defaultScope, p.tok.DataAtom) {
-				p.clearActiveFormattingElements()
-			}
-		case a.Br:
-			p.tok.Type = StartTagToken
-			return false
-		case a.Template:
-			return inHeadIM(p)
-		default:
-			p.inBodyEndTagOther(p.tok.DataAtom)
-		}
-	case CommentToken:
-		p.addChild(&Node{
-			Type: CommentNode,
-			Data: p.tok.Data,
-		})
-	case ErrorToken:
-		// TODO: remove this divergence from the HTML5 spec.
-		if len(p.templateStack) > 0 {
-			p.im = inTemplateIM
-			return false
-		} else {
-			for _, e := range p.oe {
-				switch e.DataAtom {
-				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
-					a.Thead, a.Tr, a.Body, a.Html:
-				default:
-					return true
-				}
-			}
-		}
-	}
-
-	return true
-}
-
-func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
-	// This is the "adoption agency" algorithm, described at
-	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
-
-	// TODO: this is a fairly literal line-by-line translation of that algorithm.
-	// Once the code successfully parses the comprehensive test suite, we should
-	// refactor this code to be more idiomatic.
-
-	// Steps 1-4. The outer loop.
-	for i := 0; i < 8; i++ {
-		// Step 5. Find the formatting element.
-		var formattingElement *Node
-		for j := len(p.afe) - 1; j >= 0; j-- {
-			if p.afe[j].Type == scopeMarkerNode {
-				break
-			}
-			if p.afe[j].DataAtom == tagAtom {
-				formattingElement = p.afe[j]
-				break
-			}
-		}
-		if formattingElement == nil {
-			p.inBodyEndTagOther(tagAtom)
-			return
-		}
-		feIndex := p.oe.index(formattingElement)
-		if feIndex == -1 {
-			p.afe.remove(formattingElement)
-			return
-		}
-		if !p.elementInScope(defaultScope, tagAtom) {
-			// Ignore the tag.
-			return
-		}
-
-		// Steps 9-10. Find the furthest block.
-		var furthestBlock *Node
-		for _, e := range p.oe[feIndex:] {
-			if isSpecialElement(e) {
-				furthestBlock = e
-				break
-			}
-		}
-		if furthestBlock == nil {
-			e := p.oe.pop()
-			for e != formattingElement {
-				e = p.oe.pop()
-			}
-			p.afe.remove(e)
-			return
-		}
-
-		// Steps 11-12. Find the common ancestor and bookmark node.
-		commonAncestor := p.oe[feIndex-1]
-		bookmark := p.afe.index(formattingElement)
-
-		// Step 13. The inner loop. Find the lastNode to reparent.
-		lastNode := furthestBlock
-		node := furthestBlock
-		x := p.oe.index(node)
-		// Steps 13.1-13.2
-		for j := 0; j < 3; j++ {
-			// Step 13.3.
-			x--
-			node = p.oe[x]
-			// Step 13.4 - 13.5.
-			if p.afe.index(node) == -1 {
-				p.oe.remove(node)
-				continue
-			}
-			// Step 13.6.
-			if node == formattingElement {
-				break
-			}
-			// Step 13.7.
-			clone := node.clone()
-			p.afe[p.afe.index(node)] = clone
-			p.oe[p.oe.index(node)] = clone
-			node = clone
-			// Step 13.8.
-			if lastNode == furthestBlock {
-				bookmark = p.afe.index(node) + 1
-			}
-			// Step 13.9.
-			if lastNode.Parent != nil {
-				lastNode.Parent.RemoveChild(lastNode)
-			}
-			node.AppendChild(lastNode)
-			// Step 13.10.
-			lastNode = node
-		}
-
-		// Step 14. Reparent lastNode to the common ancestor,
-		// or for misnested table nodes, to the foster parent.
-		if lastNode.Parent != nil {
-			lastNode.Parent.RemoveChild(lastNode)
-		}
-		switch commonAncestor.DataAtom {
-		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
-			p.fosterParent(lastNode)
-		case a.Template:
-			// TODO: remove namespace checking
-			if commonAncestor.Namespace == "html" {
-				commonAncestor = commonAncestor.LastChild
-			}
-			fallthrough
-		default:
-			commonAncestor.AppendChild(lastNode)
-		}
-
-		// Steps 15-17. Reparent nodes from the furthest block's children
-		// to a clone of the formatting element.
-		clone := formattingElement.clone()
-		reparentChildren(clone, furthestBlock)
-		furthestBlock.AppendChild(clone)
-
-		// Step 18. Fix up the list of active formatting elements.
-		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
-			// Move the bookmark with the rest of the list.
-			bookmark--
-		}
-		p.afe.remove(formattingElement)
-		p.afe.insert(bookmark, clone)
-
-		// Step 19. Fix up the stack of open elements.
-		p.oe.remove(formattingElement)
-		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
-	}
-}
-
-// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
-// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
-// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
-func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
-	for i := len(p.oe) - 1; i >= 0; i-- {
-		if p.oe[i].DataAtom == tagAtom {
-			p.oe = p.oe[:i]
-			break
-		}
-		if isSpecialElement(p.oe[i]) {
-			break
-		}
-	}
-}
-
-// Section 12.2.6.4.8.
-func textIM(p *parser) bool {
-	switch p.tok.Type {
-	case ErrorToken:
-		p.oe.pop()
-	case TextToken:
-		d := p.tok.Data
-		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
-			// Ignore a newline at the start of a