From 473acc61c8392dc7ae303d91568e179c4f105a76 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 2 Jul 2019 12:12:53 +0200 Subject: add black list --- vendor/golang.org/x/net/html/parse.go | 271 ++++++++++++++++++++++++---------- 1 file changed, 189 insertions(+), 82 deletions(-) (limited to 'vendor/golang.org/x/net/html/parse.go') diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go index 7e539b1..992cff2 100644 --- a/vendor/golang.org/x/net/html/parse.go +++ b/vendor/golang.org/x/net/html/parse.go @@ -209,27 +209,6 @@ loop: p.oe = p.oe[:i+1] } -// generateAllImpliedEndTags pops nodes off the stack of open elements as long as -// the top node has a tag name of caption, colgroup, dd, div, dt, li, optgroup, option, p, rb, -// rp, rt, rtc, span, tbody, td, tfoot, th, thead or tr. -func (p *parser) generateAllImpliedEndTags() { - var i int - for i = len(p.oe) - 1; i >= 0; i-- { - n := p.oe[i] - if n.Type == ElementNode { - switch n.DataAtom { - // TODO: remove this divergence from the HTML5 spec - case a.Caption, a.Colgroup, a.Dd, a.Div, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, - a.Rp, a.Rt, a.Rtc, a.Span, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: - continue - } - } - break - } - - p.oe = p.oe[:i+1] -} - // addChild adds a child node n to the top element, and pushes n onto the stack // of open elements if it is an element node. func (p *parser) addChild(n *Node) { @@ -276,7 +255,7 @@ func (p *parser) fosterParent(n *Node) { } } - if template != nil && (table == nil || j < i) { + if template != nil && (table == nil || j > i) { template.AppendChild(n) return } @@ -460,9 +439,6 @@ func (p *parser) resetInsertionMode() { case a.Select: if !last { for ancestor, first := n, p.oe[0]; ancestor != first; { - if ancestor == first { - break - } ancestor = p.oe[p.oe.index(ancestor)-1] switch ancestor.DataAtom { case a.Template: @@ -491,6 +467,10 @@ func (p *parser) resetInsertionMode() { case a.Table: p.im = inTableIM case a.Template: + // TODO: remove this divergence from the HTML5 spec. + if n.Namespace != "" { + continue + } p.im = p.templateStack.top() case a.Head: // TODO: remove this divergence from the HTML5 spec. @@ -650,7 +630,16 @@ func inHeadIM(p *parser) bool { p.oe.pop() p.acknowledgeSelfClosingTag() return true - case a.Script, a.Title, a.Noscript, a.Noframes, a.Style: + case a.Noscript: + p.addElement() + if p.scripting { + p.setOriginalIM() + p.im = textIM + } else { + p.im = inHeadNoscriptIM + } + return true + case a.Script, a.Title, a.Noframes, a.Style: p.addElement() p.setOriginalIM() p.im = textIM @@ -679,11 +668,16 @@ func inHeadIM(p *parser) bool { if !p.oe.contains(a.Template) { return true } - p.generateAllImpliedEndTags() - if n := p.oe.top(); n.DataAtom != a.Template { - return true + // TODO: remove this divergence from the HTML5 spec. + // + // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 + p.generateImpliedEndTags() + for i := len(p.oe) - 1; i >= 0; i-- { + if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template { + p.oe = p.oe[:i] + break + } } - p.popUntil(defaultScope, a.Template) p.clearActiveFormattingElements() p.templateStack.pop() p.resetInsertionMode() @@ -707,6 +701,49 @@ func inHeadIM(p *parser) bool { return false } +// 12.2.6.4.5. +func inHeadNoscriptIM(p *parser) bool { + switch p.tok.Type { + case DoctypeToken: + // Ignore the token. + return true + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style: + return inHeadIM(p) + case a.Head, a.Noscript: + // Ignore the token. + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Noscript, a.Br: + default: + // Ignore the token. + return true + } + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) == 0 { + // It was all whitespace. + return inHeadIM(p) + } + case CommentToken: + return inHeadIM(p) + } + p.oe.pop() + if p.top().DataAtom != a.Head { + panic("html: the new current node will be a head element.") + } + p.im = inHeadIM + if p.tok.DataAtom == a.Noscript { + return true + } + return false +} + // Section 12.2.6.4.6. func afterHeadIM(p *parser) bool { switch p.tok.Type { @@ -916,7 +953,7 @@ func inBodyIM(p *parser) bool { case a.A: for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- { if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A { - p.inBodyEndTagFormatting(a.A) + p.inBodyEndTagFormatting(a.A, "a") p.oe.remove(n) p.afe.remove(n) break @@ -930,7 +967,7 @@ func inBodyIM(p *parser) bool { case a.Nobr: p.reconstructActiveFormattingElements() if p.elementInScope(defaultScope, a.Nobr) { - p.inBodyEndTagFormatting(a.Nobr) + p.inBodyEndTagFormatting(a.Nobr, "nobr") p.reconstructActiveFormattingElements() } p.addFormattingElement() @@ -1000,6 +1037,14 @@ func inBodyIM(p *parser) bool { p.acknowledgeSelfClosingTag() p.popUntil(buttonScope, a.P) p.parseImpliedToken(StartTagToken, a.Form, a.Form.String()) + if p.form == nil { + // NOTE: The 'isindex' element has been removed, + // and the 'template' element has not been designed to be + // collaborative with the index element. + // + // Ignore the token. + return true + } if action != "" { p.form.Attr = []Attribute{{Key: "action", Val: action}} } @@ -1074,13 +1119,7 @@ func inBodyIM(p *parser) bool { p.acknowledgeSelfClosingTag() } return true - case a.Frame: - // TODO: remove this divergence from the HTML5 spec. - if p.oe.contains(a.Template) { - p.addElement() - return true - } - case a.Caption, a.Col, a.Colgroup, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: + case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: // Ignore the token. default: p.reconstructActiveFormattingElements() @@ -1136,7 +1175,7 @@ func inBodyIM(p *parser) bool { case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6) case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: - p.inBodyEndTagFormatting(p.tok.DataAtom) + p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data) case a.Applet, a.Marquee, a.Object: if p.popUntil(defaultScope, p.tok.DataAtom) { p.clearActiveFormattingElements() @@ -1147,7 +1186,7 @@ func inBodyIM(p *parser) bool { case a.Template: return inHeadIM(p) default: - p.inBodyEndTagOther(p.tok.DataAtom) + p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data) } case CommentToken: p.addChild(&Node{ @@ -1174,7 +1213,7 @@ func inBodyIM(p *parser) bool { return true } -func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { +func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { // This is the "adoption agency" algorithm, described at // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency @@ -1196,7 +1235,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { } } if formattingElement == nil { - p.inBodyEndTagOther(tagAtom) + p.inBodyEndTagOther(tagAtom, tagName) return } feIndex := p.oe.index(formattingElement) @@ -1274,12 +1313,6 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { switch commonAncestor.DataAtom { case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: p.fosterParent(lastNode) - case a.Template: - // TODO: remove namespace checking - if commonAncestor.Namespace == "html" { - commonAncestor = commonAncestor.LastChild - } - fallthrough default: commonAncestor.AppendChild(lastNode) } @@ -1307,9 +1340,17 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM. // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign -func (p *parser) inBodyEndTagOther(tagAtom a.Atom) { +func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) { for i := len(p.oe) - 1; i >= 0; i-- { - if p.oe[i].DataAtom == tagAtom { + // Two element nodes have the same tag if they have the same Data (a + // string-typed field). As an optimization, for common HTML tags, each + // Data string is assigned a unique, non-zero DataAtom (a uint32-typed + // field), since integer comparison is faster than string comparison. + // Uncommon (custom) tags get a zero DataAtom. + // + // The if condition here is equivalent to (p.oe[i].Data == tagName). + if (p.oe[i].DataAtom == tagAtom) && + ((tagAtom != 0) || (p.oe[i].Data == tagName)) { p.oe = p.oe[:i] break } @@ -1351,9 +1392,6 @@ func textIM(p *parser) bool { // Section 12.2.6.4.9. func inTableIM(p *parser) bool { switch p.tok.Type { - case ErrorToken: - // Stop parsing. - return true case TextToken: p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1) switch p.oe.top().DataAtom { @@ -1448,6 +1486,8 @@ func inTableIM(p *parser) bool { case DoctypeToken: // Ignore the token. return true + case ErrorToken: + return inBodyIM(p) } p.fosterParenting = true @@ -1550,6 +1590,8 @@ func inColumnGroupIM(p *parser) bool { case a.Template: return inHeadIM(p) } + case ErrorToken: + return inBodyIM(p) } if p.oe.top().DataAtom != a.Colgroup { return true @@ -1702,8 +1744,9 @@ func inCellIM(p *parser) bool { return true } // Close the cell and reprocess. - p.popUntil(tableScope, a.Td, a.Th) - p.clearActiveFormattingElements() + if p.popUntil(tableScope, a.Td, a.Th) { + p.clearActiveFormattingElements() + } p.im = inRowIM return false } @@ -1714,9 +1757,6 @@ func inCellIM(p *parser) bool { // Section 12.2.6.4.16. func inSelectIM(p *parser) bool { switch p.tok.Type { - case ErrorToken: - // Stop parsing. - return true case TextToken: p.addText(strings.Replace(p.tok.Data, "\x00", "", -1)) case StartTagToken: @@ -1737,8 +1777,12 @@ func inSelectIM(p *parser) bool { } p.addElement() case a.Select: - p.tok.Type = EndTagToken - return false + if p.popUntil(selectScope, a.Select) { + p.resetInsertionMode() + } else { + // Ignore the token. + return true + } case a.Input, a.Keygen, a.Textarea: if p.elementInScope(selectScope, a.Select) { p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) @@ -1768,6 +1812,9 @@ func inSelectIM(p *parser) bool { case a.Select: if p.popUntil(selectScope, a.Select) { p.resetInsertionMode() + } else { + // Ignore the token. + return true } case a.Template: return inHeadIM(p) @@ -1780,6 +1827,8 @@ func inSelectIM(p *parser) bool { case DoctypeToken: // Ignore the token. return true + case ErrorToken: + return inBodyIM(p) } return true @@ -1791,13 +1840,22 @@ func inSelectInTableIM(p *parser) bool { case StartTagToken, EndTagToken: switch p.tok.DataAtom { case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th: - if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) { - p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) - return false - } else { + if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) { // Ignore the token. return true } + // This is like p.popUntil(selectScope, a.Select), but it also + // matches , not just