From 354da79bb2edaa1af7d909d2774e7d67eb4e198c Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 23 Jan 2018 18:17:51 +0100 Subject: Add vendor --- vendor/golang.org/x/net/html/parse_test.go | 388 +++++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 vendor/golang.org/x/net/html/parse_test.go (limited to 'vendor/golang.org/x/net/html/parse_test.go') diff --git a/vendor/golang.org/x/net/html/parse_test.go b/vendor/golang.org/x/net/html/parse_test.go new file mode 100644 index 0000000..7e47d11 --- /dev/null +++ b/vendor/golang.org/x/net/html/parse_test.go @@ -0,0 +1,388 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "runtime" + "sort" + "strings" + "testing" + + "golang.org/x/net/html/atom" +) + +// readParseTest reads a single test case from r. +func readParseTest(r *bufio.Reader) (text, want, context string, err error) { + line, err := r.ReadSlice('\n') + if err != nil { + return "", "", "", err + } + var b []byte + + // Read the HTML. + if string(line) != "#data\n" { + return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line) + } + for { + line, err = r.ReadSlice('\n') + if err != nil { + return "", "", "", err + } + if line[0] == '#' { + break + } + b = append(b, line...) + } + text = strings.TrimSuffix(string(b), "\n") + b = b[:0] + + // Skip the error list. + if string(line) != "#errors\n" { + return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line) + } + for { + line, err = r.ReadSlice('\n') + if err != nil { + return "", "", "", err + } + if line[0] == '#' { + break + } + } + + if string(line) == "#document-fragment\n" { + line, err = r.ReadSlice('\n') + if err != nil { + return "", "", "", err + } + context = strings.TrimSpace(string(line)) + line, err = r.ReadSlice('\n') + if err != nil { + return "", "", "", err + } + } + + // Read the dump of what the parse tree should be. + if string(line) != "#document\n" { + return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line) + } + inQuote := false + for { + line, err = r.ReadSlice('\n') + if err != nil && err != io.EOF { + return "", "", "", err + } + trimmed := bytes.Trim(line, "| \n") + if len(trimmed) > 0 { + if line[0] == '|' && trimmed[0] == '"' { + inQuote = true + } + if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) { + inQuote = false + } + } + if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote { + break + } + b = append(b, line...) + } + return text, string(b), context, nil +} + +func dumpIndent(w io.Writer, level int) { + io.WriteString(w, "| ") + for i := 0; i < level; i++ { + io.WriteString(w, " ") + } +} + +type sortedAttributes []Attribute + +func (a sortedAttributes) Len() int { + return len(a) +} + +func (a sortedAttributes) Less(i, j int) bool { + if a[i].Namespace != a[j].Namespace { + return a[i].Namespace < a[j].Namespace + } + return a[i].Key < a[j].Key +} + +func (a sortedAttributes) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func dumpLevel(w io.Writer, n *Node, level int) error { + dumpIndent(w, level) + switch n.Type { + case ErrorNode: + return errors.New("unexpected ErrorNode") + case DocumentNode: + return errors.New("unexpected DocumentNode") + case ElementNode: + if n.Namespace != "" { + fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data) + } else { + fmt.Fprintf(w, "<%s>", n.Data) + } + attr := sortedAttributes(n.Attr) + sort.Sort(attr) + for _, a := range attr { + io.WriteString(w, "\n") + dumpIndent(w, level+1) + if a.Namespace != "" { + fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val) + } else { + fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val) + } + } + case TextNode: + fmt.Fprintf(w, `"%s"`, n.Data) + case CommentNode: + fmt.Fprintf(w, "", n.Data) + case DoctypeNode: + fmt.Fprintf(w, "") + case scopeMarkerNode: + return errors.New("unexpected scopeMarkerNode") + default: + return errors.New("unknown node type") + } + io.WriteString(w, "\n") + for c := n.FirstChild; c != nil; c = c.NextSibling { + if err := dumpLevel(w, c, level+1); err != nil { + return err + } + } + return nil +} + +func dump(n *Node) (string, error) { + if n == nil || n.FirstChild == nil { + return "", nil + } + var b bytes.Buffer + for c := n.FirstChild; c != nil; c = c.NextSibling { + if err := dumpLevel(&b, c, 0); err != nil { + return "", err + } + } + return b.String(), nil +} + +const testDataDir = "testdata/webkit/" + +func TestParser(t *testing.T) { + testFiles, err := filepath.Glob(testDataDir + "*.dat") + if err != nil { + t.Fatal(err) + } + for _, tf := range testFiles { + f, err := os.Open(tf) + if err != nil { + t.Fatal(err) + } + defer f.Close() + r := bufio.NewReader(f) + + for i := 0; ; i++ { + text, want, context, err := readParseTest(r) + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + + err = testParseCase(text, want, context) + + if err != nil { + t.Errorf("%s test #%d %q, %s", tf, i, text, err) + } + } + } +} + +// testParseCase tests one test case from the test files. If the test does not +// pass, it returns an error that explains the failure. +// text is the HTML to be parsed, want is a dump of the correct parse tree, +// and context is the name of the context node, if any. +func testParseCase(text, want, context string) (err error) { + defer func() { + if x := recover(); x != nil { + switch e := x.(type) { + case error: + err = e + default: + err = fmt.Errorf("%v", e) + } + } + }() + + var doc *Node + if context == "" { + doc, err = Parse(strings.NewReader(text)) + if err != nil { + return err + } + } else { + contextNode := &Node{ + Type: ElementNode, + DataAtom: atom.Lookup([]byte(context)), + Data: context, + } + nodes, err := ParseFragment(strings.NewReader(text), contextNode) + if err != nil { + return err + } + doc = &Node{ + Type: DocumentNode, + } + for _, n := range nodes { + doc.AppendChild(n) + } + } + + if err := checkTreeConsistency(doc); err != nil { + return err + } + + got, err := dump(doc) + if err != nil { + return err + } + // Compare the parsed tree to the #document section. + if got != want { + return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want) + } + + if renderTestBlacklist[text] || context != "" { + return nil + } + + // Check that rendering and re-parsing results in an identical tree. + pr, pw := io.Pipe() + go func() { + pw.CloseWithError(Render(pw, doc)) + }() + doc1, err := Parse(pr) + if err != nil { + return err + } + got1, err := dump(doc1) + if err != nil { + return err + } + if got != got1 { + return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1) + } + + return nil +} + +// Some test input result in parse trees are not 'well-formed' despite +// following the HTML5 recovery algorithms. Rendering and re-parsing such a +// tree will not result in an exact clone of that tree. We blacklist such +// inputs from the render test. +var renderTestBlacklist = map[string]bool{ + // The second will be reparented to the first 's parent. This + // results in an whose parent is an , which is not 'well-formed'. + `
XCY`: true, + // The same thing with a

: + `

`: true, + // More cases of being reparented: + `aba
brx
aoe`: true, + `

`: true, + `
`: true, + // A similar reparenting situation involving : + `123`: true, + // A element is reparented, putting it before a table. + // A <plaintext> element can't have anything after it in HTML. + `<table><plaintext><td>`: true, + `<!doctype html><table><plaintext></plaintext>`: true, + `<!doctype html><table><tbody><plaintext></plaintext>`: true, + `<!doctype html><table><tbody><tr><plaintext></plaintext>`: true, + // A form inside a table inside a form doesn't work either. + `<!doctype html><form><table></form><form></table></form>`: true, + // A script that ends at EOF may escape its own closing tag when rendered. + `<!doctype html><script><!--<script `: true, + `<!doctype html><script><!--<script <`: true, + `<!doctype html><script><!--<script <a`: true, + `<!doctype html><script><!--<script </`: true, + `<!doctype html><script><!--<script </s`: true, + `<!doctype html><script><!--<script </script`: true, + `<!doctype html><script><!--<script </scripta`: true, + `<!doctype html><script><!--<script -`: true, + `<!doctype html><script><!--<script -a`: true, + `<!doctype html><script><!--<script -<`: true, + `<!doctype html><script><!--<script --`: true, + `<!doctype html><script><!--<script --a`: true, + `<!doctype html><script><!--<script --<`: true, + `<script><!--<script `: true, + `<script><!--<script <a`: true, + `<script><!--<script </script`: true, + `<script><!--<script </scripta`: true, + `<script><!--<script -`: true, + `<script><!--<script -a`: true, + `<script><!--<script --`: true, + `<script><!--<script --a`: true, + `<script><!--<script <`: true, + `<script><!--<script </`: true, + `<script><!--<script </s`: true, + // Reconstructing the active formatting elements results in a <plaintext> + // element that contains an <a> element. + `<!doctype html><p><a><plaintext>b`: true, +} + +func TestNodeConsistency(t *testing.T) { + // inconsistentNode is a Node whose DataAtom and Data do not agree. + inconsistentNode := &Node{ + Type: ElementNode, + DataAtom: atom.Frameset, + Data: "table", + } + _, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode) + if err == nil { + t.Errorf("got nil error, want non-nil") + } +} + +func BenchmarkParser(b *testing.B) { + buf, err := ioutil.ReadFile("testdata/go1.html") + if err != nil { + b.Fatalf("could not read testdata/go1.html: %v", err) + } + b.SetBytes(int64(len(buf))) + runtime.GC() + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Parse(bytes.NewBuffer(buf)) + } +} -- cgit v1.2.3