// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package html import ( "strings" ) // parseDoctype parses the data from a DoctypeToken into a name, // public identifier, and system identifier. It returns a Node whose Type // is DoctypeNode, whose Data is the name, and which has attributes // named "system" and "public" for the two identifiers if they were present. // quirks is whether the document should be parsed in "quirks mode". func parseDoctype(s string) (n *Node, quirks bool) { n = &Node{Type: DoctypeNode} // Find the name. space := strings.IndexAny(s, whitespace) if space == -1 { space = len(s) } n.Data = s[:space] // The comparison to "html" is case-sensitive. if n.Data != "html" { quirks = true } n.Data = strings.ToLower(n.Data) s = strings.TrimLeft(s[space:], whitespace) if len(s) < 6 { // It can't start with "PUBLIC" or "SYSTEM". // Ignore the rest of the string. return n, quirks || s != "" } key := strings.ToLower(s[:6]) s = s[6:] for key == "public" || key == "system" { s = strings.TrimLeft(s, whitespace) if s == "" { break } quote := s[0] if quote != '"' && quote != '\'' { break } s = s[1:] q := strings.IndexRune(s, rune(quote)) var id string if q == -1 { id = s s = "" } else { id = s[:q] s = s[q+1:] } n.Attr = append(n.Attr, Attribute{Key: key, Val: id}) if key == "public" { key = "system" } else { key = "" } } if key != "" || s != "" { quirks = true } else if len(n.Attr) > 0 { if n.Attr[0].Key == "public" { public := strings.ToLower(n.Attr[0].Val) switch public { case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html": quirks = true default: for _, q := range quirkyIDs { if strings.HasPrefix(public, q) { quirks = true break } } } // The following two public IDs only cause quirks mode if there is no system ID. if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") || strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) { quirks = true } } if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" && strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" { quirks = true } } return n, quirks } // quirkyIDs is a list of public doctype identifiers that cause a document // to be interpreted in quirks mode. The identifiers should be in lower case. var quirkyIDs = []string{ "+//silmaril//dtd html pro v0r11 19970101//", "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", "-//as//dtd html 3.0 aswedit + extensions//", "-//ietf//dtd html 2.0 level 1//", "-//ietf//dtd html 2.0 level 2//", "-//ietf//dtd html 2.0 strict level 1//", "-//ietf//dtd html 2.0 strict level 2//", "-//ietf//dtd html 2.0 strict//", "-//ietf//dtd html 2.0//", "-//ietf//dtd html 2.1e//", "-//ietf//dtd html 3.0//", "-//ietf//dtd html 3.2 final//", "-//ietf//dtd html 3.2//", "-//ietf//dtd html 3//", "-//ietf//dtd html level 0//", "-//ietf//dtd html level 1//", "-//ietf//dtd html level 2//", "-//ietf//dtd html level 3//", "-//ietf//dtd html strict level 0//", "-//ietf//dtd html strict level 1//", "-//ietf//dtd html strict level 2//", "-//ietf//dtd html strict level 3//", "-//ietf//dtd html strict//", "-//ietf//dtd html//", "-//metrius//dtd metrius presentational//", "-//microsoft//dtd internet explorer 2.0 html strict//", "-//microsoft//dtd internet explorer 2.0 html//", "-//microsoft//dtd internet explorer 2.0 tables//", "-//microsoft//dtd internet explorer 3.0 html strict//", "-//microsoft//dtd internet explorer 3.0 html//", "-//microsoft//dtd internet explorer 3.0 tables//", "-//netscape comm. corp.//dtd html//", "-//netscape comm. corp.//dtd strict html//", "-//o'reilly and associates//dtd html 2.0//", "-//o'reilly and associates//dtd html extended 1.0//", "-//o'reilly and associates//dtd html extended relaxed 1.0//", "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", "-//spyglass//dtd html 2.0 extended//", "-//sq//dtd html 2.0 hotmetal + extensions//", "-//sun microsystems corp.//dtd hotjava html//", "-//sun microsystems corp.//dtd hotjava strict html//", "-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//", "-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//", "-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//", "-//w3c//dtd html 4.0 transitional//", "-//w3c//dtd html experimental 19960712//", "-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//", "-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//", "-//webtechs//dtd mozilla html//", }