From 634f3fd022ba8ad3f064944ab29f930689f14b38 Mon Sep 17 00:00:00 2001
From: Dimitri Sokolyuk
Date: Tue, 24 Jul 2018 10:53:21 +0200
Subject: limit parse depth

---
 href.go | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/href.go b/href.go
index bd6a61c..d5b0c0f 100644
--- a/href.go
+++ b/href.go
@@ -14,11 +14,20 @@ var (
 	errNotHTML = errors.New("not HTML")
 	errNotOK   = errors.New("not OK")
 	errTooBig  = errors.New("content too big")
+	errTooDeep = errors.New("content too deep")
 	errNoTitle = errors.New("no title")
 )
 
-func title(n *html.Node) (string, error) {
+const (
+	maxLength = 10 * 1024 * 1024 // 10MB
+	maxDepth  = 10
+)
+
+func title(n *html.Node, depth int) (string, error) {
 	var s string
+	if depth <= 0 {
+		return "", errTooDeep
+	}
 	if n.Type == html.ElementNode && n.Data == "title" {
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 			s += c.Data
@@ -26,15 +35,13 @@ func title(n *html.Node) (string, error) {
 		return strings.TrimSpace(s), nil
 	}
 	for c := n.FirstChild; c != nil; c = c.NextSibling {
-		if t, err := title(c); err == nil {
+		if t, err := title(c, depth-1); err == nil {
 			return t, nil
 		}
 	}
 	return "", errNoTitle
 }
 
-const maxLength = 10 * 1024 * 1024 // 10MB
-
 func getTitle(uri string) (string, error) {
 	resp, err := http.Get(uri)
 	if err != nil {
@@ -65,7 +72,7 @@ func getTitle(uri string) (string, error) {
 		return "", err
 	}
 
-	return title(doc)
+	return title(doc, maxDepth)
 }
 
 func getLinks(s string) (ret []string) {
-- 
cgit v1.2.3