From 13ec57cb1aeb58271cb0cde6b0ca4cd71fdfb177 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Sat, 18 Jul 2015 19:41:18 +0200 Subject: Check content type --- href.go | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'href.go') diff --git a/href.go b/href.go index c68499c..6dc72d9 100644 --- a/href.go +++ b/href.go @@ -1,13 +1,20 @@ package main import ( + "errors" "net/http" "strings" - "errors" "golang.org/x/net/html" ) +var ( + notHTML = errors.New("not HTML content") + tooBig = errors.New("content too big") +) + +const MB = 1024 * 1024 + func findTitle(n *html.Node) (s string) { if n.Type == html.ElementNode && n.Data == "title" { for c := n.FirstChild; c != nil; c = c.NextSibling { @@ -29,8 +36,14 @@ func FetchTitle(url string) (string, error) { return "", err } defer resp.Body.Close() - if resp.ContentLength > 8*1024*1024 { - return "", errors.New("content too big") + + ct := resp.Header.Get("Content-Type") + if !strings.HasPrefix(ct, "text/html") { + return "", notHTML + } + + if resp.ContentLength > 8*MB { + return "", tooBig } doc, err := html.Parse(resp.Body) -- cgit v1.2.3