package main import ( "errors" "net/http" "strings" "golang.org/x/net/html" "golang.org/x/net/html/charset" ) var ( errNotHTML = errors.New("not HTML") errTooBig = errors.New("content too big") errNotTitle = errors.New("no Title") ) func title(n *html.Node) (string, error) { var s string if n.Type == html.ElementNode && n.Data == "title" { for c := n.FirstChild; c != nil; c = c.NextSibling { s += c.Data } return strings.TrimSpace(s), nil } for c := n.FirstChild; c != nil; c = c.NextSibling { if t, err := title(c); err == nil { return t, nil } } return "", errNotTitle } func getTitle(uri string) (string, error) { resp, err := http.Get(uri) if err != nil { return "", err } defer resp.Body.Close() ct := resp.Header.Get("Content-Type") if !strings.HasPrefix(ct, "text/html") { return "", errNotHTML } if resp.ContentLength > 10*1024*1024 { return "", errTooBig } r, err := charset.NewReader(resp.Body, ct) if err != nil { return "", err } doc, err := html.Parse(r) if err != nil { return "", err } return title(doc) } func getLinks(s string) (ret []string) { for _, v := range strings.Fields(s) { switch { case strings.HasPrefix(v, "www."): v = "http://" + v fallthrough case strings.HasPrefix(v, "http:"), strings.HasPrefix(v, "https:"): ret = append(ret, v) } } return }