// Package main implements an IRC bot helper: it spots URLs in channel
// messages, fetches each page, and announces its HTML <title>.
package main

import (
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"strings"
	"time"
	"unicode/utf8"

	irc "github.com/fluffle/goirc/client"
	"golang.org/x/net/html"
	"golang.org/x/net/html/charset"
)

// Sentinel errors returned by FetchTitle.
var (
	errNotHTML = errors.New("not HTML content")
	errTooBig  = errors.New("content too big")
)

const (
	// MB is one mebibyte in bytes.
	MB = 1024 * 1024
	// maxBody caps how many bytes of a response we are willing to parse,
	// regardless of what Content-Length claims.
	maxBody = 8 * MB
	// maxLen is the longest title we relay: approx 510 (the IRC
	// line-length limit) minus len("Title: ").
	maxLen = 500
)

// findTitle walks the parse tree depth-first and returns the
// concatenated, whitespace-trimmed text of the first <title> element,
// or "" when the document has none.
func findTitle(n *html.Node) (s string) {
	if n.Type == html.ElementNode && n.Data == "title" {
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			s += c.Data
		}
		return strings.TrimSpace(s)
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if t := findTitle(c); t != "" {
			return t
		}
	}
	return ""
}

// truncate cuts s to at most n bytes without splitting a UTF-8 rune,
// appending "..." when anything was removed.
func truncate(s string, n int) string {
	if len(s) <= n {
		return s
	}
	// Back up to a rune boundary so we never emit an invalid sequence.
	for n > 0 && !utf8.RuneStart(s[n]) {
		n--
	}
	return s[:n] + "..."
}

// FetchTitle downloads uri and returns the text of its <title> element,
// truncated to maxLen bytes on a rune boundary.
//
// It returns errNotHTML for non-HTML responses and errTooBig when the
// declared Content-Length exceeds maxBody; bodies with no declared
// length are still read through a maxBody limit. Network errors from
// the underlying HTTP client are returned as-is.
func FetchTitle(uri string) (string, error) {
	// Per-call client so the reddit cookie jar below is never shared
	// across concurrent fetches; the timeout stops hung servers from
	// leaking the calling goroutine.
	client := &http.Client{Timeout: 15 * time.Second}

	// Reddit gates some pages behind an age check; presenting the
	// over18 cookie lets us fetch the real page instead.
	if strings.Contains(uri, "reddit") {
		if u, err := url.Parse(uri); err == nil {
			if jar, err := cookiejar.New(nil); err == nil {
				jar.SetCookies(u, []*http.Cookie{{Name: "over18", Value: "1"}})
				client.Jar = jar
			}
		}
	}

	resp, err := client.Get(uri)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	ct := resp.Header.Get("Content-Type")
	if !strings.HasPrefix(ct, "text/html") {
		return "", errNotHTML
	}
	if resp.ContentLength > maxBody {
		return "", errTooBig
	}

	// Cap the read even when Content-Length is absent (-1) or lies.
	body := io.LimitReader(resp.Body, maxBody)
	r, err := charset.NewReader(body, ct)
	if err != nil {
		return "", fmt.Errorf("charset converter: %w", err)
	}
	doc, err := html.Parse(r)
	if err != nil {
		return "", err
	}

	return truncate(findTitle(doc), maxLen), nil
}

// ExtractLinks scans an incoming IRC line for URL-like tokens (those
// starting with "http" or "www", the latter getting an "http://"
// prefix) and, for each, asynchronously fetches the page title and
// announces it to the line's target.
func ExtractLinks(conn *irc.Conn, line *irc.Line) {
	for _, tok := range strings.Fields(line.Text()) {
		switch {
		case strings.HasPrefix(tok, "www"):
			tok = "http://" + tok
			fallthrough
		case strings.HasPrefix(tok, "http"):
			// NOTE(review): one unbounded goroutine per link; fine for
			// IRC traffic volumes, but there is no cancellation beyond
			// the HTTP client timeout inside FetchTitle.
			go func(uri string) {
				log.Println(line.Nick, uri)
				t, err := FetchTitle(uri)
				if err != nil {
					log.Println(err)
					return
				}
				if t != "" {
					conn.Notice(line.Target(), "Title: "+t)
				}
			}(tok)
		}
	}
}