// Command-level helpers that find URLs in free-form text, fetch each page and
// report its HTML <title>.
package main

import (
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
	"time"

	"golang.org/x/net/html"
	"golang.org/x/net/html/charset"
)

const (
	// titleFetchTimeout bounds a whole title request (connect, headers and
	// body) so a stalled server cannot block the linker goroutine forever.
	titleFetchTimeout = 15 * time.Second

	// titleMaxBodyBytes caps how much of a response body is handed to the
	// HTML parser; the <title> lives in <head>, near the start of the page.
	titleMaxBodyBytes = 1 << 20
)

// titleClient is the shared HTTP client used by getTitle. Unlike
// http.DefaultClient it carries a timeout.
var titleClient = &http.Client{Timeout: titleFetchTimeout}

// errNotHTML is returned by getTitle when the response Content-Type does not
// start with "text/html".
var errNotHTML = errors.New("Not HTML")

// title returns the whitespace-trimmed text of the first non-empty <title>
// element found in a depth-first walk of the tree rooted at n, or "" when
// there is none.
func title(n *html.Node) string {
	if n.Type == html.ElementNode && n.Data == "title" {
		var b strings.Builder
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			// Only text contributes to the title; for element children
			// Data holds the tag name, which is not part of the text.
			if c.Type == html.TextNode {
				b.WriteString(c.Data)
			}
		}
		return strings.TrimSpace(b.String())
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if t := title(c); t != "" {
			return t
		}
	}
	return ""
}

// getTitle fetches uri and returns the title of the HTML document it serves.
//
// It returns errNotHTML when the Content-Type header does not start with
// "text/html" (compared case-insensitively), and otherwise any error from the
// request, the charset decoder or the HTML parser. The returned string is ""
// whenever the error is non-nil; it may also be "" with a nil error when the
// document has no title.
func getTitle(uri string) (string, error) {
	resp, err := titleClient.Get(uri)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	ct := resp.Header.Get("Content-Type")
	// Media types are case-insensitive, so normalise before comparing.
	if !strings.HasPrefix(strings.ToLower(ct), "text/html") {
		return "", errNotHTML
	}

	// Bound the amount of data parsed so an endless or huge body cannot
	// exhaust memory; a truncated document still parses up to the cut.
	body := io.LimitReader(resp.Body, titleMaxBodyBytes)
	r, err := charset.NewReader(body, ct)
	if err != nil {
		return "", fmt.Errorf("decoding %q: %w", uri, err)
	}
	doc, err := html.Parse(r)
	if err != nil {
		return "", fmt.Errorf("parsing %q: %w", uri, err)
	}
	return title(doc), nil
}

// getLinks splits s on whitespace and returns, in order, every field that
// looks like a URL: fields starting with "http:" or "https:" are returned
// as-is, and fields starting with "www" are returned with "http://"
// prepended. It returns nil when no field matches.
func getLinks(s string) (ret []string) {
	for _, field := range strings.Fields(s) {
		switch {
		case strings.HasPrefix(field, "www"):
			ret = append(ret, "http://"+field)
		case strings.HasPrefix(field, "http:"), strings.HasPrefix(field, "https:"):
			ret = append(ret, field)
		}
	}
	return ret
}

// linker starts a goroutine that resolves page titles and returns the channel
// used to feed it.
//
// Every string sent on the returned channel is scanned with getLinks; each
// link found is logged and fetched with getTitle, and every non-empty title
// is sent on out as "Title: <title>". Fetch errors are logged and skipped.
// The goroutine exits once the returned channel is closed and drained; sends
// on out block until the receiver takes them.
func linker(out chan string) chan string {
	c := make(chan string, 1)
	go func() {
		for line := range c {
			for _, link := range getLinks(line) {
				log.Println("URL", link)
				t, err := getTitle(link)
				if err != nil {
					log.Println(err)
					continue
				}
				if t != "" {
					out <- "Title: " + t
				}
			}
		}
	}()
	return c
}