summaryrefslogtreecommitdiff
path: root/href.go
blob: d06d04c0eceb99d3397ee3153a46cf5a2d9623be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package main

import (
	"errors"
	"log"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"strings"

	irc "github.com/fluffle/goirc/client"
	"golang.org/x/net/html"
	"golang.org/x/net/html/charset"
)

var (
	errNotHTML = errors.New("not HTML content")
	errTooBig  = errors.New("content too big")
	errIconv   = errors.New("charset converter error")
)

const (
	MB     = 1024 * 1024
	maxLen = 500 // approx 510 - len("Title: ")
)

func findTitle(n *html.Node) (s string) {
	if n.Type == html.ElementNode && n.Data == "title" {
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			s += c.Data
		}
		return strings.TrimSpace(s)
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if t := findTitle(c); t != "" {
			return t
		}
	}
	return ""
}

func FetchTitle(uri string) (string, error) {
	client := &http.Client{}
	if strings.Contains(uri, "reddit") {
		u, _ := url.Parse(uri)
		cookies := []*http.Cookie{
			&http.Cookie{
				Name:  "over18",
				Value: "1",
			},
		}
		client.Jar, _ = cookiejar.New(nil)
		client.Jar.SetCookies(u, cookies)
	}
	resp, err := client.Get(uri)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	ct := resp.Header.Get("Content-Type")
	if !strings.HasPrefix(ct, "text/html") {
		return "", errNotHTML
	}

	if resp.ContentLength > 8*MB {
		return "", errTooBig
	}

	r, err := charset.NewReader(resp.Body, ct)
	if err != nil {
		return "", errIconv
	}

	doc, err := html.Parse(r)
	if err != nil {
		return "", err
	}

	title := findTitle(doc)

	if len(title) > maxLen {
		title = title[:maxLen] + "..."
	}

	return title, nil
}

func ExtractLinks(conn *irc.Conn, line *irc.Line) {
	for _, v := range strings.Fields(line.Text()) {
		switch {
		case strings.HasPrefix(v, "www"):
			v = "http://" + v
			fallthrough
		case strings.HasPrefix(v, "http"):
			go func(uri string) {
				log.Println(line.Nick, uri)
				t, err := FetchTitle(uri)
				if err != nil {
					log.Println(err)
				}
				if t != "" {
					conn.Notice(line.Target(), "Title: "+t)
				}
			}(v)
		}
	}
}