From be263b349d54318de512b25ba2488044c867df0d Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Sat, 6 Jul 2019 18:47:57 +0200 Subject: rename --- internal/href/href.go | 130 -------------------------------------------------- 1 file changed, 130 deletions(-) delete mode 100644 internal/href/href.go (limited to 'internal/href/href.go') diff --git a/internal/href/href.go b/internal/href/href.go deleted file mode 100644 index f224d27..0000000 --- a/internal/href/href.go +++ /dev/null @@ -1,130 +0,0 @@ -package href - -import ( - "context" - "errors" - "fmt" - "io" - "net/http" - "strings" - "time" - "unicode/utf8" - - lru "github.com/hashicorp/golang-lru" - "golang.org/x/net/html" - "golang.org/x/net/html/charset" -) - -var ( - errNotHTML = errors.New("not HTML") - errNotOK = errors.New("not OK") - errTooBig = errors.New("content too big") - errNoTitle = errors.New("no title") - errNotText = errors.New("invalid UTF-8") -) - -const maxLength = 10 * 1024 * 1024 // 10MB - -type Titles struct { - cache *lru.Cache - w io.Writer -} - -func NewTitles(w io.Writer) *Titles { - cache, _ := lru.New(100) - return &Titles{cache: cache, w: w} -} - -func (t *Titles) Resolve(text string) { - for _, v := range parseLinks(text) { - if v == "" { - continue - } - title, ok := t.cache.Get(v) - if ok { - fmt.Fprintf(t.w, "Title: %v (cached)", title) - continue - } - title, err := fetch(v) - if err == nil { - t.cache.Add(v, title) - fmt.Fprintf(t.w, "Title: %v", title) - } - } -} - -func title(r io.Reader) (string, error) { - var inTitle bool - z := html.NewTokenizer(r) - for { - switch tt := z.Next(); tt { - case html.ErrorToken: - return "", z.Err() - case html.StartTagToken: - name, _ := z.TagName() - if string(name) == "title" { - inTitle = true - } - case html.TextToken: - if inTitle { - t := z.Text() - if !utf8.Valid(t) { - return "", errNotText - } - if s := strings.TrimSpace(string(t)); len(s) > 0 { - return s, nil - } - return "", errNoTitle - } - } - } - return "", errNoTitle -} - -func fetch(uri string) (string, error) { - req, err := http.NewRequest("GET", uri, nil) - if err != nil { - return "", err - } - ctx, cancel := context.WithTimeout(req.Context(), time.Minute) - defer cancel() - - resp, err := http.DefaultClient.Do(req.WithContext(ctx)) - if err != nil { - return "", err - } - defer resp.Body.Close() - - ct := resp.Header.Get("Content-Type") - if !strings.HasPrefix(ct, "text/html") { - return "", errNotHTML - } - - if resp.StatusCode != http.StatusOK { - return "", errNotOK - } - - if resp.ContentLength > maxLength { - return "", errTooBig - } - - r, err := charset.NewReader(io.LimitReader(resp.Body, maxLength), ct) - if err != nil { - return "", err - } - - return title(r) -} - -func parseLinks(s string) (ret []string) { - for _, v := range strings.Fields(s) { - switch { - case strings.HasPrefix(v, "www."): - v = "http://" + v - fallthrough - case strings.HasPrefix(v, "http:"), strings.HasPrefix(v, "https:"): - ret = append(ret, v) - } - } - return -} -- cgit v1.2.3