summaryrefslogtreecommitdiff
path: root/internal/href/href.go
diff options
context:
space:
mode:
Diffstat (limited to 'internal/href/href.go')
-rw-r--r--internal/href/href.go130
1 files changed, 0 insertions, 130 deletions
diff --git a/internal/href/href.go b/internal/href/href.go
deleted file mode 100644
index f224d27..0000000
--- a/internal/href/href.go
+++ /dev/null
@@ -1,130 +0,0 @@
-package href
-
-import (
- "context"
- "errors"
- "fmt"
- "io"
- "net/http"
- "strings"
- "time"
- "unicode/utf8"
-
- lru "github.com/hashicorp/golang-lru"
- "golang.org/x/net/html"
- "golang.org/x/net/html/charset"
-)
-
-var (
- errNotHTML = errors.New("not HTML")
- errNotOK = errors.New("not OK")
- errTooBig = errors.New("content too big")
- errNoTitle = errors.New("no title")
- errNotText = errors.New("invalid UTF-8")
-)
-
-const maxLength = 10 * 1024 * 1024 // 10MB
-
-type Titles struct {
- cache *lru.Cache
- w io.Writer
-}
-
-func NewTitles(w io.Writer) *Titles {
- cache, _ := lru.New(100)
- return &Titles{cache: cache, w: w}
-}
-
-func (t *Titles) Resolve(text string) {
- for _, v := range parseLinks(text) {
- if v == "" {
- continue
- }
- title, ok := t.cache.Get(v)
- if ok {
- fmt.Fprintf(t.w, "Title: %v (cached)", title)
- continue
- }
- title, err := fetch(v)
- if err == nil {
- t.cache.Add(v, title)
- fmt.Fprintf(t.w, "Title: %v", title)
- }
- }
-}
-
-func title(r io.Reader) (string, error) {
- var inTitle bool
- z := html.NewTokenizer(r)
- for {
- switch tt := z.Next(); tt {
- case html.ErrorToken:
- return "", z.Err()
- case html.StartTagToken:
- name, _ := z.TagName()
- if string(name) == "title" {
- inTitle = true
- }
- case html.TextToken:
- if inTitle {
- t := z.Text()
- if !utf8.Valid(t) {
- return "", errNotText
- }
- if s := strings.TrimSpace(string(t)); len(s) > 0 {
- return s, nil
- }
- return "", errNoTitle
- }
- }
- }
- return "", errNoTitle
-}
-
-func fetch(uri string) (string, error) {
- req, err := http.NewRequest("GET", uri, nil)
- if err != nil {
- return "", err
- }
- ctx, cancel := context.WithTimeout(req.Context(), time.Minute)
- defer cancel()
-
- resp, err := http.DefaultClient.Do(req.WithContext(ctx))
- if err != nil {
- return "", err
- }
- defer resp.Body.Close()
-
- ct := resp.Header.Get("Content-Type")
- if !strings.HasPrefix(ct, "text/html") {
- return "", errNotHTML
- }
-
- if resp.StatusCode != http.StatusOK {
- return "", errNotOK
- }
-
- if resp.ContentLength > maxLength {
- return "", errTooBig
- }
-
- r, err := charset.NewReader(io.LimitReader(resp.Body, maxLength), ct)
- if err != nil {
- return "", err
- }
-
- return title(r)
-}
-
-func parseLinks(s string) (ret []string) {
- for _, v := range strings.Fields(s) {
- switch {
- case strings.HasPrefix(v, "www."):
- v = "http://" + v
- fallthrough
- case strings.HasPrefix(v, "http:"), strings.HasPrefix(v, "https:"):
- ret = append(ret, v)
- }
- }
- return
-}