summaryrefslogtreecommitdiff
path: root/href.go
diff options
context:
space:
mode:
author Dimitri Sokolyuk <demon@dim13.org> 2019-07-06 17:17:46 +0200
committer Dimitri Sokolyuk <demon@dim13.org> 2019-07-06 17:17:46 +0200
commit 1d2ca509c77cbb2af0475b1319cd840f8ce9a1d0 (patch)
tree 05a838baaf4f96fbcce03d06090a2403ee56c878 /href.go
parent 87e820722cf02054225b47a58f97d0824118292f (diff)
Split in packages
Diffstat (limited to 'href.go')
-rw-r--r-- href.go 100
1 files changed, 0 insertions, 100 deletions
diff --git a/href.go b/href.go
deleted file mode 100644
index 007731f..0000000
--- a/href.go
+++ /dev/null
@@ -1,100 +0,0 @@
-package main
-
-import (
- "context"
- "errors"
- "io"
- "net/http"
- "strings"
- "time"
- "unicode/utf8"
-
- "golang.org/x/net/html"
- "golang.org/x/net/html/charset"
-)
-
// Sentinel errors reported while fetching a page and extracting its title.
var (
	// errNotOK: the response status is not 200 OK.
	errNotOK = errors.New("not OK")
	// errNotHTML: the response Content-Type is not text/html.
	errNotHTML = errors.New("not HTML")
	// errTooBig: the declared Content-Length exceeds maxLength.
	errTooBig = errors.New("content too big")
	// errNotText: the title text is not valid UTF-8.
	errNotText = errors.New("invalid UTF-8")
	// errNoTitle: no usable title text was found.
	errNoTitle = errors.New("no title")
)
-
// maxLength is the largest response body, in bytes, that will be examined
// (10 MiB).
const maxLength = 10 << 20
-
-func title(r io.Reader) (string, error) {
- var inTitle bool
- z := html.NewTokenizer(r)
- for {
- switch tt := z.Next(); tt {
- case html.ErrorToken:
- return "", z.Err()
- case html.StartTagToken:
- name, _ := z.TagName()
- if string(name) == "title" {
- inTitle = true
- }
- case html.TextToken:
- if inTitle {
- t := z.Text()
- if !utf8.Valid(t) {
- return "", errNotText
- }
- if s := strings.TrimSpace(string(t)); len(s) > 0 {
- return s, nil
- }
- return "", errNoTitle
- }
- }
- }
- return "", errNoTitle
-}
-
-func getTitle(uri string) (string, error) {
- req, err := http.NewRequest("GET", uri, nil)
- if err != nil {
- return "", err
- }
- ctx, cancel := context.WithTimeout(req.Context(), time.Minute)
- defer cancel()
-
- resp, err := http.DefaultClient.Do(req.WithContext(ctx))
- if err != nil {
- return "", err
- }
- defer resp.Body.Close()
-
- ct := resp.Header.Get("Content-Type")
- if !strings.HasPrefix(ct, "text/html") {
- return "", errNotHTML
- }
-
- if resp.StatusCode != http.StatusOK {
- return "", errNotOK
- }
-
- if resp.ContentLength > maxLength {
- return "", errTooBig
- }
-
- r, err := charset.NewReader(io.LimitReader(resp.Body, maxLength), ct)
- if err != nil {
- return "", err
- }
-
- return title(r)
-}
-
// getLinks splits s on whitespace and returns, in order, every field that
// looks like a web link.
//
// Fields starting with "www." are returned with "http://" prepended; fields
// starting with "http:" or "https:" are returned unchanged. Prefixes are
// matched case-insensitively, since URL schemes and host names are
// case-insensitive, and the original spelling of the field is kept. The
// result is nil when s contains no links.
func getLinks(s string) (ret []string) {
	for _, v := range strings.Fields(s) {
		// Match on a lowered copy only; the field itself is returned as typed.
		lower := strings.ToLower(v)
		switch {
		case strings.HasPrefix(lower, "www."):
			ret = append(ret, "http://"+v)
		case strings.HasPrefix(lower, "http:"), strings.HasPrefix(lower, "https:"):
			ret = append(ret, v)
		}
	}
	return ret
}