diff options
author | Dimitri Sokolyuk <demon@dim13.org> | 2019-07-06 17:17:46 +0200 |
---|---|---|
committer | Dimitri Sokolyuk <demon@dim13.org> | 2019-07-06 17:17:46 +0200 |
commit | 1d2ca509c77cbb2af0475b1319cd840f8ce9a1d0 (patch) | |
tree | 05a838baaf4f96fbcce03d06090a2403ee56c878 /href.go | |
parent | 87e820722cf02054225b47a58f97d0824118292f (diff) |
Split in packages
Diffstat (limited to 'href.go')
-rw-r--r-- | href.go | 100 |
1 files changed, 0 insertions, 100 deletions
diff --git a/href.go b/href.go deleted file mode 100644 index 007731f..0000000 --- a/href.go +++ /dev/null @@ -1,100 +0,0 @@ -package main - -import ( - "context" - "errors" - "io" - "net/http" - "strings" - "time" - "unicode/utf8" - - "golang.org/x/net/html" - "golang.org/x/net/html/charset" -) - -var ( - errNotHTML = errors.New("not HTML") - errNotOK = errors.New("not OK") - errTooBig = errors.New("content too big") - errNoTitle = errors.New("no title") - errNotText = errors.New("invalid UTF-8") -) - -const maxLength = 10 * 1024 * 1024 // 10MB - -func title(r io.Reader) (string, error) { - var inTitle bool - z := html.NewTokenizer(r) - for { - switch tt := z.Next(); tt { - case html.ErrorToken: - return "", z.Err() - case html.StartTagToken: - name, _ := z.TagName() - if string(name) == "title" { - inTitle = true - } - case html.TextToken: - if inTitle { - t := z.Text() - if !utf8.Valid(t) { - return "", errNotText - } - if s := strings.TrimSpace(string(t)); len(s) > 0 { - return s, nil - } - return "", errNoTitle - } - } - } - return "", errNoTitle -} - -func getTitle(uri string) (string, error) { - req, err := http.NewRequest("GET", uri, nil) - if err != nil { - return "", err - } - ctx, cancel := context.WithTimeout(req.Context(), time.Minute) - defer cancel() - - resp, err := http.DefaultClient.Do(req.WithContext(ctx)) - if err != nil { - return "", err - } - defer resp.Body.Close() - - ct := resp.Header.Get("Content-Type") - if !strings.HasPrefix(ct, "text/html") { - return "", errNotHTML - } - - if resp.StatusCode != http.StatusOK { - return "", errNotOK - } - - if resp.ContentLength > maxLength { - return "", errTooBig - } - - r, err := charset.NewReader(io.LimitReader(resp.Body, maxLength), ct) - if err != nil { - return "", err - } - - return title(r) -} - -func getLinks(s string) (ret []string) { - for _, v := range strings.Fields(s) { - switch { - case strings.HasPrefix(v, "www."): - v = "http://" + v - fallthrough - case strings.HasPrefix(v, "http:"), strings.HasPrefix(v, "https:"): - ret = append(ret, v) - } - } - return -} |