From 1d2ca509c77cbb2af0475b1319cd840f8ce9a1d0 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Sat, 6 Jul 2019 17:17:46 +0200 Subject: Split in packages --- href.go | 100 ---------------------------------------------------------------- 1 file changed, 100 deletions(-) delete mode 100644 href.go (limited to 'href.go') diff --git a/href.go b/href.go deleted file mode 100644 index 007731f..0000000 --- a/href.go +++ /dev/null @@ -1,100 +0,0 @@ -package main - -import ( - "context" - "errors" - "io" - "net/http" - "strings" - "time" - "unicode/utf8" - - "golang.org/x/net/html" - "golang.org/x/net/html/charset" -) - -var ( - errNotHTML = errors.New("not HTML") - errNotOK = errors.New("not OK") - errTooBig = errors.New("content too big") - errNoTitle = errors.New("no title") - errNotText = errors.New("invalid UTF-8") -) - -const maxLength = 10 * 1024 * 1024 // 10MB - -func title(r io.Reader) (string, error) { - var inTitle bool - z := html.NewTokenizer(r) - for { - switch tt := z.Next(); tt { - case html.ErrorToken: - return "", z.Err() - case html.StartTagToken: - name, _ := z.TagName() - if string(name) == "title" { - inTitle = true - } - case html.TextToken: - if inTitle { - t := z.Text() - if !utf8.Valid(t) { - return "", errNotText - } - if s := strings.TrimSpace(string(t)); len(s) > 0 { - return s, nil - } - return "", errNoTitle - } - } - } - return "", errNoTitle -} - -func getTitle(uri string) (string, error) { - req, err := http.NewRequest("GET", uri, nil) - if err != nil { - return "", err - } - ctx, cancel := context.WithTimeout(req.Context(), time.Minute) - defer cancel() - - resp, err := http.DefaultClient.Do(req.WithContext(ctx)) - if err != nil { - return "", err - } - defer resp.Body.Close() - - ct := resp.Header.Get("Content-Type") - if !strings.HasPrefix(ct, "text/html") { - return "", errNotHTML - } - - if resp.StatusCode != http.StatusOK { - return "", errNotOK - } - - if resp.ContentLength > maxLength { - return "", errTooBig - } - - r, err := charset.NewReader(io.LimitReader(resp.Body, maxLength), ct) - if err != nil { - return "", err - } - - return title(r) -} - -func getLinks(s string) (ret []string) { - for _, v := range strings.Fields(s) { - switch { - case strings.HasPrefix(v, "www."): - v = "http://" + v - fallthrough - case strings.HasPrefix(v, "http:"), strings.HasPrefix(v, "https:"): - ret = append(ret, v) - } - } - return -} -- cgit v1.2.3