summary refs log tree commit diff
path: root/internal/href/href.go
diff options
context:
space:
mode:
Diffstat (limited to 'internal/href/href.go')
-rw-r--r-- internal/href/href.go 100
1 files changed, 100 insertions, 0 deletions
diff --git a/internal/href/href.go b/internal/href/href.go
new file mode 100644
index 0000000..d134f67
--- /dev/null
+++ b/internal/href/href.go
@@ -0,0 +1,100 @@
+package href
+
+import (
+ "context"
+ "errors"
+ "io"
+ "net/http"
+ "strings"
+ "time"
+ "unicode/utf8"
+
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/charset"
+)
+
+// Sentinel errors reported while fetching a page and extracting its title.
+var (
+ // errNotHTML is returned by Title when the response Content-Type is not
+ // text/html.
+ errNotHTML = errors.New("not HTML")
+ // errNotOK is returned by Title when the response status is not 200 OK.
+ errNotOK = errors.New("not OK")
+ // errTooBig is returned by Title when the declared Content-Length
+ // exceeds maxLength.
+ errTooBig = errors.New("content too big")
+ // errNoTitle is returned by title when the title text is empty after
+ // trimming whitespace.
+ errNoTitle = errors.New("no title")
+ // errNotText is returned by title when the title text is not valid UTF-8.
+ errNotText = errors.New("invalid UTF-8")
+)
+
+// maxLength is the largest declared Content-Length, in bytes, that Title
+// accepts; it is also the cap on how many body bytes are read.
+const maxLength = 10 * 1024 * 1024 // 10MB
+
+func title(r io.Reader) (string, error) {
+ var inTitle bool
+ z := html.NewTokenizer(r)
+ for {
+ switch tt := z.Next(); tt {
+ case html.ErrorToken:
+ return "", z.Err()
+ case html.StartTagToken:
+ name, _ := z.TagName()
+ if string(name) == "title" {
+ inTitle = true
+ }
+ case html.TextToken:
+ if inTitle {
+ t := z.Text()
+ if !utf8.Valid(t) {
+ return "", errNotText
+ }
+ if s := strings.TrimSpace(string(t)); len(s) > 0 {
+ return s, nil
+ }
+ return "", errNoTitle
+ }
+ }
+ }
+ return "", errNoTitle
+}
+
+// Title fetches uri with an HTTP GET and returns the text of the page's
+// <title> element.
+//
+// The request is bounded by a one-minute timeout. Title returns errNotOK
+// when the response status is not 200 OK, errNotHTML when the Content-Type
+// is not text/html, and errTooBig when the declared Content-Length exceeds
+// maxLength. At most maxLength bytes of the body are read, and the body is
+// decoded according to the charset announced in the Content-Type or
+// detected in the document. Errors from building or performing the request
+// and from title extraction are returned unchanged.
+func Title(uri string) (string, error) {
+	req, err := http.NewRequest(http.MethodGet, uri, nil)
+	if err != nil {
+		return "", err
+	}
+	ctx, cancel := context.WithTimeout(req.Context(), time.Minute)
+	defer cancel()
+
+	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	// Check the status before the media type so that an error page served
+	// as something other than HTML is reported as a failed request rather
+	// than as "not HTML".
+	if resp.StatusCode != http.StatusOK {
+		return "", errNotOK
+	}
+
+	// Media types are case-insensitive, so compare in lower case. The
+	// original header value is still handed to the charset detector below.
+	ct := resp.Header.Get("Content-Type")
+	if !strings.HasPrefix(strings.ToLower(ct), "text/html") {
+		return "", errNotHTML
+	}
+
+	// ContentLength is -1 when the server does not declare a length, so
+	// the body read is additionally capped with a LimitReader.
+	if resp.ContentLength > maxLength {
+		return "", errTooBig
+	}
+
+	r, err := charset.NewReader(io.LimitReader(resp.Body, maxLength), ct)
+	if err != nil {
+		return "", err
+	}
+
+	return title(r)
+}
+
+// Links returns the whitespace-separated words of s that look like web
+// links, in the order they appear. Words beginning with "http:" or
+// "https:" are returned as they are; words beginning with "www." are
+// returned with "http://" prepended. The result is nil when s contains no
+// such word.
+func Links(s string) (ret []string) {
+	for _, word := range strings.Fields(s) {
+		if strings.HasPrefix(word, "www.") {
+			ret = append(ret, "http://"+word)
+			continue
+		}
+		if strings.HasPrefix(word, "http:") || strings.HasPrefix(word, "https:") {
+			ret = append(ret, word)
+		}
+	}
+	return ret
+}