summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Dimitri Sokolyuk <demon@dim13.org> 2018-07-24 14:28:18 +0200
committer Dimitri Sokolyuk <demon@dim13.org> 2018-07-24 14:28:18 +0200
commit 5b9a4a158b81aa6e94a5a56d0851bea938b87bef (patch)
tree e7a5da36af43753efa58afb48882fdb09147696b
parent be18e907600668e5ea78d1ba09f86b8122be8253 (diff)
filter out binary gibberish
-rw-r--r-- href.go 8
1 files changed, 7 insertions, 1 deletions
diff --git a/href.go b/href.go
index b6f835b..9e0139b 100644
--- a/href.go
+++ b/href.go
@@ -5,6 +5,7 @@ import (
"io"
"net/http"
"strings"
+ "unicode/utf8"
"golang.org/x/net/html"
"golang.org/x/net/html/charset"
@@ -15,6 +16,7 @@ var (
errNotOK = errors.New("not OK")
errTooBig = errors.New("content too big")
errNoTitle = errors.New("no title")
+ errNotText = errors.New("invalid UTF-8")
)
const maxLength = 10 * 1024 * 1024 // 10MB
@@ -33,7 +35,11 @@ func title(r io.Reader) (string, error) {
}
case html.TextToken:
if inTitle {
- return string(z.Text()), nil
+ t := z.Text()
+ if !utf8.Valid(t) {
+ return "", errNotText
+ }
+ return string(t), nil
}
}
}