diff options
author | Dimitri Sokolyuk <demon@dim13.org> | 2018-07-24 14:28:18 +0200 |
---|---|---|
committer | Dimitri Sokolyuk <demon@dim13.org> | 2018-07-24 14:28:18 +0200 |
commit | 5b9a4a158b81aa6e94a5a56d0851bea938b87bef (patch) | |
tree | e7a5da36af43753efa58afb48882fdb09147696b /href.go | |
parent | be18e907600668e5ea78d1ba09f86b8122be8253 (diff) |
filter out binary gibberish
Diffstat (limited to 'href.go')
-rw-r--r-- | href.go | 8 |
1 file changed, 7 insertions, 1 deletion
```diff
@@ -5,6 +5,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"unicode/utf8"
 
 	"golang.org/x/net/html"
 	"golang.org/x/net/html/charset"
@@ -15,6 +16,7 @@ var (
 	errNotOK   = errors.New("not OK")
 	errTooBig  = errors.New("content too big")
 	errNoTitle = errors.New("no title")
+	errNotText = errors.New("invalid UTF-8")
 )
 
 const maxLength = 10 * 1024 * 1024 // 10MB
@@ -33,7 +35,11 @@ func title(r io.Reader) (string, error) {
 			}
 		case html.TextToken:
 			if inTitle {
-				return string(z.Text()), nil
+				t := z.Text()
+				if !utf8.Valid(t) {
+					return "", errNotText
+				}
+				return string(t), nil
 			}
 		}
 	}
```