From 5b9a4a158b81aa6e94a5a56d0851bea938b87bef Mon Sep 17 00:00:00 2001
From: Dimitri Sokolyuk
Date: Tue, 24 Jul 2018 14:28:18 +0200
Subject: filter out binary gibberish

---
 href.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/href.go b/href.go
index b6f835b..9e0139b 100644
--- a/href.go
+++ b/href.go
@@ -5,6 +5,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"unicode/utf8"
 
 	"golang.org/x/net/html"
 	"golang.org/x/net/html/charset"
@@ -15,6 +16,7 @@ var (
 	errNotOK   = errors.New("not OK")
 	errTooBig  = errors.New("content too big")
 	errNoTitle = errors.New("no title")
+	errNotText = errors.New("invalid UTF-8")
 )
 
 const maxLength = 10 * 1024 * 1024 // 10MB
@@ -33,7 +35,11 @@ func title(r io.Reader) (string, error) {
 			}
 		case html.TextToken:
 			if inTitle {
-				return string(z.Text()), nil
+				t := z.Text()
+				if !utf8.Valid(t) {
+					return "", errNotText
+				}
+				return string(t), nil
 			}
 		}
 	}
-- 
cgit v1.2.3