From bc97efd506df415243d0d6a13a4ec066de847ff8 Mon Sep 17 00:00:00 2001
From: Dimitri Sokolyuk
Date: Fri, 10 Jul 2015 14:26:36 +0200
Subject: Fix multiline titles

---
 href.go      | 12 +++++++-----
 href_test.go | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 5 deletions(-)
 create mode 100644 href_test.go

diff --git a/href.go b/href.go
index 9ba955f..3a9239f 100644
--- a/href.go
+++ b/href.go
@@ -2,19 +2,21 @@ package main
 
 import (
 	"net/http"
+	"strings"
 
 	"golang.org/x/net/html"
 )
 
-func findTitle(n *html.Node) string {
+func findTitle(n *html.Node) (s string) {
 	if n.Type == html.ElementNode && n.Data == "title" {
-		if c := n.FirstChild; n != nil {
-			return c.Data
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			s += c.Data
 		}
+		return strings.TrimSpace(s)
 	}
 	for c := n.FirstChild; c != nil; c = c.NextSibling {
-		if s := findTitle(c); s != "" {
-			return s
+		if t := findTitle(c); t != "" {
+			return t
 		}
 	}
 	return ""
diff --git a/href_test.go b/href_test.go
new file mode 100644
index 0000000..864b702
--- /dev/null
+++ b/href_test.go
@@ -0,0 +1,39 @@
+package main
+
+import "testing"
+
+type titleTest struct {
+	URL    string
+	Title  string
+	Broken bool
+}
+
+var titleTestData = []titleTest{
+	{
+		URL:    `http://www.theinquirer.net/inquirer/news/2416607/linux-founder-says-you-must-be-on-drugs-if-youre-scared-of-ai`,
+		Title:  `Linux founder says you must be 'on drugs' if you're scared of AI- The Inquirer`,
+		Broken: false,
+	},
+	{
+		URL:    `http://www.ffoms.ru/portal/page/portal/top/index`,
+		Title:  `Федеральный Фонд Обязательного Медицинского Страхования`,
+		Broken: true, // This site is just fucked up beyond all repair
+	},
+	{
+		URL:    `https://www.linux.org.ru`,
+		Title:  `LINUX.ORG.RU - Русская информация об ОС Linux`,
+		Broken: false,
+	},
+}
+
+func TestTitle(t *testing.T) {
+	for _, test := range titleTestData {
+		title, err := FetchTitle(test.URL)
+		if err != nil {
+			t.Error(err)
+		}
+		if !test.Broken && title != test.Title {
+			t.Error("Expected", test.Title, "got", title)
+		}
+	}
+}
-- 
cgit v1.2.3