diff --git a/go.mod b/go.mod index d389b49..db19b76 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,9 @@ require ( github.com/PaulRosset/go-hacknews v0.0.0-20170815075127-4aad99273a3c github.com/PuerkitoBio/goquery v1.5.0 github.com/andybalholm/cascadia v1.1.0 // indirect + github.com/antchfx/htmlquery v1.2.0 // indirect + github.com/antchfx/xmlquery v1.2.0 // indirect + github.com/antchfx/xpath v1.1.1 // indirect github.com/armon/go-radix v1.0.0 // indirect github.com/azr/backoff v0.0.0-20160115115103-53511d3c7330 // indirect github.com/chrissexton/gofuck v1.0.0 @@ -17,12 +20,16 @@ require ( github.com/dustin/go-jsonpointer v0.0.0-20160814072949-ba0abeacc3dc // indirect github.com/dustin/gojson v0.0.0-20160307161227-2e71ec9dd5ad // indirect github.com/garyburd/go-oauth v0.0.0-20180319155456-bca2e7f09a17 // indirect + github.com/gobwas/glob v0.2.3 // indirect + github.com/gocolly/colly v1.2.0 + github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 // indirect github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 // indirect github.com/gonum/internal v0.0.0-20181124074243-f884aa714029 // indirect github.com/gorilla/websocket v1.4.1 // indirect github.com/james-bowman/nlp v0.0.0-20191016091239-d9dbfaff30c6 github.com/james-bowman/sparse v0.0.0-20190423065201-80c6877364c7 // indirect github.com/jmoiron/sqlx v1.2.0 + github.com/kennygrant/sanitize v1.2.4 // indirect github.com/mattn/go-sqlite3 v1.11.0 github.com/mmcdole/gofeed v1.0.0-beta2 github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf // indirect @@ -30,14 +37,17 @@ require ( github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254 github.com/robertkrimen/otto v0.0.0-20180617131154-15f95af6e78d // indirect github.com/rs/zerolog v1.15.0 + github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/stretchr/objx v0.2.0 // indirect github.com/stretchr/testify v1.4.0 + github.com/temoto/robotstxt v1.1.1 // indirect github.com/velour/chat v0.0.0-20180713122344-fd1d1606cb89 github.com/velour/velour v0.0.0-20160303155839-8e090e68d158 golang.org/x/exp v0.0.0-20191014171548-69215a2ee97e // indirect golang.org/x/net v0.0.0-20191014212845-da9a3fd4c582 // indirect gonum.org/v1/gonum v0.6.0 // indirect + google.golang.org/appengine v1.6.5 // indirect gopkg.in/go-playground/webhooks.v5 v5.13.0 gopkg.in/sourcemap.v1 v1.0.5 // indirect gopkg.in/yaml.v2 v2.2.4 // indirect diff --git a/go.sum b/go.sum index 42cafba..460a027 100644 --- a/go.sum +++ b/go.sum @@ -17,6 +17,12 @@ github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRy github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/antchfx/htmlquery v1.2.0 h1:oKShnsGlnOHX6t4uj5OHgLKkABcJoqnXpqnscoi9Lpw= +github.com/antchfx/htmlquery v1.2.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8= +github.com/antchfx/xmlquery v1.2.0 h1:1nrzsSN5mFrlqFWSK9byiq/qXKE7O2vivYzhv1Ksnfw= +github.com/antchfx/xmlquery v1.2.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk= +github.com/antchfx/xpath v1.1.1 h1:mqGYmd5pioPu06+REIf8j3y6O3S1UpVNVoCameZHotg= +github.com/antchfx/xpath v1.1.1/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/azr/backoff v0.0.0-20160115115103-53511d3c7330 h1:ekDALXAVvY/Ub1UtNta3inKQwZ/jMB/zpOtD8rAYh78= @@ -44,7 +50,15 @@ github.com/garyburd/go-oauth v0.0.0-20180319155456-bca2e7f09a17 h1:GOfMz6cRgTJ9j github.com/garyburd/go-oauth v0.0.0-20180319155456-bca2e7f09a17/go.mod h1:HfkOCN6fkKKaPSAeNq/er3xObxTW4VLeY6UUK895gLQ= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= +github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= +github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 h1:uHTyIjqVhYRhLbJ8nIiOJHkEZZ+5YoOsAbD3sk82NiE= +github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 h1:EvokxLQsaaQjcWVWSV38221VAK7qc2zhaO17bKys/18= github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82/go.mod h1:PxC8OnwL11+aosOB5+iEPoV3picfs8tUpkVd0pDo+Kg= github.com/gonum/internal v0.0.0-20181124074243-f884aa714029 h1:8jtTdc+Nfj9AR+0soOeia9UZSvYBvETVHZrugUowJ7M= @@ -60,6 +74,8 @@ github.com/james-bowman/sparse v0.0.0-20190423065201-80c6877364c7/go.mod h1:G6Ec github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA= github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= +github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mattn/go-sqlite3 v1.11.0 h1:LDdKkqtYlom37fkvqs8rMPFKAMe8+SgjbwZ6ex1/A/Q= @@ -82,6 +98,8 @@ github.com/robertkrimen/otto v0.0.0-20180617131154-15f95af6e78d/go.mod h1:xvqspo github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/zerolog v1.15.0 h1:uPRuwkWF4J6fGsJ2R0Gn2jB1EQiav9k3S6CSdygQJXY= github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc= +github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= +github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= @@ -92,6 +110,8 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA= +github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= github.com/velour/chat v0.0.0-20180713122344-fd1d1606cb89 h1:3D3M900hEBJJAqyKl70QuRHi5weX9+ptlQI1v+FNcQ8= github.com/velour/chat v0.0.0-20180713122344-fd1d1606cb89/go.mod h1:ejwOYCjnDMyO5LXFXRARQJGBZ6xQJZ3rgAHE5drSuMM= github.com/velour/velour v0.0.0-20160303155839-8e090e68d158 h1:p3rTUXxzuKsBOsHlkly7+rj9wagFBKeIsCDKkDII9sw= @@ -115,6 +135,7 @@ golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191014212845-da9a3fd4c582 h1:p9xBe/w/OzkeYVKm234g55gMdD1nSIooTir5kV11kfA= @@ -137,6 +158,7 @@ gonum.org/v1/gonum v0.6.0 h1:DJy6UzXbahnGUf1ujUNkh/NEtK14qMo2nvlBPs4U5yw= gonum.org/v1/gonum v0.6.0/go.mod h1:9mxDZsDKxgMAuccQkewq682L+0eCu4dCN2yonUJTCLU= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/go-playground/webhooks.v5 v5.13.0 h1:e9vtkQZK464+UdL3YjRox2yR8JSmh2094PUBMvdriFs= gopkg.in/go-playground/webhooks.v5 v5.13.0/go.mod h1:LZbya/qLVdbqDR1aKrGuWV6qbia2zCYSR5dpom2SInQ= diff --git a/plugins/newsbid/webshit/webshit.go b/plugins/newsbid/webshit/webshit.go index e73db79..ed1df8a 100644 --- a/plugins/newsbid/webshit/webshit.go +++ b/plugins/newsbid/webshit/webshit.go @@ -3,11 +3,15 @@ package webshit import ( "bytes" "fmt" + "math" "net/http" "net/url" + "strconv" "strings" "time" + "github.com/gocolly/colly" + hacknews "github.com/PaulRosset/go-hacknews" "github.com/PuerkitoBio/goquery" "github.com/jmoiron/sqlx" @@ -184,14 +188,19 @@ func (w *Webshit) checkBids(bids []Bid, storyMap map[string]Story) []WeeklyResul rec.WinningArticles = append(rec.WinningArticles, s) totalWinning += float64(b.Bid) } else { - rec.LosingArticles = append(rec.LosingArticles, Story{b.Title, b.URL}) + rec.LosingArticles = append(rec.LosingArticles, Story{Title: b.Title, URL: b.URL}) } total += float64(b.Bid) wr[b.User] = rec } for _, b := range wins { - payout := float64(b.Bid) / totalWinning * total + score, comments, err := scrapeScoreAndComments(b.URL) + ratio := 1.0 + if err != nil { + ratio = float64(score) / math.Max(float64(comments), 1.0) + } + payout := float64(b.Bid) / totalWinning * total * ratio rec := wr[b.User] rec.Won += int(payout) rec.Score += int(payout) @@ -201,6 +210,41 @@ func (w *Webshit) checkBids(bids []Bid, storyMap map[string]Story) []WeeklyResul return wrMapToSlice(wr) } +func scrapeScoreAndComments(url string) (int, int, error) { + c := colly.NewCollector() + + // why do I need this to break out of these stupid callbacks? + c.Async = true + + finished := make(chan bool) + + score := 0 + comments := 0 + var err error = nil + + c.OnHTML("td.subtext > span.score", func(r *colly.HTMLElement) { + score, _ = strconv.Atoi(strings.Fields(r.Text)[0]) + }) + + c.OnHTML("td.subtext > a[href*='item?id=']:last-of-type", func(r *colly.HTMLElement) { + comments, _ = strconv.Atoi(strings.Fields(r.Text)[0]) + }) + + c.OnScraped(func(r *colly.Response) { + finished <- true + }) + + c.OnError(func(r *colly.Response, e error) { + log.Error().Err(err).Msgf("could not scrape %s", r.Request.URL) + err = e + finished <- true + }) + + c.Visit(url) + <-finished + return score, comments, err +} + // GetHeadlines will return the current possible news headlines for bidding func (w *Webshit) GetHeadlines() ([]Story, error) { news := hacknews.Initializer{Story: w.config.HNFeed, NbPosts: w.config.HNLimit} diff --git a/plugins/newsbid/webshit/webshit_test.go b/plugins/newsbid/webshit/webshit_test.go deleted file mode 100644 index e69551c..0000000 --- a/plugins/newsbid/webshit/webshit_test.go +++ /dev/null @@ -1,99 +0,0 @@ -package webshit - -import ( - "github.com/jmoiron/sqlx" - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" - "github.com/stretchr/testify/assert" - "os" - "testing" - - _ "github.com/mattn/go-sqlite3" -) - -func init() { - log.Logger = log.Logger.Output(zerolog.ConsoleWriter{Out: os.Stderr}) -} - -func makeWS(t *testing.T) *Webshit { - db := sqlx.MustOpen("sqlite3", "file::memory:?mode=memory&cache=shared") - w := New(db) - assert.Equal(t, w.db, db) - return w -} - -func TestWebshit_GetWeekly(t *testing.T) { - w := makeWS(t) - weekly, pub, err := w.GetWeekly() - t.Logf("Pub: %v", pub) - assert.NotNil(t, pub) - assert.Nil(t, err) - assert.NotEmpty(t, weekly) -} - -func TestWebshit_GetHeadlines(t *testing.T) { - w := makeWS(t) - headlines, err := w.GetHeadlines() - assert.Nil(t, err) - assert.NotEmpty(t, headlines) -} - -func TestWebshit_getStoryByURL(t *testing.T) { - w := makeWS(t) - expected := "Developer Tropes: “Google Does It”" - s, err := w.getStoryByURL("https://news.ycombinator.com/item?id=20432887") - assert.Nil(t, err) - assert.Equal(t, s.Title, expected) -} - -func TestWebshit_getStoryByURL_BadURL(t *testing.T) { - w := makeWS(t) - _, err := w.getStoryByURL("https://google.com") - assert.Error(t, err) -} - -func TestWebshit_GetBalance(t *testing.T) { - w := makeWS(t) - expected := 100 - actual := w.GetBalance("foo") - assert.Equal(t, expected, actual) -} - -func TestWebshit_checkBids(t *testing.T) { - w := makeWS(t) - bids := []Bid{ - Bid{User: "foo", Title: "bar", URL: "https://baz/?id=1", Bid: 10}, - Bid{User: "foo", Title: "bar2", URL: "http://baz/?id=2", Bid: 10}, - } - storyMap := map[string]Story{ - "1": Story{Title: "bar", URL: "http://baz/?id=1"}, - } - result := w.checkBids(bids, storyMap) - assert.Len(t, result, 1) - if len(result) > 0 { - assert.Len(t, result[0].WinningArticles, 1) - assert.Len(t, result[0].LosingArticles, 1) - } -} - -func TestWebshit_33PcWinner(t *testing.T) { - w := makeWS(t) - bids := []Bid{ - Bid{User: "foo", Title: "bar", URL: "https://baz/?id=1", Bid: 10}, - Bid{User: "foo", Title: "bar2", URL: "http://baz/?id=2", Bid: 10}, - Bid{User: "bar", Title: "bar", URL: "http://baz/?id=1", Bid: 5}, - } - storyMap := map[string]Story{ - "1": Story{Title: "bar", URL: "http://baz/?id=1"}, - } - result := w.checkBids(bids, storyMap) - assert.Len(t, result, 2) - if len(result) > 0 { - assert.Len(t, result[0].WinningArticles, 1) - assert.Len(t, result[0].LosingArticles, 1) - assert.Len(t, result[1].WinningArticles, 1) - assert.Len(t, result[1].LosingArticles, 0) - assert.Equal(t, result[0].Won, 16) - assert.Equal(t, result[1].Won, 8) - } -} diff --git a/util/testhn/main.go b/util/testhn/main.go new file mode 100644 index 0000000..d5f3ac1 --- /dev/null +++ b/util/testhn/main.go @@ -0,0 +1,49 @@ +package main + +import ( + "flag" + "fmt" + "strconv" + "strings" + + "github.com/gocolly/colly" +) + +var url = flag.String("url", "https://news.ycombinator.com/item?id=21530860", "URL to scrape") + +func main() { + flag.Parse() + //scrapeScoreAndComments(*url, func(score, comments int) { + // fmt.Printf("Finished scraping %s\nScore: %d, Comments: %d\n", + // *url, score, comments) + //}) + score, comments := scrapeScoreAndComments(*url) + fmt.Printf("Finished scraping %s\nScore: %d, Comments: %d\n", + *url, score, comments) +} + +func scrapeScoreAndComments(url string) (int, int) { + c := colly.NewCollector() + c.Async = true + + finished := make(chan bool) + + score := 0 + comments := 0 + + c.OnHTML("td.subtext > span.score", func(r *colly.HTMLElement) { + score, _ = strconv.Atoi(strings.Fields(r.Text)[0]) + }) + + c.OnHTML("td.subtext > a[href*='item?id=']:last-of-type", func(r *colly.HTMLElement) { + comments, _ = strconv.Atoi(strings.Fields(r.Text)[0]) + }) + + c.OnScraped(func(r *colly.Response) { + finished <- true + }) + + c.Visit(url) + <-finished + return score, comments +}