mirror of https://github.com/velour/catbase.git
bids: get a vote:comment ratio for scoring
* Ratio defaults to 1.0 on error * Ratio bonus if a 0 comment article wins * Remove buggy tests * Add example scraping util for debugging
This commit is contained in:
parent
30cd91079c
commit
905da629b9
10
go.mod
10
go.mod
|
@ -9,6 +9,9 @@ require (
|
|||
github.com/PaulRosset/go-hacknews v0.0.0-20170815075127-4aad99273a3c
|
||||
github.com/PuerkitoBio/goquery v1.5.0
|
||||
github.com/andybalholm/cascadia v1.1.0 // indirect
|
||||
github.com/antchfx/htmlquery v1.2.0 // indirect
|
||||
github.com/antchfx/xmlquery v1.2.0 // indirect
|
||||
github.com/antchfx/xpath v1.1.1 // indirect
|
||||
github.com/armon/go-radix v1.0.0 // indirect
|
||||
github.com/azr/backoff v0.0.0-20160115115103-53511d3c7330 // indirect
|
||||
github.com/chrissexton/gofuck v1.0.0
|
||||
|
@ -17,12 +20,16 @@ require (
|
|||
github.com/dustin/go-jsonpointer v0.0.0-20160814072949-ba0abeacc3dc // indirect
|
||||
github.com/dustin/gojson v0.0.0-20160307161227-2e71ec9dd5ad // indirect
|
||||
github.com/garyburd/go-oauth v0.0.0-20180319155456-bca2e7f09a17 // indirect
|
||||
github.com/gobwas/glob v0.2.3 // indirect
|
||||
github.com/gocolly/colly v1.2.0
|
||||
github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 // indirect
|
||||
github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 // indirect
|
||||
github.com/gonum/internal v0.0.0-20181124074243-f884aa714029 // indirect
|
||||
github.com/gorilla/websocket v1.4.1 // indirect
|
||||
github.com/james-bowman/nlp v0.0.0-20191016091239-d9dbfaff30c6
|
||||
github.com/james-bowman/sparse v0.0.0-20190423065201-80c6877364c7 // indirect
|
||||
github.com/jmoiron/sqlx v1.2.0
|
||||
github.com/kennygrant/sanitize v1.2.4 // indirect
|
||||
github.com/mattn/go-sqlite3 v1.11.0
|
||||
github.com/mmcdole/gofeed v1.0.0-beta2
|
||||
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf // indirect
|
||||
|
@ -30,14 +37,17 @@ require (
|
|||
github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254
|
||||
github.com/robertkrimen/otto v0.0.0-20180617131154-15f95af6e78d // indirect
|
||||
github.com/rs/zerolog v1.15.0
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
|
||||
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
||||
github.com/stretchr/objx v0.2.0 // indirect
|
||||
github.com/stretchr/testify v1.4.0
|
||||
github.com/temoto/robotstxt v1.1.1 // indirect
|
||||
github.com/velour/chat v0.0.0-20180713122344-fd1d1606cb89
|
||||
github.com/velour/velour v0.0.0-20160303155839-8e090e68d158
|
||||
golang.org/x/exp v0.0.0-20191014171548-69215a2ee97e // indirect
|
||||
golang.org/x/net v0.0.0-20191014212845-da9a3fd4c582 // indirect
|
||||
gonum.org/v1/gonum v0.6.0 // indirect
|
||||
google.golang.org/appengine v1.6.5 // indirect
|
||||
gopkg.in/go-playground/webhooks.v5 v5.13.0
|
||||
gopkg.in/sourcemap.v1 v1.0.5 // indirect
|
||||
gopkg.in/yaml.v2 v2.2.4 // indirect
|
||||
|
|
22
go.sum
22
go.sum
|
@ -17,6 +17,12 @@ github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRy
|
|||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/antchfx/htmlquery v1.2.0 h1:oKShnsGlnOHX6t4uj5OHgLKkABcJoqnXpqnscoi9Lpw=
|
||||
github.com/antchfx/htmlquery v1.2.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
|
||||
github.com/antchfx/xmlquery v1.2.0 h1:1nrzsSN5mFrlqFWSK9byiq/qXKE7O2vivYzhv1Ksnfw=
|
||||
github.com/antchfx/xmlquery v1.2.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
|
||||
github.com/antchfx/xpath v1.1.1 h1:mqGYmd5pioPu06+REIf8j3y6O3S1UpVNVoCameZHotg=
|
||||
github.com/antchfx/xpath v1.1.1/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
||||
github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
|
||||
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/azr/backoff v0.0.0-20160115115103-53511d3c7330 h1:ekDALXAVvY/Ub1UtNta3inKQwZ/jMB/zpOtD8rAYh78=
|
||||
|
@ -44,7 +50,15 @@ github.com/garyburd/go-oauth v0.0.0-20180319155456-bca2e7f09a17 h1:GOfMz6cRgTJ9j
|
|||
github.com/garyburd/go-oauth v0.0.0-20180319155456-bca2e7f09a17/go.mod h1:HfkOCN6fkKKaPSAeNq/er3xObxTW4VLeY6UUK895gLQ=
|
||||
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
|
||||
github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
|
||||
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
||||
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
||||
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
|
||||
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
|
||||
github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 h1:uHTyIjqVhYRhLbJ8nIiOJHkEZZ+5YoOsAbD3sk82NiE=
|
||||
github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
||||
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 h1:EvokxLQsaaQjcWVWSV38221VAK7qc2zhaO17bKys/18=
|
||||
github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82/go.mod h1:PxC8OnwL11+aosOB5+iEPoV3picfs8tUpkVd0pDo+Kg=
|
||||
github.com/gonum/internal v0.0.0-20181124074243-f884aa714029 h1:8jtTdc+Nfj9AR+0soOeia9UZSvYBvETVHZrugUowJ7M=
|
||||
|
@ -60,6 +74,8 @@ github.com/james-bowman/sparse v0.0.0-20190423065201-80c6877364c7/go.mod h1:G6Ec
|
|||
github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA=
|
||||
github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
|
||||
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
|
||||
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
||||
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
||||
github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
|
||||
github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
||||
github.com/mattn/go-sqlite3 v1.11.0 h1:LDdKkqtYlom37fkvqs8rMPFKAMe8+SgjbwZ6ex1/A/Q=
|
||||
|
@ -82,6 +98,8 @@ github.com/robertkrimen/otto v0.0.0-20180617131154-15f95af6e78d/go.mod h1:xvqspo
|
|||
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
|
||||
github.com/rs/zerolog v1.15.0 h1:uPRuwkWF4J6fGsJ2R0Gn2jB1EQiav9k3S6CSdygQJXY=
|
||||
github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
|
||||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
|
||||
|
@ -92,6 +110,8 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
|
|||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
|
||||
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
|
||||
github.com/velour/chat v0.0.0-20180713122344-fd1d1606cb89 h1:3D3M900hEBJJAqyKl70QuRHi5weX9+ptlQI1v+FNcQ8=
|
||||
github.com/velour/chat v0.0.0-20180713122344-fd1d1606cb89/go.mod h1:ejwOYCjnDMyO5LXFXRARQJGBZ6xQJZ3rgAHE5drSuMM=
|
||||
github.com/velour/velour v0.0.0-20160303155839-8e090e68d158 h1:p3rTUXxzuKsBOsHlkly7+rj9wagFBKeIsCDKkDII9sw=
|
||||
|
@ -115,6 +135,7 @@ golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73r
|
|||
golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20191014212845-da9a3fd4c582 h1:p9xBe/w/OzkeYVKm234g55gMdD1nSIooTir5kV11kfA=
|
||||
|
@ -137,6 +158,7 @@ gonum.org/v1/gonum v0.6.0 h1:DJy6UzXbahnGUf1ujUNkh/NEtK14qMo2nvlBPs4U5yw=
|
|||
gonum.org/v1/gonum v0.6.0/go.mod h1:9mxDZsDKxgMAuccQkewq682L+0eCu4dCN2yonUJTCLU=
|
||||
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
|
||||
gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
|
||||
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/go-playground/webhooks.v5 v5.13.0 h1:e9vtkQZK464+UdL3YjRox2yR8JSmh2094PUBMvdriFs=
|
||||
gopkg.in/go-playground/webhooks.v5 v5.13.0/go.mod h1:LZbya/qLVdbqDR1aKrGuWV6qbia2zCYSR5dpom2SInQ=
|
||||
|
|
|
@ -3,11 +3,15 @@ package webshit
|
|||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gocolly/colly"
|
||||
|
||||
hacknews "github.com/PaulRosset/go-hacknews"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/jmoiron/sqlx"
|
||||
|
@ -184,14 +188,19 @@ func (w *Webshit) checkBids(bids []Bid, storyMap map[string]Story) []WeeklyResul
|
|||
rec.WinningArticles = append(rec.WinningArticles, s)
|
||||
totalWinning += float64(b.Bid)
|
||||
} else {
|
||||
rec.LosingArticles = append(rec.LosingArticles, Story{b.Title, b.URL})
|
||||
rec.LosingArticles = append(rec.LosingArticles, Story{Title: b.Title, URL: b.URL})
|
||||
}
|
||||
total += float64(b.Bid)
|
||||
wr[b.User] = rec
|
||||
}
|
||||
|
||||
for _, b := range wins {
|
||||
payout := float64(b.Bid) / totalWinning * total
|
||||
score, comments, err := scrapeScoreAndComments(b.URL)
|
||||
// Keep the neutral 1.0 ratio when scraping failed; the scraped numbers are
// only meaningful (and only used) when the scrape succeeded.
ratio := 1.0
if err == nil {
	// Clamp the divisor to 1 so an article with zero comments cannot divide by zero.
	ratio = float64(score) / math.Max(float64(comments), 1.0)
}
|
||||
payout := float64(b.Bid) / totalWinning * total * ratio
|
||||
rec := wr[b.User]
|
||||
rec.Won += int(payout)
|
||||
rec.Score += int(payout)
|
||||
|
@ -201,6 +210,41 @@ func (w *Webshit) checkBids(bids []Bid, storyMap map[string]Story) []WeeklyResul
|
|||
return wrMapToSlice(wr)
|
||||
}
|
||||
|
||||
func scrapeScoreAndComments(url string) (int, int, error) {
|
||||
c := colly.NewCollector()
|
||||
|
||||
// why do I need this to break out of these stupid callbacks?
|
||||
c.Async = true
|
||||
|
||||
finished := make(chan bool)
|
||||
|
||||
score := 0
|
||||
comments := 0
|
||||
var err error = nil
|
||||
|
||||
c.OnHTML("td.subtext > span.score", func(r *colly.HTMLElement) {
|
||||
score, _ = strconv.Atoi(strings.Fields(r.Text)[0])
|
||||
})
|
||||
|
||||
c.OnHTML("td.subtext > a[href*='item?id=']:last-of-type", func(r *colly.HTMLElement) {
|
||||
comments, _ = strconv.Atoi(strings.Fields(r.Text)[0])
|
||||
})
|
||||
|
||||
c.OnScraped(func(r *colly.Response) {
|
||||
finished <- true
|
||||
})
|
||||
|
||||
c.OnError(func(r *colly.Response, e error) {
|
||||
log.Error().Err(err).Msgf("could not scrape %s", r.Request.URL)
|
||||
err = e
|
||||
finished <- true
|
||||
})
|
||||
|
||||
c.Visit(url)
|
||||
<-finished
|
||||
return score, comments, err
|
||||
}
|
||||
|
||||
// GetHeadlines will return the current possible news headlines for bidding
|
||||
func (w *Webshit) GetHeadlines() ([]Story, error) {
|
||||
news := hacknews.Initializer{Story: w.config.HNFeed, NbPosts: w.config.HNLimit}
|
||||
|
|
|
@ -1,99 +0,0 @@
|
|||
package webshit
|
||||
|
||||
import (
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func init() {
|
||||
log.Logger = log.Logger.Output(zerolog.ConsoleWriter{Out: os.Stderr})
|
||||
}
|
||||
|
||||
func makeWS(t *testing.T) *Webshit {
|
||||
db := sqlx.MustOpen("sqlite3", "file::memory:?mode=memory&cache=shared")
|
||||
w := New(db)
|
||||
assert.Equal(t, w.db, db)
|
||||
return w
|
||||
}
|
||||
|
||||
func TestWebshit_GetWeekly(t *testing.T) {
|
||||
w := makeWS(t)
|
||||
weekly, pub, err := w.GetWeekly()
|
||||
t.Logf("Pub: %v", pub)
|
||||
assert.NotNil(t, pub)
|
||||
assert.Nil(t, err)
|
||||
assert.NotEmpty(t, weekly)
|
||||
}
|
||||
|
||||
func TestWebshit_GetHeadlines(t *testing.T) {
|
||||
w := makeWS(t)
|
||||
headlines, err := w.GetHeadlines()
|
||||
assert.Nil(t, err)
|
||||
assert.NotEmpty(t, headlines)
|
||||
}
|
||||
|
||||
func TestWebshit_getStoryByURL(t *testing.T) {
|
||||
w := makeWS(t)
|
||||
expected := "Developer Tropes: “Google Does It”"
|
||||
s, err := w.getStoryByURL("https://news.ycombinator.com/item?id=20432887")
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, s.Title, expected)
|
||||
}
|
||||
|
||||
func TestWebshit_getStoryByURL_BadURL(t *testing.T) {
|
||||
w := makeWS(t)
|
||||
_, err := w.getStoryByURL("https://google.com")
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestWebshit_GetBalance(t *testing.T) {
|
||||
w := makeWS(t)
|
||||
expected := 100
|
||||
actual := w.GetBalance("foo")
|
||||
assert.Equal(t, expected, actual)
|
||||
}
|
||||
|
||||
func TestWebshit_checkBids(t *testing.T) {
|
||||
w := makeWS(t)
|
||||
bids := []Bid{
|
||||
Bid{User: "foo", Title: "bar", URL: "https://baz/?id=1", Bid: 10},
|
||||
Bid{User: "foo", Title: "bar2", URL: "http://baz/?id=2", Bid: 10},
|
||||
}
|
||||
storyMap := map[string]Story{
|
||||
"1": Story{Title: "bar", URL: "http://baz/?id=1"},
|
||||
}
|
||||
result := w.checkBids(bids, storyMap)
|
||||
assert.Len(t, result, 1)
|
||||
if len(result) > 0 {
|
||||
assert.Len(t, result[0].WinningArticles, 1)
|
||||
assert.Len(t, result[0].LosingArticles, 1)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebshit_33PcWinner(t *testing.T) {
|
||||
w := makeWS(t)
|
||||
bids := []Bid{
|
||||
Bid{User: "foo", Title: "bar", URL: "https://baz/?id=1", Bid: 10},
|
||||
Bid{User: "foo", Title: "bar2", URL: "http://baz/?id=2", Bid: 10},
|
||||
Bid{User: "bar", Title: "bar", URL: "http://baz/?id=1", Bid: 5},
|
||||
}
|
||||
storyMap := map[string]Story{
|
||||
"1": Story{Title: "bar", URL: "http://baz/?id=1"},
|
||||
}
|
||||
result := w.checkBids(bids, storyMap)
|
||||
assert.Len(t, result, 2)
|
||||
if len(result) > 0 {
|
||||
assert.Len(t, result[0].WinningArticles, 1)
|
||||
assert.Len(t, result[0].LosingArticles, 1)
|
||||
assert.Len(t, result[1].WinningArticles, 1)
|
||||
assert.Len(t, result[1].LosingArticles, 0)
|
||||
assert.Equal(t, result[0].Won, 16)
|
||||
assert.Equal(t, result[1].Won, 8)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/gocolly/colly"
|
||||
)
|
||||
|
||||
var url = flag.String("url", "https://news.ycombinator.com/item?id=21530860", "URL to scrape")
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
//scrapeScoreAndComments(*url, func(score, comments int) {
|
||||
// fmt.Printf("Finished scraping %s\nScore: %d, Comments: %d\n",
|
||||
// *url, score, comments)
|
||||
//})
|
||||
score, comments := scrapeScoreAndComments(*url)
|
||||
fmt.Printf("Finished scraping %s\nScore: %d, Comments: %d\n",
|
||||
*url, score, comments)
|
||||
}
|
||||
|
||||
func scrapeScoreAndComments(url string) (int, int) {
|
||||
c := colly.NewCollector()
|
||||
c.Async = true
|
||||
|
||||
finished := make(chan bool)
|
||||
|
||||
score := 0
|
||||
comments := 0
|
||||
|
||||
c.OnHTML("td.subtext > span.score", func(r *colly.HTMLElement) {
|
||||
score, _ = strconv.Atoi(strings.Fields(r.Text)[0])
|
||||
})
|
||||
|
||||
c.OnHTML("td.subtext > a[href*='item?id=']:last-of-type", func(r *colly.HTMLElement) {
|
||||
comments, _ = strconv.Atoi(strings.Fields(r.Text)[0])
|
||||
})
|
||||
|
||||
c.OnScraped(func(r *colly.Response) {
|
||||
finished <- true
|
||||
})
|
||||
|
||||
c.Visit(url)
|
||||
<-finished
|
||||
return score, comments
|
||||
}
|
Loading…
Reference in New Issue