mirror of https://github.com/velour/catbase.git

hn: remove more scrapes

commit b3f3e09d89 (parent af9fc12038)
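In short: the colly-based scrapeScoreAndComments helper and the go-hacknews-backed GetHeadlines are deleted, checkBids now pulls live scores and comment counts from the hn API client instead of scraping the story page, and the transaction paths gain explicit error handling around Rollback and Commit.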
@@ -6,14 +6,10 @@ import (
 	"math"
 	"net/url"
 	"strconv"
-	"strings"
 	"time"
 
 	"github.com/velour/catbase/plugins/newsbid/webshit/hn"
 
-	"github.com/gocolly/colly"
-
-	hacknews "github.com/PaulRosset/go-hacknews"
 	"github.com/PuerkitoBio/goquery"
 	"github.com/jmoiron/sqlx"
 	"github.com/mmcdole/gofeed"
@@ -182,7 +178,11 @@ func (w *Webshit) checkBids(bids []Bid, storyMap map[string]hn.Item) []WeeklyRes
 	}
 
 	for _, b := range wins {
-		score, comments, err := scrapeScoreAndComments(b.URL)
+		u, _ := url.Parse(b.URL)
+		id, _ := strconv.Atoi(u.Query().Get("id"))
+		item, err := hn.GetItem(id)
+		score := item.Score
+		comments := item.Descendants
 		ratio := 1.0
 		if err != nil {
 			ratio = float64(score) / math.Max(float64(comments), 1.0)
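The hn package itself is not touched by this diff, so the shape of hn.GetItem is an assumption here. A minimal sketch of what it plausibly looks like, modeled on the public Hacker News Firebase API, whose score and descendants fields line up with the item.Score and item.Descendants reads above. The endpoint, field set, and error handling are guesses, not code from this repository:

package hn

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// Item carries the fields checkBids reads; in the official HN API,
// "descendants" is the total comment count. (Sketch: field set assumed.)
type Item struct {
	ID          int    `json:"id"`
	Title       string `json:"title"`
	URL         string `json:"url"`
	Score       int    `json:"score"`
	Descendants int    `json:"descendants"`
}

// GetItem fetches a single item by ID from the Firebase-backed HN API.
func GetItem(id int) (Item, error) {
	resp, err := http.Get(fmt.Sprintf("https://hacker-news.firebaseio.com/v0/item/%d.json", id))
	if err != nil {
		return Item{}, err
	}
	defer resp.Body.Close()
	var item Item
	if err := json.NewDecoder(resp.Body).Decode(&item); err != nil {
		return Item{}, err
	}
	return item, nil
}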
@@ -197,62 +197,6 @@ func (w *Webshit) checkBids(bids []Bid, storyMap map[string]hn.Item) []WeeklyRes
 	return wrMapToSlice(wr)
 }
 
-func scrapeScoreAndComments(url string) (int, int, error) {
-	c := colly.NewCollector()
-
-	// why do I need this to break out of these stupid callbacks?
-	c.Async = true
-
-	finished := make(chan bool)
-
-	score := 0
-	comments := 0
-	var err error = nil
-
-	c.OnHTML("td.subtext > span.score", func(r *colly.HTMLElement) {
-		score, _ = strconv.Atoi(strings.Fields(r.Text)[0])
-	})
-
-	c.OnHTML("td.subtext > a[href*='item?id=']:last-of-type", func(r *colly.HTMLElement) {
-		comments, _ = strconv.Atoi(strings.Fields(r.Text)[0])
-	})
-
-	c.OnScraped(func(r *colly.Response) {
-		finished <- true
-	})
-
-	c.OnError(func(r *colly.Response, e error) {
-		log.Error().Err(err).Msgf("could not scrape %s", r.Request.URL)
-		err = e
-		finished <- true
-	})
-
-	c.Visit(url)
-	<-finished
-	return score, comments, err
-}
-
-// GetHeadlines will return the current possible news headlines for bidding
-func (w *Webshit) GetHeadlines() (hn.Items, error) {
-	news := hacknews.Initializer{Story: w.config.HNFeed, NbPosts: w.config.HNLimit}
-	ids, err := news.GetCodesStory()
-	if err != nil {
-		return nil, err
-	}
-	posts, err := news.GetPostStory(ids)
-	if err != nil {
-		return nil, err
-	}
-	var stories hn.Items
-	for _, p := range posts {
-		stories = append(stories, hn.Item{
-			Title: p.Title,
-			URL:   p.Url,
-		})
-	}
-	return stories, nil
-}
-
 // GetWeekly will return the headlines in the last webshit weekly report
 func (w *Webshit) GetWeekly() (hn.Items, *time.Time, error) {
 	fp := gofeed.NewParser()
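With score and comments now coming straight from the API, there is nothing left for the CSS-selector scrape of news.ycombinator.com to do, so it goes, taking its async-collector-and-channel shutdown workaround and the go-hacknews feed client with it.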
@@ -348,7 +292,9 @@ func (w *Webshit) Bid(user string, amount int, URL string) (Bid, error) {
 	_, err = tx.Exec(`insert into webshit_bids (user,title,url,bid,placed,processed) values (?,?,?,?,?,0)`,
 		user, story.Title, story.URL, amount, ts)
 	if err != nil {
-		tx.Rollback()
+		if err := tx.Rollback(); err != nil {
+			return Bid{}, err
+		}
 		return Bid{}, err
 	}
 	q := `insert into webshit_balances (user,balance,score) values (?,?,0)
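One subtlety in the new rollback handling: the inner err shadows the outer one, so if Rollback itself fails the caller gets the rollback error and the original Exec error is lost. A common alternative, sketched here as a hypothetical helper rather than anything this commit adds, keeps both failures visible:

package webshit

import (
	"database/sql"
	"fmt"
)

// rollbackPreserving is a hypothetical helper: it reports a failed rollback
// without discarding the error that triggered it. %w keeps the original
// error available to errors.Is / errors.As.
func rollbackPreserving(tx *sql.Tx, original error) error {
	if rbErr := tx.Rollback(); rbErr != nil {
		return fmt.Errorf("rollback failed: %v (after: %w)", rbErr, original)
	}
	return original
}

A call like return Bid{}, rollbackPreserving(tx, err) would then stand in for the four-line block above.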
@@ -358,7 +304,7 @@ func (w *Webshit) Bid(user string, amount int, URL string) (Bid, error) {
 		tx.Rollback()
 		return Bid{}, err
 	}
-	tx.Commit()
+	err = tx.Commit()
 
 	return Bid{
 		User: user,
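Capturing err = tx.Commit() matters because some failures only surface at commit time; the tail of the return statement sits outside this hunk, but presumably that err now reaches the caller.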
@@ -377,7 +323,10 @@ func (w *Webshit) getStoryByURL(URL string) (hn.Item, error) {
 	if u.Host != "news.ycombinator.com" {
 		return hn.Item{}, fmt.Errorf("expected HN link")
 	}
-	id, _ := strconv.Atoi(u.Query().Get("id"))
+	id, err := strconv.Atoi(u.Query().Get("id"))
+	if id == 0 || err != nil {
+		return hn.Item{}, fmt.Errorf("invalid item ID")
+	}
 	return hn.GetItem(id)
 }
 
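A standalone illustration (not repository code) of which links the new id == 0 || err != nil guard lets through; the item IDs are made up for the example:

package main

import (
	"fmt"
	"net/url"
	"strconv"
)

func main() {
	for _, link := range []string{
		"https://news.ycombinator.com/item?id=8863", // accepted: id parses to a nonzero int
		"https://news.ycombinator.com/item?id=abc",  // rejected: Atoi fails
		"https://news.ycombinator.com/item",         // rejected: Atoi("") fails
	} {
		u, _ := url.Parse(link)
		id, err := strconv.Atoi(u.Query().Get("id"))
		fmt.Printf("%s -> valid=%v\n", link, err == nil && id != 0)
	}
}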
@@ -386,7 +335,9 @@ func (w *Webshit) updateScores(results []WeeklyResult) error {
 	for _, res := range results {
 		if _, err := tx.Exec(`update webshit_balances set score=? where user=?`,
 			res.Score, res.User); err != nil {
-			tx.Rollback()
+			if err := tx.Rollback(); err != nil {
+				return err
+			}
 			return err
 		}
 	}