catbase/plugins/newsbid/webshit/webshit.go

404 lines
8.9 KiB
Go
Raw Normal View History

package webshit
import (
	"bytes"
	"fmt"
	"math"
	"net/url"
	"strconv"
	"strings"
	"time"

	hacknews "github.com/PaulRosset/go-hacknews"
	"github.com/PuerkitoBio/goquery"
	"github.com/gocolly/colly"
	"github.com/jmoiron/sqlx"
	"github.com/mmcdole/gofeed"
	"github.com/rs/zerolog/log"
	"github.com/velour/catbase/plugins/newsbid/webshit/hn"
)
// Config holds the tunable settings of a Webshit instance.
type Config struct {
	// HNFeed is handed to the hacknews client as the story feed name.
	HNFeed string
	// HNLimit is handed to the hacknews client as the number of posts to fetch.
	HNLimit int
	// BalanceReferesh is the balance every user is reset to once Check has
	// settled a weekly.
	BalanceReferesh int
}
// DefaultConfig is the configuration New applies: the "topstories" feed,
// ten headlines, and a balance refresh of 100.
var DefaultConfig = Config{
	HNFeed:          "topstories",
	HNLimit:         10,
	BalanceReferesh: 100,
}
// Webshit bundles the database handle and the configuration shared by all of
// its methods.
type Webshit struct {
	// db stores the webshit_bids and webshit_balances tables.
	db *sqlx.DB
	// config is the Config supplied at construction time.
	config Config
}
2019-07-15 17:39:40 +00:00
// Bid is one row of the webshit_bids table: a wager by User on the story
// identified by URL.
//
// PlacedScore and ProcessedScore carry explicit db tags because sqlx's
// default mapper only lowercases field names ("placedscore"), which does not
// match the snake_case column names created by setup.
type Bid struct {
	ID    int
	User  string
	Title string
	URL   string
	// Bid is the amount wagered.
	Bid            int
	PlacedScore    int `db:"placed_score"`
	ProcessedScore int `db:"processed_score"`
	// Placed is the Unix time (seconds) at which the bid was made.
	Placed int64
	// Processed is 0 while the bid is open and the Unix time at which it was
	// settled afterwards.
	Processed int64
}
// PlacedParsed converts the Placed Unix timestamp (whole seconds) into a
// time.Time.
func (b Bid) PlacedParsed() time.Time {
	placedAt := time.Unix(b.Placed, 0)
	return placedAt
}
// Balance is one row of the webshit_balances table.
type Balance struct {
	User string
	// Balance is the amount the user still has available to bid.
	Balance int
	// Score is the user's accumulated winnings.
	Score int
}
2019-07-15 18:57:23 +00:00
type WeeklyResult struct {
User string
Won int
2019-11-21 16:59:52 +00:00
WinningArticles hn.Items
LosingArticles hn.Items
Score int
2019-07-15 18:57:23 +00:00
}
// New builds a Webshit on top of db using DefaultConfig.
func New(db *sqlx.DB) *Webshit {
	ws := NewConfig(db, DefaultConfig)
	return ws
}
// NewConfig builds a Webshit on top of db with the supplied configuration and
// runs setup so the required tables exist before it is returned.
func NewConfig(db *sqlx.DB, cfg Config) *Webshit {
	ws := new(Webshit)
	ws.db = db
	ws.config = cfg
	ws.setup()
	return ws
}
// setup creates the webshit_bids and webshit_balances tables if they do not
// exist yet. It inserts no rows. MustExec panics if either statement fails.
//
// NOTE(review): if the backing store is SQLite, columns declared `string` get
// NUMERIC affinity, so numeric-looking text may be coerced — confirm whether
// `text` was intended before changing the schema.
func (w *Webshit) setup() {
	w.db.MustExec(`create table if not exists webshit_bids (
		id integer primary key autoincrement,
		user string,
		title string,
		url string,
		bid integer,
		placed_score integer,
		processed_score integer,
		placed integer,
		processed integer
	)`)
	w.db.MustExec(`create table if not exists webshit_balances (
		user string primary key,
		balance int,
		score int
	)`)
}
func (w *Webshit) Check() ([]WeeklyResult, error) {
2019-07-15 18:57:23 +00:00
stories, published, err := w.GetWeekly()
if err != nil {
return nil, err
}
var bids []Bid
if err = w.db.Select(&bids, `select user,title,url,bid from webshit_bids where placed < ? and processed=0`,
2019-07-15 18:57:23 +00:00
published.Unix()); err != nil {
return nil, err
}
// Assuming no bids earlier than the weekly means there hasn't been a new weekly
if len(bids) == 0 {
return nil, fmt.Errorf("there are no bids against the current ngate post")
2019-07-15 18:57:23 +00:00
}
2019-11-21 16:59:52 +00:00
storyMap := map[string]hn.Item{}
2019-07-15 18:57:23 +00:00
for _, s := range stories {
u, err := url.Parse(s.URL)
if err != nil {
log.Error().Err(err).Msg("couldn't parse URL")
continue
}
id := u.Query().Get("id")
storyMap[id] = s
2019-07-15 18:57:23 +00:00
}
wr := w.checkBids(bids, storyMap)
// Update all balance scores in a tx
if err := w.updateScores(wr); err != nil {
return nil, err
}
// Delete all those bids
if _, err = w.db.Exec(`update webshit_bids set processed=? where placed < ?`,
time.Now().Unix(), published.Unix()); err != nil {
2019-07-15 18:57:23 +00:00
return nil, err
}
// Set all balances to 100
if _, err = w.db.Exec(`update webshit_balances set balance=?`,
w.config.BalanceReferesh); err != nil {
2019-07-15 18:57:23 +00:00
return nil, err
}
return wr, nil
}
2019-11-21 16:59:52 +00:00
func (w *Webshit) checkBids(bids []Bid, storyMap map[string]hn.Item) []WeeklyResult {
var wins []Bid
2019-08-13 20:14:48 +00:00
total, totalWinning := 0.0, 0.0
2019-07-15 18:57:23 +00:00
wr := map[string]WeeklyResult{}
2019-07-15 18:57:23 +00:00
for _, b := range bids {
score := w.GetScore(b.User)
if _, ok := wr[b.User]; !ok {
2019-07-15 18:57:23 +00:00
wr[b.User] = WeeklyResult{
User: b.User,
Score: score,
2019-07-15 18:57:23 +00:00
}
}
rec := wr[b.User]
u, err := url.Parse(b.URL)
if err != nil {
log.Error().Err(err).Msg("couldn't parse URL")
continue
}
id := u.Query().Get("id")
if s, ok := storyMap[id]; ok {
wins = append(wins, b)
rec.WinningArticles = append(rec.WinningArticles, s)
2019-08-13 20:14:48 +00:00
totalWinning += float64(b.Bid)
2019-07-15 18:57:23 +00:00
} else {
2019-11-21 16:59:52 +00:00
rec.LosingArticles = append(rec.LosingArticles, hn.Item{Title: b.Title, URL: b.URL})
2019-07-15 18:57:23 +00:00
}
2019-08-13 20:14:48 +00:00
total += float64(b.Bid)
wr[b.User] = rec
2019-07-15 18:57:23 +00:00
}
for _, b := range wins {
score, comments, err := scrapeScoreAndComments(b.URL)
ratio := 1.0
if err != nil {
ratio = float64(score) / math.Max(float64(comments), 1.0)
}
payout := float64(b.Bid) / totalWinning * total * ratio
rec := wr[b.User]
2019-08-13 20:14:48 +00:00
rec.Won += int(payout)
rec.Score += int(payout)
wr[b.User] = rec
}
return wrMapToSlice(wr)
2019-07-15 18:57:23 +00:00
}
// scrapeScoreAndComments visits url and extracts two numbers from the page:
// the leading integer of the "td.subtext > span.score" element and the leading
// integer of the last "item?id=" link inside "td.subtext".
//
// It returns (score, comments, err). A value is 0 when its element is missing
// or does not start with an integer. err is non-nil when the visit could not
// be started or when the collector reported an error for the request.
func scrapeScoreAndComments(url string) (int, int, error) {
	c := colly.NewCollector()
	// Run the request asynchronously and block on Wait below; Wait returns
	// only after all callbacks of the request have finished.
	c.Async = true

	score, comments := 0, 0
	var err error

	// leadingInt parses the first whitespace-separated token of text and
	// yields 0 when there is no token or it is not an integer.
	leadingInt := func(text string) int {
		fields := strings.Fields(text)
		if len(fields) == 0 {
			return 0
		}
		n, convErr := strconv.Atoi(fields[0])
		if convErr != nil {
			return 0
		}
		return n
	}

	c.OnHTML("td.subtext > span.score", func(r *colly.HTMLElement) {
		score = leadingInt(r.Text)
	})
	c.OnHTML("td.subtext > a[href*='item?id=']:last-of-type", func(r *colly.HTMLElement) {
		comments = leadingInt(r.Text)
	})
	c.OnError(func(r *colly.Response, e error) {
		log.Error().Err(e).Msgf("could not scrape %s", r.Request.URL)
		err = e
	})

	// Visit fails up front when the request is rejected before it is sent; no
	// callback runs in that case, so report the error directly.
	if visitErr := c.Visit(url); visitErr != nil {
		return 0, 0, visitErr
	}
	c.Wait()

	return score, comments, err
}
// GetHeadlines will return the current possible news headlines for bidding
2019-11-21 16:59:52 +00:00
func (w *Webshit) GetHeadlines() (hn.Items, error) {
news := hacknews.Initializer{Story: w.config.HNFeed, NbPosts: w.config.HNLimit}
ids, err := news.GetCodesStory()
if err != nil {
return nil, err
}
posts, err := news.GetPostStory(ids)
if err != nil {
return nil, err
}
2019-11-21 16:59:52 +00:00
var stories hn.Items
2019-07-15 17:39:40 +00:00
for _, p := range posts {
2019-11-21 16:59:52 +00:00
stories = append(stories, hn.Item{
2019-07-15 17:39:40 +00:00
Title: p.Title,
URL: p.Url,
})
}
return stories, nil
}
// GetWeekly will return the headlines in the last webshit weekly report
2019-11-21 16:59:52 +00:00
func (w *Webshit) GetWeekly() (hn.Items, *time.Time, error) {
fp := gofeed.NewParser()
feed, err := fp.ParseURL("http://n-gate.com/hackernews/index.rss")
if err != nil {
2019-07-15 18:57:23 +00:00
return nil, nil, err
}
if len(feed.Items) <= 0 {
2019-07-15 18:57:23 +00:00
return nil, nil, fmt.Errorf("no webshit weekly found")
}
published := feed.Items[0].PublishedParsed
2019-07-15 18:57:23 +00:00
buf := bytes.NewBufferString(feed.Items[0].Description)
doc, err := goquery.NewDocumentFromReader(buf)
if err != nil {
2019-07-15 18:57:23 +00:00
return nil, nil, err
}
2019-11-21 16:59:52 +00:00
var items hn.Items
doc.Find(".storylink").Each(func(i int, s *goquery.Selection) {
2019-11-21 16:59:52 +00:00
story := hn.Item{
2019-07-15 18:57:23 +00:00
Title: s.Find("a").Text(),
URL: s.SiblingsFiltered(".small").First().Find("a").AttrOr("href", ""),
2019-07-15 18:57:23 +00:00
}
items = append(items, story)
log.Debug().
Str("URL", story.URL).
Str("Title", story.Title).
Msg("Parsed webshit story")
})
2019-07-15 18:57:23 +00:00
return items, published, nil
}
// GetBalances returns the current balance for all known users
// Any unknown user has a default balance on their first bid
2019-07-15 17:39:40 +00:00
func (w *Webshit) GetBalance(user string) int {
q := `select balance from webshit_balances where user=?`
var balance int
err := w.db.Get(&balance, q, user)
if err != nil {
return 100
}
return balance
}
// GetScore returns the stored score of user, or 0 when the lookup fails for
// any reason (including the user having no row).
func (w *Webshit) GetScore(user string) int {
	var stored int
	if err := w.db.Get(&stored, `select score from webshit_balances where user=?`, user); err != nil {
		return 0
	}
	return stored
}
// GetAllBids returns every bid that has not been processed yet
// (processed=0), or the database error if the query fails.
func (w *Webshit) GetAllBids() ([]Bid, error) {
	var open []Bid
	if err := w.db.Select(&open, `select * from webshit_bids where processed=0`); err != nil {
		return nil, err
	}
	return open, nil
}
// GetAllBalances returns every row of webshit_balances, or the database error
// if the query fails.
func (w *Webshit) GetAllBalances() ([]Balance, error) {
	var all []Balance
	if err := w.db.Select(&all, `select * from webshit_balances`); err != nil {
		return nil, err
	}
	return all, nil
}
// Bid allows a user to place a bid on a particular story
func (w *Webshit) Bid(user string, amount int, URL string) (Bid, error) {
2019-07-15 17:39:40 +00:00
bal := w.GetBalance(user)
2019-07-18 18:57:24 +00:00
if amount < 0 {
return Bid{}, fmt.Errorf("cannot bid less than 0")
}
2019-07-15 17:39:40 +00:00
if bal < amount {
return Bid{}, fmt.Errorf("cannot bid more than balance, %d", bal)
2019-07-15 17:39:40 +00:00
}
story, err := w.getStoryByURL(URL)
if err != nil {
return Bid{}, err
2019-07-15 17:39:40 +00:00
}
ts := time.Now().Unix()
2019-07-15 18:57:23 +00:00
tx := w.db.MustBegin()
_, err = tx.Exec(`insert into webshit_bids (user,title,url,bid,placed,processed) values (?,?,?,?,?,0)`,
user, story.Title, story.URL, amount, ts)
2019-07-15 18:57:23 +00:00
if err != nil {
tx.Rollback()
return Bid{}, err
2019-07-15 18:57:23 +00:00
}
q := `insert into webshit_balances (user,balance,score) values (?,?,0)
on conflict(user) do update set balance=?`
_, err = tx.Exec(q, user, bal-amount, bal-amount)
2019-07-15 18:57:23 +00:00
if err != nil {
tx.Rollback()
return Bid{}, err
2019-07-15 18:57:23 +00:00
}
tx.Commit()
2019-07-15 17:39:40 +00:00
return Bid{
User: user,
Title: story.Title,
URL: story.URL,
Placed: ts,
}, err
2019-07-15 17:39:40 +00:00
}
// getStoryByURL scrapes the URL for a title
2019-11-21 16:59:52 +00:00
func (w *Webshit) getStoryByURL(URL string) (hn.Item, error) {
2019-07-15 17:39:40 +00:00
u, err := url.Parse(URL)
if err != nil {
2019-11-21 16:59:52 +00:00
return hn.Item{}, err
2019-07-15 17:39:40 +00:00
}
if u.Host != "news.ycombinator.com" {
2019-11-21 16:59:52 +00:00
return hn.Item{}, fmt.Errorf("expected HN link")
2019-07-15 17:39:40 +00:00
}
2019-11-21 16:59:52 +00:00
id, _ := strconv.Atoi(u.Query().Get("id"))
return hn.GetItem(id)
}
2019-07-15 18:57:23 +00:00
func (w *Webshit) updateScores(results []WeeklyResult) error {
2019-07-15 18:57:23 +00:00
tx := w.db.MustBegin()
for _, res := range results {
if _, err := tx.Exec(`update webshit_balances set score=? where user=?`,
res.Score, res.User); err != nil {
2019-07-15 18:57:23 +00:00
tx.Rollback()
return err
}
}
err := tx.Commit()
return err
}
// wrMapToSlice flattens the per-user result map into a slice. The order
// follows map iteration and is therefore unspecified. The returned slice is
// never nil, even for an empty map.
func wrMapToSlice(wr map[string]WeeklyResult) []WeeklyResult {
	results := make([]WeeklyResult, 0, len(wr))
	for user := range wr {
		results = append(results, wr[user])
	}
	return results
}