1
0
mirror of https://github.com/velour/catbase.git synced 2025-04-03 19:51:42 +00:00
catbase/util/testhn/main.go
Chris Sexton 905da629b9 bids: get a vote:comment ratio for scoring
* Ratio defaults to 1.0 on error
* Ratio bonus if a 0 comment article wins
* Remove buggy tests
* Add example scraping util for debugging
2019-11-14 10:15:39 -05:00

50 lines
1.0 KiB
Go

package main
import (
"flag"
"fmt"
"strconv"
"strings"
"github.com/gocolly/colly"
)
// url is the -url command-line flag: the address of the page to scrape.
// It defaults to a Hacker News item page.
var url = flag.String("url", "https://news.ycombinator.com/item?id=21530860", "URL to scrape")
func main() {
flag.Parse()
//scrapeScoreAndComments(*url, func(score, comments int) {
// fmt.Printf("Finished scraping %s\nScore: %d, Comments: %d\n",
// *url, score, comments)
//})
score, comments := scrapeScoreAndComments(*url)
fmt.Printf("Finished scraping %s\nScore: %d, Comments: %d\n",
*url, score, comments)
}
// scrapeScoreAndComments visits url with a colly collector and returns the
// score and the comment count read from the page's "td.subtext" cell.
//
// Either value is 0 when the matching element is absent, when its text does
// not start with a number (for example a link that carries no count), or when
// the page could not be fetched.
func scrapeScoreAndComments(url string) (int, int) {
	c := colly.NewCollector()
	c.Async = true

	score := 0
	comments := 0

	// The score element's text starts with the number of points.
	c.OnHTML("td.subtext > span.score", func(r *colly.HTMLElement) {
		score = leadingInt(r.Text)
	})

	// The last item link in the subtext row carries the comment count.
	c.OnHTML("td.subtext > a[href*='item?id=']:last-of-type", func(r *colly.HTMLElement) {
		comments = leadingInt(r.Text)
	})

	if err := c.Visit(url); err != nil {
		// The request was rejected before it was started, so there is
		// nothing to wait for and nothing was scraped.
		return 0, 0
	}

	// Wait for the asynchronous request to finish. Unlike a channel fed from
	// an OnScraped callback, Wait also returns when the request fails, so a
	// network or HTTP error cannot leave this function blocked forever.
	c.Wait()

	return score, comments
}

// leadingInt returns the integer value of the first whitespace-separated
// token of text, or 0 when text is blank or that token is not a number.
func leadingInt(text string) int {
	fields := strings.Fields(text)
	if len(fields) == 0 {
		return 0
	}
	n, err := strconv.Atoi(fields[0])
	if err != nil {
		return 0
	}
	return n
}