2019-03-22 00:12:15 +00:00
|
|
|
package tldr
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"strings"
|
2019-03-26 21:51:28 +00:00
|
|
|
"time"
|
2019-03-22 00:12:15 +00:00
|
|
|
|
|
|
|
"github.com/velour/catbase/bot"
|
|
|
|
"github.com/velour/catbase/bot/msg"
|
|
|
|
|
|
|
|
"github.com/rs/zerolog/log"
|
|
|
|
|
|
|
|
"github.com/james-bowman/nlp"
|
|
|
|
)
|
|
|
|
|
|
|
|
type TLDRPlugin struct {
|
2019-03-26 21:51:28 +00:00
|
|
|
bot bot.Bot
|
|
|
|
history []history
|
|
|
|
index int
|
|
|
|
lastRequest time.Time
|
|
|
|
}
|
|
|
|
|
|
|
|
// history is a single recorded chat message.
type history struct {
	timestamp time.Time // when the message was recorded
	user      string    // name of the user who sent it
	body      string    // message text, stored lower-cased
}
|
|
|
|
|
|
|
|
func New(b bot.Bot) *TLDRPlugin {
|
|
|
|
plugin := &TLDRPlugin{
|
2019-03-26 21:51:28 +00:00
|
|
|
bot: b,
|
|
|
|
history: []history{},
|
|
|
|
index: 0,
|
|
|
|
lastRequest: time.Now().Add(-24 * time.Hour),
|
2019-03-22 00:12:15 +00:00
|
|
|
}
|
|
|
|
b.Register(plugin, bot.Message, plugin.message)
|
|
|
|
b.Register(plugin, bot.Help, plugin.help)
|
|
|
|
return plugin
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *TLDRPlugin) message(kind bot.Kind, message msg.Message, args ...interface{}) bool {
|
2019-03-26 21:51:28 +00:00
|
|
|
timeLimit := time.Duration(p.bot.Config().GetInt("TLDR.HourLimit", 1))
|
2019-03-22 00:12:15 +00:00
|
|
|
lowercaseMessage := strings.ToLower(message.Body)
|
2019-03-26 21:51:28 +00:00
|
|
|
if lowercaseMessage == "tl;dr" && p.lastRequest.After(time.Now().Add(-timeLimit*time.Hour)) {
|
|
|
|
p.bot.Send(bot.Message, message.Channel, "Slow down, cowboy. Read that tiny backlog.")
|
|
|
|
return true
|
|
|
|
} else if lowercaseMessage == "tl;dr" {
|
|
|
|
p.lastRequest = time.Now()
|
|
|
|
nTopics := p.bot.Config().GetInt("TLDR.Topics", 5)
|
2019-03-22 00:12:15 +00:00
|
|
|
|
2019-03-28 21:38:04 +00:00
|
|
|
stopWordSlice := p.bot.Config().GetArray("TLDR.StopWords", []string{})
|
|
|
|
if len(stopWordSlice) == 0 {
|
|
|
|
stopWordSlice = THESE_ARE_NOT_THE_WORDS_YOU_ARE_LOOKING_FOR
|
|
|
|
p.bot.Config().SetArray("TLDR.StopWords", stopWordSlice)
|
|
|
|
}
|
|
|
|
|
|
|
|
vectoriser := nlp.NewCountVectoriser(stopWordSlice...)
|
2019-03-22 00:12:15 +00:00
|
|
|
lda := nlp.NewLatentDirichletAllocation(nTopics)
|
|
|
|
pipeline := nlp.NewPipeline(vectoriser, lda)
|
2019-03-26 21:51:28 +00:00
|
|
|
docsOverTopics, err := pipeline.FitTransform(p.getTopics()...)
|
2019-03-22 00:12:15 +00:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err)
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2019-03-22 01:46:28 +00:00
|
|
|
bestScores := make([][]float64, nTopics)
|
2019-03-26 21:51:28 +00:00
|
|
|
bestDocs := make([][]history, nTopics)
|
2019-03-22 01:46:28 +00:00
|
|
|
|
2019-03-26 21:51:28 +00:00
|
|
|
supportingDocs := p.bot.Config().GetInt("TLDR.Support", 3)
|
2019-03-22 02:29:10 +00:00
|
|
|
for i := 0; i < nTopics; i++ {
|
2019-03-22 01:46:28 +00:00
|
|
|
bestScores[i] = make([]float64, supportingDocs)
|
2019-03-26 21:51:28 +00:00
|
|
|
bestDocs[i] = make([]history, supportingDocs)
|
2019-03-22 01:46:28 +00:00
|
|
|
}
|
2019-03-22 00:12:15 +00:00
|
|
|
|
|
|
|
dr, dc := docsOverTopics.Dims()
|
2019-03-22 01:46:28 +00:00
|
|
|
for topic := 0; topic < dr; topic++ {
|
|
|
|
minScore, minIndex := min(bestScores[topic])
|
|
|
|
|
|
|
|
for doc := 0; doc < dc; doc++ {
|
2019-03-22 00:12:15 +00:00
|
|
|
score := docsOverTopics.At(topic, doc)
|
2019-03-22 01:46:28 +00:00
|
|
|
if score > minScore {
|
|
|
|
bestScores[topic][minIndex] = score
|
2019-03-26 21:51:28 +00:00
|
|
|
bestDocs[topic][minIndex] = p.history[doc]
|
2019-03-22 01:46:28 +00:00
|
|
|
minScore, minIndex = min(bestScores[topic])
|
2019-03-22 00:12:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
topicsOverWords := lda.Components()
|
|
|
|
tr, tc := topicsOverWords.Dims()
|
|
|
|
|
|
|
|
vocab := make([]string, len(vectoriser.Vocabulary))
|
|
|
|
for k, v := range vectoriser.Vocabulary {
|
|
|
|
vocab[v] = k
|
|
|
|
}
|
|
|
|
|
|
|
|
response := "Here you go captain 'too good to read backlog':\n"
|
|
|
|
|
|
|
|
for topic := 0; topic < tr; topic++ {
|
2019-03-22 01:36:11 +00:00
|
|
|
bestScore := -1.
|
|
|
|
bestTopic := ""
|
2019-03-22 00:12:15 +00:00
|
|
|
for word := 0; word < tc; word++ {
|
|
|
|
score := topicsOverWords.At(topic, word)
|
2019-03-22 01:36:11 +00:00
|
|
|
if score > bestScore {
|
|
|
|
bestScore = score
|
|
|
|
bestTopic = vocab[word]
|
2019-03-22 00:12:15 +00:00
|
|
|
}
|
|
|
|
}
|
2019-03-26 23:31:09 +00:00
|
|
|
response += fmt.Sprintf("\n*Topic #%d: %s*\n", topic, bestTopic)
|
2019-03-22 01:46:28 +00:00
|
|
|
for i := range bestDocs[topic] {
|
2019-03-26 23:31:09 +00:00
|
|
|
response += fmt.Sprintf("<%s>%s [%f]\n", bestDocs[topic][i].user, bestDocs[topic][i].body, bestScores[topic][i])
|
2019-03-22 01:46:28 +00:00
|
|
|
}
|
2019-03-26 23:31:09 +00:00
|
|
|
|
2019-03-22 00:12:15 +00:00
|
|
|
}
|
|
|
|
|
2019-03-26 21:51:28 +00:00
|
|
|
p.bot.Send(bot.Message, message.Channel, response)
|
2019-03-22 00:12:15 +00:00
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
if shouldKeepMessage(lowercaseMessage) {
|
2019-03-26 21:51:28 +00:00
|
|
|
currentHistorySize := len(p.history)
|
|
|
|
maxHistorySize := p.bot.Config().GetInt("TLDR.HistorySize", 1000)
|
|
|
|
hist := history{
|
|
|
|
body: lowercaseMessage,
|
|
|
|
user: message.User.Name,
|
|
|
|
timestamp: time.Now(),
|
|
|
|
}
|
2019-03-22 00:12:15 +00:00
|
|
|
if currentHistorySize < maxHistorySize {
|
2019-03-26 21:51:28 +00:00
|
|
|
p.history = append(p.history, hist)
|
|
|
|
p.index = 0
|
2019-03-22 00:12:15 +00:00
|
|
|
} else {
|
|
|
|
if currentHistorySize > maxHistorySize {
|
|
|
|
// We could resize this but we want to prune the oldest stuff, and
|
|
|
|
// I don't care to do this correctly so might as well not do it at all
|
|
|
|
}
|
|
|
|
|
2019-03-26 21:51:28 +00:00
|
|
|
if p.index >= currentHistorySize {
|
|
|
|
p.index = 0
|
2019-03-22 00:12:15 +00:00
|
|
|
}
|
|
|
|
|
2019-03-26 21:51:28 +00:00
|
|
|
p.history[p.index] = hist
|
|
|
|
p.index++
|
2019-03-22 00:12:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2019-03-26 21:51:28 +00:00
|
|
|
func (p *TLDRPlugin) getTopics() []string {
|
|
|
|
hist := []string{}
|
|
|
|
for _, h := range p.history {
|
|
|
|
hist = append(hist, h.body)
|
|
|
|
}
|
|
|
|
return hist
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *TLDRPlugin) pruneHistory() {
|
|
|
|
out := []history{}
|
|
|
|
yesterday := time.Now().Add(-24 * time.Hour)
|
|
|
|
for _, h := range p.history {
|
|
|
|
if yesterday.Before(h.timestamp) {
|
|
|
|
out = append(out, h)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
p.history = out
|
2019-03-26 21:55:07 +00:00
|
|
|
p.index = len(out)
|
2019-03-26 21:51:28 +00:00
|
|
|
}
|
|
|
|
|
2019-03-22 00:12:15 +00:00
|
|
|
// Help responds to help requests. Every plugin must implement a help function.
|
|
|
|
func (p *TLDRPlugin) help(kind bot.Kind, message msg.Message, args ...interface{}) bool {
|
2019-03-26 21:51:28 +00:00
|
|
|
p.bot.Send(bot.Message, message.Channel, "tl;dr")
|
2019-03-22 00:12:15 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// shouldKeepMessage reports whether message should be recorded in the
// history. No filtering is implemented: every message is kept.
func shouldKeepMessage(message string) bool {
	const keepEverything = true
	return keepEverything
}
|
2019-03-22 01:46:28 +00:00
|
|
|
|
|
|
|
// min returns the smallest value in slice together with the index of its
// first occurrence.
//
// The search is seeded with the first element rather than a fixed constant,
// so the returned index is always valid for a non-empty slice, including
// slices whose values are all 1 or greater. For an empty slice there is no
// element to report and min returns (1, -1); callers must check for a
// negative index before using it.
func min(slice []float64) (float64, int) {
	if len(slice) == 0 {
		return 1., -1
	}

	minVal, minIndex := slice[0], 0
	for index, val := range slice[1:] {
		// Strict comparison keeps the earliest index on ties.
		if val < minVal {
			minVal = val
			minIndex = index + 1 // ranging over slice[1:] shifts indices by one
		}
	}
	return minVal, minIndex
}
|