catbase/plugins/tldr/tldr.go

package tldr

import (
	"fmt"
	"strings"

	"github.com/velour/catbase/bot"
	"github.com/velour/catbase/bot/msg"

	"github.com/rs/zerolog/log"

	"github.com/james-bowman/nlp"
)

var (
	// THESE_ARE_NOT_THE_WORDS_YOU_ARE_LOOKING_FOR lists the short, low-signal
	// tokens (single letters, two-letter pairs and common three-letter words)
	// that the plugin hands to nlp.NewCountVectoriser when it builds a summary.
	// The identifier keeps its original spelling because the message handler
	// refers to it by this name.
	THESE_ARE_NOT_THE_WORDS_YOU_ARE_LOOKING_FOR = []string{"p", "z", "i", "c", "e", "s", "x", "n", "b", "t", "d", "m", "r", "a", "f", "l", "w", "o", "g", "h", "v", "k", "y", "j", "u", "q", "th", "wu", "qt", "so", "ru", "pm", "in", "is", "am", "me", "on", "by", "kw", "hu", "bg", "ob", "re", "wx", "go", "hl", "vc", "bl", "rg", "wr", "cw", "pj", "tf", "nr", "aw", "qc", "it", "cj", "or", "ty", "hk", "be", "wc", "de", "lf", "mj", "bw", "at", "as", "gd", "ww", "ko", "og", "gg", "cz", "an", "mh", "we", "rb", "mv", "uk", "wt", "us", "hq", "if", "mu", "pn", "js", "my", "ol", "ul", "io", "lm", "do", "cd", "fo", "no", "vg", "lu", "dg", "zu", "sv", "wn", "fu", "dk", "tv", "la", "sn", "wb", "pc", "he", "pk", "ii", "wm", "up", "bo", "ca", "fd", "uh", "hh", "al", "id", "bd", "uw", "co", "pf", "ez", "df", "ro", "et", "dh", "ui", "gl", "st", "rl", "ev", "jj", "fp", "hc", "en", "eh", "rp", "ka", "rj", "bm", "oh", "tb", "ix", "ad", "cg", "ny", "rn", "cn", "dc", "vp", "jm", "tp", "om", "ok", "ms", "wp", "hi", "aj", "oc", "sq", "hp", "yu", "sk", "dx", "eg", "ip", "bk", "hz", "pa", "fg", "rh", "tx", "ve", "za", "ht", "ie", "el", "ma", "xi", "ou", "dp", "nu", "mw", "mf", "md", "fl", "mb", "mr", "ld", "uc", "il", "ln", "mm", "ur", "ed", "pd", "le", "jc", "az", "un", "mi", "dm", "wy", "jd", "oe", "to", "pb", "dr", "kb", "pp", "na", "rx", "os", "nb", "yn", "ci", "gc", "ex", "dt", "au", "fi", "np", "nc", "po", "va", "rd", "sc", "ws", "cu", "se", "di", "km", "ga", "ac", "ft", "lc", "fa", "im", "vs", "ar", "mo", "sa", "sg", "uv", "xp", "je", "eq", "lt", "eu", "cc", "wa", "dj", "ls", "cm", "wi", "dl", "ct", "fx", "yo", "da", "vb", "of", "nj", "hr", "em", "iv", "nn", "rw", "fs", "ye", "um", "ni", "ne", "du", "oo", "bp", "gs", "fw", "nt", "es", "fc", "ti", "cb", "cv", "gb", "bc", "pr", "fr", "aa", "mt", "ir", "gp", "oz", "mg", "tc", "hb", "sl", "af", "bt", "ch", "sd", "jp", "lb", "rs", "ep", "ef", "rr", "fy", "tu", "dv", "xl", "ss", "tt", "ap", "nm", "mn", "nd", "pe", "op", "ng", "tn", "ge", "ts",
		"gr", "ce", "mx", "ab", "ic", "yr", "ot", "ai", "pi", "rv", "hs", "ae", "tm", "sp", "sh", "gt", "nh", "ho", "cl", "ll", "fm", "gi", "ta", "db", "ph", "ia", "pt", "bi", "ha", "ds", "ea", "lg", "bs", "ja", "ns", "wv", "nw", "sm", "ff", "ah", "sb", "td", "fe", "ak", "rf", "ps", "ky", "pl", "br", "lo", "ml", "dd", "cp", "cs", "rt", "ri", "gm", "sf", "kg", "ut", "si", "mc", "vt", "lp", "cf", "rm", "ag", "vi", "ec", "ba", "rc", "cr", "pg", "ee", "ra", "ks", "sw", "av", "te", "hd", "nz", "bb", "er", "jr", "tr", "nv", "ya", "nl", "li", "su", "mp", "sr", "ted", "bid", "can", "the", "nat", "car", "wan", "dig", "neo", "enb", "pvc", "dod", "fri", "dvd", "cia", "tex", "wed", "une", "how", "inn", "lid", "mia", "ltd", "los", "are", "yen", "cho", "dui", "inc", "win", "col", "upc", "bed", "dsc", "ste", "aye", "nhs", "dow", "tue", "cio", "ooo", "cas", "thu", "sea", "cut", "mpg", "rrp", "tel", "its", "ips", "pts", "own", "kit", "mug", "has", "sku", "nbc", "dip", "acm", "boy", "end", "ids", "him", "est", "son", "ict", "mac", "iii", "gmt", "max", "per", "xml", "big", "bin", "law", "sap", "ala", "art", "cir", "lip", "bat", "top", "eco", "sol", "van", "had", "buf", "rip", "ads", "usa", "wma", "seq", "pop", "int", "rid", "rna", "sim", "abs", "hit", "but", "wal", "ati", "doe", "eye", "geo", "old", "arg", "usb", "uni", "php", "etc", "diy", "leo", "tgp", "mud", "msn", "fee", "rpg", "las", "ide", "sic", "min", "aid", "avi", "ons", "non", "mel", "div", "ppc", "day", "fat", "saw", "cet", "cow", "mls", "pst", "why", "phi", "bra", "mae", "tom", "fin", "sub", "irc", "gpl", "led", "fan", "low", "ten", "gif", "ate", "man", "cat", "die", "ton", "tmp", "rec", "two", "ddr", "our", "gsm", "pet", "guy", "dev", "cup", "vol", "one", "you", "mag", "dee", "pit", "mba", "lee", "job", "boc", "pmc", "cfr", "bee", "vii", "llp", "too", "tap", "for", "bob", "fit", "men", "met", "mem", "por", "www", "cgi", "soa", "jvc", "tft", "ccd", "liz", "ice", "dat", "ali", "box", "llc", "sec", "bus", "ash", "bag", "gay",
		"all", "tub", "sox", "ibm", "sas", "gig", "qui", "pty", "dns", "air", "nor", "bug", "mid", "pas", "icq",
	}
)

// TLDRPlugin keeps a bounded history of chat messages and, on request,
// summarizes that history into topics.
type TLDRPlugin struct {
	// Bot is the bot this plugin is registered with; it is used to read
	// configuration and to send replies.
	Bot     bot.Bot
	// History holds the lower-cased bodies of the messages kept so far.
	History []string
	// Index is the position in History that the next message overwrites once
	// History has reached its configured maximum size.
	Index   int
}

// New creates a TLDRPlugin bound to b, starting with an empty history, and
// registers the plugin's message and help handlers with the bot before
// returning it.
func New(b bot.Bot) *TLDRPlugin {
	p := new(TLDRPlugin)
	p.Bot = b
	p.History = []string{}
	p.Index = 0

	// Hook both callbacks up so the bot routes chat and help events here.
	b.Register(p, bot.Message, p.message)
	b.Register(p, bot.Help, p.help)

	return p
}

// message is the bot.Message handler registered by New.
//
// When the lower-cased body is exactly "tl;dr" it replies with a topic
// summary of the stored history and reports whether a summary was sent. Every
// other message is offered to shouldKeepMessage and, if kept, stored
// lower-cased in the history; for those messages the result is always false.
func (p *TLDRPlugin) message(kind bot.Kind, message msg.Message, args ...interface{}) bool {
	lowercaseMessage := strings.ToLower(message.Body)
	if lowercaseMessage == "tl;dr" {
		return p.summarize(message)
	}

	if shouldKeepMessage(lowercaseMessage) {
		p.remember(lowercaseMessage)
	}
	return false
}

// summarize fits a topic model over the stored history and sends one reply to
// the channel of message, listing for each topic its highest-scoring word and
// the stored message that scored highest for that topic. It returns true when
// a reply was sent and false when there was nothing to summarize or the model
// could not be fitted.
func (p *TLDRPlugin) summarize(message msg.Message) bool {
	if len(p.History) == 0 {
		// With no recorded messages there is no corpus to fit a model on.
		log.Debug().Msg("tl;dr requested before any history was recorded")
		return false
	}
	log.Debug().Int("messages", len(p.History)).Msg("summarizing history for tl;dr")

	nTopics := p.Bot.Config().GetInt("TLDR.Topics", 5)

	vectoriser := nlp.NewCountVectoriser(THESE_ARE_NOT_THE_WORDS_YOU_ARE_LOOKING_FOR...)
	lda := nlp.NewLatentDirichletAllocation(nTopics)
	pipeline := nlp.NewPipeline(vectoriser, lda)
	docsOverTopics, err := pipeline.FitTransform(p.History...)
	if err != nil {
		// A zerolog event is only written once Msg (or Send) is called.
		log.Error().Err(err).Msg("tl;dr: fitting the topic model failed")
		return false
	}

	bestScores := make([]float64, nTopics)
	bestDocs := make([]string, nTopics)

	// The matrix is read with the topic as the row and the document as the
	// column; for each topic remember the document with the highest score.
	dr, dc := docsOverTopics.Dims()
	for doc := 0; doc < dc; doc++ {
		for topic := 0; topic < dr; topic++ {
			if score := docsOverTopics.At(topic, doc); score > bestScores[topic] {
				bestScores[topic] = score
				bestDocs[topic] = p.History[doc]
			}
		}
	}

	topicsOverWords := lda.Components()
	tr, tc := topicsOverWords.Dims()

	// Invert the word -> column mapping so a column index yields its word.
	vocab := make([]string, len(vectoriser.Vocabulary))
	for k, v := range vectoriser.Vocabulary {
		vocab[v] = k
	}

	var response strings.Builder
	response.WriteString("Here you go captain 'too good to read backlog':\n")

	for topic := 0; topic < tr; topic++ {
		bestScore := -1.0
		bestWord := ""
		for word := 0; word < tc; word++ {
			if score := topicsOverWords.At(topic, word); score > bestScore {
				bestScore = score
				bestWord = vocab[word]
			}
		}
		fmt.Fprintf(&response, "Topic #%d : %s\n", topic, bestWord)
		fmt.Fprintf(&response, "\t%s\n", bestDocs[topic])
	}

	p.Bot.Send(bot.Message, message.Channel, response.String())

	return true
}

// remember stores text in the history. The history grows until it reaches the
// configured TLDR.HistorySize; after that the stored entries are overwritten
// in rotation, starting again from the first one.
func (p *TLDRPlugin) remember(text string) {
	currentHistorySize := len(p.History)
	maxHistorySize := p.Bot.Config().GetInt("TLDR.HistorySize", 1000)

	if currentHistorySize < maxHistorySize {
		p.History = append(p.History, text)
		p.Index = 0
		return
	}

	// The history is never shrunk when it is larger than the configured
	// maximum (for example after the limit was lowered): pruning would have to
	// drop the oldest entries, so the existing slots are simply reused.

	if currentHistorySize == 0 {
		// The limit is zero or negative and nothing is stored, so there is no
		// slot to overwrite; indexing here would panic.
		return
	}

	if p.Index >= currentHistorySize {
		p.Index = 0
	}

	p.History[p.Index] = text
	p.Index++
}

// help answers a help request by sending the plugin's trigger phrase to the
// channel the request came from. It always reports the request as handled.
func (p *TLDRPlugin) help(kind bot.Kind, message msg.Message, args ...interface{}) bool {
	const usage = "tl;dr"
	target := message.Channel
	p.Bot.Send(bot.Message, target, usage)
	return true
}

// shouldKeepMessage reports whether message belongs in the history. No
// filtering is applied at present, so the answer is true for every input.
func shouldKeepMessage(message string) bool {
	const keep = true
	return keep
}
TL;DR Latent Dirichlet Allocation summarizer 2019-03-22 00:12:15 +00:00			`package tldr`

			`import (`
			`"fmt"`
			`"strings"`

			`"github.com/velour/catbase/bot"`
			`"github.com/velour/catbase/bot/msg"`

			`"github.com/rs/zerolog/log"`

			`"github.com/james-bowman/nlp"`
			`)`

			`var (`
			THESE_ARE_NOT_THE_WORDS_YOU_ARE_LOOKING_FOR = []string{"p", "z", "i", "c", "e", "s", "x", "n", "b", "t", "d", "m", "r", "a", "f", "l", "w", "o", "g", "h", "v", "k", "y", "j", "u", "q", "th", "wu", "qt", "so", "ru", "pm", "in", "is", "am", "me", "on", "by", "kw", "hu", "bg", "ob", "re", "wx", "go", "hl", "vc", "bl", "rg", "wr", "cw", "pj", "tf", "nr", "aw", "qc", "it", "cj", "or", "ty", "hk", "be", "wc", "de", "lf", "mj", "bw", "at", "as", "gd", "ww", "ko", "og", "gg", "cz", "an", "mh", "we", "rb", "mv", "uk", "wt", "us", "hq", "if", "mu", "pn", "js", "my", "ol", "ul", "io", "lm", "do", "cd", "fo", "no", "vg", "lu", "dg", "zu", "sv", "wn", "fu", "dk", "tv", "la", "sn", "wb", "pc", "he", "pk", "ii", "wm", "up", "bo", "ca", "fd", "uh", "hh", "al", "id", "bd", "uw", "co", "pf", "ez", "df", "ro", "et", "dh", "ui", "gl", "st", "rl", "ev", "jj", "fp", "hc", "en", "eh", "rp", "ka", "rj", "bm", "oh", "tb", "ix", "ad", "cg", "ny", "rn", "cn", "dc", "vp", "jm", "tp", "om", "ok", "ms", "wp", "hi", "aj", "oc", "sq", "hp", "yu", "sk", "dx", "eg", "ip", "bk", "hz", "pa", "fg", "rh", "tx", "ve", "za", "ht", "ie", "el", "ma", "xi", "ou", "dp", "nu", "mw", "mf", "md", "fl", "mb", "mr", "ld", "uc", "il", "ln", "mm", "ur", "ed", "pd", "le", "jc", "az", "un", "mi", "dm", "wy", "jd", "oe", "to", "pb", "dr", "kb", "pp", "na", "rx", "os", "nb", "yn", "ci", "gc", "ex", "dt", "au", "fi", "np", "nc", "po", "va", "rd", "sc", "ws", "cu", "se", "di", "km", "ga", "ac", "ft", "lc", "fa", "im", "vs", "ar", "mo", "sa", "sg", "uv", "xp", "je", "eq", "lt", "eu", "cc", "wa", "dj", "ls", "cm", "wi", "dl", "ct", "fx", "yo", "da", "vb", "of", "nj", "hr", "em", "iv", "nn", "rw", "fs", "ye", "um", "ni", "ne", "du", "oo", "bp", "gs", "fw", "nt", "es", "fc", "ti", "cb", "cv", "gb", "bc", "pr", "fr", "aa", "mt", "ir", "gp", "oz", "mg", "tc", "hb", "sl", "af", "bt", "ch", "sd", "jp", "lb", "rs", "ep", "ef", "rr", "fy", "tu", "dv", "xl", "ss", "tt", "ap", "nm", "mn", "nd", "pe", "op", "ng", "tn", "ge", "ts", 
"gr", "ce", "mx", "ab", "ic", "yr", "ot", "ai", "pi", "rv", "hs", "ae", "tm", "sp", "sh", "gt", "nh", "ho", "cl", "ll", "fm", "gi", "ta", "db", "ph", "ia", "pt", "bi", "ha", "ds", "ea", "lg", "bs", "ja", "ns", "wv", "nw", "sm", "ff", "ah", "sb", "td", "fe", "ak", "rf", "ps", "ky", "pl", "br", "lo", "ml", "dd", "cp", "cs", "rt", "ri", "gm", "sf", "kg", "ut", "si", "mc", "vt", "lp", "cf", "rm", "ag", "vi", "ec", "ba", "rc", "cr", "pg", "ee", "ra", "ks", "sw", "av", "te", "hd", "nz", "bb", "er", "jr", "tr", "nv", "ya", "nl", "li", "su", "mp", "sr", "ted", "bid", "can", "the", "nat", "car", "wan", "dig", "neo", "enb", "pvc", "dod", "fri", "dvd", "cia", "tex", "wed", "une", "how", "inn", "lid", "mia", "ltd", "los", "are", "yen", "cho", "dui", "inc", "win", "col", "upc", "bed", "dsc", "ste", "aye", "nhs", "dow", "tue", "cio", "ooo", "cas", "thu", "sea", "cut", "mpg", "rrp", "tel", "its", "ips", "pts", "own", "kit", "mug", "has", "sku", "nbc", "dip", "acm", "boy", "end", "ids", "him", "est", "son", "ict", "mac", "iii", "gmt", "max", "per", "xml", "big", "bin", "law", "sap", "ala", "art", "cir", "lip", "bat", "top", "eco", "sol", "van", "had", "buf", "rip", "ads", "usa", "wma", "seq", "pop", "int", "rid", "rna", "sim", "abs", "hit", "but", "wal", "ati", "doe", "eye", "geo", "old", "arg", "usb", "uni", "php", "etc", "diy", "leo", "tgp", "mud", "msn", "fee", "rpg", "las", "ide", "sic", "min", "aid", "avi", "ons", "non", "mel", "div", "ppc", "day", "fat", "saw", "cet", "cow", "mls", "pst", "why", "phi", "bra", "mae", "tom", "fin", "sub", "irc", "gpl", "led", "fan", "low", "ten", "gif", "ate", "man", "cat", "die", "ton", "tmp", "rec", "two", "ddr", "our", "gsm", "pet", "guy", "dev", "cup", "vol", "one", "you", "mag", "dee", "pit", "mba", "lee", "job", "boc", "pmc", "cfr", "bee", "vii", "llp", "too", "tap", "for", "bob", "fit", "men", "met", "mem", "por", "www", "cgi", "soa", "jvc", "tft", "ccd", "liz", "ice", "dat", "ali", "box", "llc", "sec", "bus", "ash", "bag", "gay", 
"all", "tub", "sox", "ibm", "sas", "gig", "qui", "pty", "dns", "air", "nor", "bug", "mid", "pas", "icq"
			`)`

			`type TLDRPlugin struct {`
			`Bot bot.Bot`
			`History []string`
			`Index int`
			`}`

			`func New(b bot.Bot) *TLDRPlugin {`
			`plugin := &TLDRPlugin{`
			`Bot: b,`
			`History: []string{},`
			`Index: 0,`
			`}`
			`b.Register(plugin, bot.Message, plugin.message)`
			`b.Register(plugin, bot.Help, plugin.help)`
			`return plugin`
			`}`

			`func (p *TLDRPlugin) message(kind bot.Kind, message msg.Message, args ...interface{}) bool {`
			`lowercaseMessage := strings.ToLower(message.Body)`
			`if lowercaseMessage == "tl;dr" {`
			`for _, str := range p.History {`
			`fmt.Println(str)`
			`}`

			`nTopics := p.Bot.Config().GetInt("TLDR.Topics", 5)`

			`vectoriser := nlp.NewCountVectoriser(THESE_ARE_NOT_THE_WORDS_YOU_ARE_LOOKING_FOR...)`
			`lda := nlp.NewLatentDirichletAllocation(nTopics)`
			`pipeline := nlp.NewPipeline(vectoriser, lda)`
			`docsOverTopics, err := pipeline.FitTransform(p.History...)`

			`if err != nil {`
			`log.Error().Err(err)`
			`return false`
			`}`

			`bestScores := make([]float64, nTopics)`
			`bestDocs := make([]string, nTopics)`

			`dr, dc := docsOverTopics.Dims()`
			`for doc := 0; doc < dc; doc++ {`
			`for topic := 0; topic < dr; topic++ {`
			`score := docsOverTopics.At(topic, doc)`
			`if score > bestScores[topic] {`
			`bestScores[topic] = score`
			`bestDocs[topic] = p.History[doc]`
			`}`
			`}`
			`}`

			`topicsOverWords := lda.Components()`
			`tr, tc := topicsOverWords.Dims()`

			`vocab := make([]string, len(vectoriser.Vocabulary))`
			`for k, v := range vectoriser.Vocabulary {`
			`vocab[v] = k`
			`}`

			`response := "Here you go captain 'too good to read backlog':\n"`

			`for topic := 0; topic < tr; topic++ {`
			`max := -1.`
			`best := ""`
			`for word := 0; word < tc; word++ {`
			`score := topicsOverWords.At(topic, word)`
			`if score > max {`
			`max = score`
			`best = vocab[word]`
			`}`
			`}`
			`response += fmt.Sprintf("Topic #%d : %s\n", topic, best)`
			`response += fmt.Sprintf("\t%s\n", bestDocs[topic])`
			`}`

			`p.Bot.Send(bot.Message, message.Channel, response)`

			`return true`
			`}`

			`if shouldKeepMessage(lowercaseMessage) {`
			`currentHistorySize := len(p.History)`
			`maxHistorySize := p.Bot.Config().GetInt("TLDR.HistorySize", 1000)`
			`if currentHistorySize < maxHistorySize {`
			`p.History = append(p.History, lowercaseMessage)`
			`p.Index = 0`
			`} else {`
			`if currentHistorySize > maxHistorySize {`
			`// We could resize this but we want to prune the oldest stuff, and`
			`// I don't care to do this correctly so might as well not do it at all`
			`}`

			`if p.Index >= currentHistorySize {`
			`p.Index = 0`
			`}`

			`p.History[p.Index] = lowercaseMessage`
			`p.Index++`
			`}`
			`}`
			`return false`
			`}`

			`// Help responds to help requests. Every plugin must implement a help function.`
			`func (p *TLDRPlugin) help(kind bot.Kind, message msg.Message, args ...interface{}) bool {`
			`p.Bot.Send(bot.Message, message.Channel, "tl;dr")`
			`return true`
			`}`

			`func shouldKeepMessage(message string) bool {`
			`return true`
			`}`