diff --git a/plugins/reaction/naive_bayes.go b/plugins/reaction/naive_bayes.go new file mode 100644 index 0000000..c5f5156 --- /dev/null +++ b/plugins/reaction/naive_bayes.go @@ -0,0 +1,59 @@ +package reaction + +import ( + "encoding/json" + "io/ioutil" + "strings" + + "github.com/cdipaolo/goml/base" + "github.com/cdipaolo/goml/text" + "github.com/rs/zerolog/log" +) + +type MetaData struct { + NClasses uint8 + ClassList []string +} + +type bayesReactor struct { + model *text.NaiveBayes + meta MetaData +} + +func newBayesReactor(jsonPath string) *bayesReactor { + reactor := &bayesReactor{} + f, err := ioutil.ReadFile(jsonPath) + if err != nil { + log.Error().Err(err).Msgf("error reading json") + return reactor + } + var meta MetaData + err = json.Unmarshal(f, &meta) + if err != nil { + log.Error().Err(err).Msgf("error reading json") + return reactor + } + reactor.meta = meta + + stream := make(chan base.TextDatapoint, 100) + //errors := make(chan error) + model := text.NewNaiveBayes(stream, meta.NClasses, base.OnlyWordsAndNumbers) + err = model.RestoreFromFile(strings.TrimSuffix(jsonPath, ".json")) + if err != nil { + log.Error().Err(err).Msgf("error reading json") + return reactor + } + reactor.model = model + + return reactor +} + +// React returns an emojy and probability given an input +func (b *bayesReactor) React(input string) (string, float64) { + if b.model == nil { + return "", 0.0 + } + class, prob := b.model.Probability(input) + emojy := b.meta.ClassList[class] + return emojy, prob +} diff --git a/plugins/reaction/reaction.go b/plugins/reaction/reaction.go index 7506031..81d71fb 100644 --- a/plugins/reaction/reaction.go +++ b/plugins/reaction/reaction.go @@ -3,12 +3,12 @@ package reaction import ( - "math/rand" "strings" "github.com/rs/zerolog/log" "github.com/chrissexton/sentiment" + "github.com/velour/catbase/bot" "github.com/velour/catbase/bot/msg" "github.com/velour/catbase/config" @@ -19,6 +19,7 @@ type ReactionPlugin struct { config *config.Config model sentiment.Models + br *bayesReactor } func New(b bot.Bot) *ReactionPlugin { @@ -26,35 +27,31 @@ func New(b bot.Bot) *ReactionPlugin { if err != nil { log.Fatal().Err(err).Msg("Couldn't restore sentiment model") } + c := b.Config() + path := c.GetString("reaction.modelpath", "emojy.model.json") rp := &ReactionPlugin{ bot: b, - config: b.Config(), + config: c, model: model, + br: newBayesReactor(path), } b.Register(rp, bot.Message, rp.message) return rp } func (p *ReactionPlugin) message(c bot.Connector, kind bot.Kind, message msg.Message, args ...interface{}) bool { - chance := p.config.GetFloat64("Reaction.GeneralChance", 0.01) - if rand.Float64() < chance { - analysis := p.model.SentimentAnalysis(message.Body, sentiment.English) + emojy, prob := p.br.React(message.Body) + target := p.config.GetFloat64("reaction.confidence", 0.5) - log.Debug(). - Uint8("score", analysis.Score). - Str("body", message.Body). - Msg("sentiment of statement") + log.Debug(). + Float64("prob", prob). + Float64("target", target). + Bool("accept", prob > target). + Str("emojy", emojy). + Msgf("Reaction check") - var reactions []string - if analysis.Score > 0 { - reactions = p.config.GetArray("Reaction.PositiveReactions", []string{}) - } else { - reactions = p.config.GetArray("Reaction.NegativeReactions", []string{}) - } - - reaction := reactions[rand.Intn(len(reactions))] - - p.bot.Send(c, bot.Reaction, message.Channel, reaction, message) + if prob > target { + p.bot.Send(c, bot.Reaction, message.Channel, emojy, message) } p.checkReactions(c, message) diff --git a/util/eval_emojy/main.go b/util/eval_emojy/main.go new file mode 100644 index 0000000..c4bd523 --- /dev/null +++ b/util/eval_emojy/main.go @@ -0,0 +1,58 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "io/ioutil" + "os" + "strings" + + "github.com/cdipaolo/goml/base" + "github.com/cdipaolo/goml/text" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +type MetaData struct { + NClasses uint8 + ClassList []string +} + +func main() { + log.Logger = log.With().Caller().Stack().Logger() + log.Logger = log.Logger.Output(zerolog.ConsoleWriter{Out: os.Stderr}) + + jsonPath := flag.String("path", "", "path to model JSON") + + flag.Parse() + if *jsonPath == "" { + fmt.Fprintf(os.Stderr, "You must provide a model path.\n\n") + flag.Usage() + os.Exit(1) + } + + input := strings.Join(flag.Args(), " ") + + f, err := ioutil.ReadFile(*jsonPath) + if err != nil { + log.Fatal().Err(err).Msgf("error reading json") + } + var meta MetaData + err = json.Unmarshal(f, &meta) + if err != nil { + log.Fatal().Err(err).Msgf("error reading json") + } + + stream := make(chan base.TextDatapoint, 100) + //errors := make(chan error) + model := text.NewNaiveBayes(stream, meta.NClasses, base.OnlyWordsAndNumbers) + err = model.RestoreFromFile(strings.TrimSuffix(*jsonPath, ".json")) + if err != nil { + log.Fatal().Err(err).Msgf("error reading json") + } + + class, prob := model.Probability(input) + emojy := meta.ClassList[class] + fmt.Printf("%s: %s (%.2f)\n", input, emojy, prob) +} diff --git a/util/learn_emojy/main.go b/util/learn_emojy/main.go new file mode 100644 index 0000000..ca9d7b4 --- /dev/null +++ b/util/learn_emojy/main.go @@ -0,0 +1,185 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/cdipaolo/goml/base" + "github.com/cdipaolo/goml/text" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +type logEntry struct { + Who string + Author string + Body string + Emojy string +} + +type logs []logEntry + +type emojySet map[string]bool + +type MetaData struct { + NClasses uint8 + ClassList []string +} + +func main() { + log.Logger = log.With().Caller().Stack().Logger() + log.Logger = log.Logger.Output(zerolog.ConsoleWriter{Out: os.Stderr}) + + logDir := flag.String("path", "", "path to logs") + outFile := flag.String("out", "emojy.model", "path to store model") + + flag.Parse() + if *logDir == "" { + fmt.Fprintf(os.Stderr, "You must provide a log path.\n\n") + flag.Usage() + os.Exit(1) + } + + logs, classes := prepLogs(*logDir) + model, meta := bayes(logs, classes) + err := model.PersistToFile(*outFile) + if err != nil { + log.Fatal().Err(err).Msgf("failed to save model") + } + metaJSON, err := json.Marshal(meta) + if err != nil { + log.Fatal().Err(err).Msgf("failed to save model") + } + err = ioutil.WriteFile(*outFile+".json", metaJSON, 0666) + if err != nil { + log.Fatal().Err(err).Msgf("failed to save model") + } +} + +var re = regexp.MustCompile(`(?i)^\[.+\] <(?P[[:punct:][:alnum:]]+)> reacted to (?P[[:punct:][:alnum:]]+): (?P.+) with :(?P[[:punct:][:alnum:]]+):$`) + +func prepLogs(path string) (logs, emojySet) { + entries := logs{} + emojies := emojySet{} + err := filepath.Walk(path, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + tmp, err := ioutil.ReadFile(path) + content := string(tmp) + if err != nil { + return err + } + for _, line := range strings.Split(content, "\n") { + if strings.Contains(line, "unknown event") { + continue + } + if !re.MatchString(line) { + continue + } + entry := parseEntry(line) + emojies[entry.Emojy] = true + log.Debug(). + Interface("entry", entry). + Str("line", line). + Msgf("Found emojy reaction entry") + entries = append(entries, entry) + } + return nil + }) + if err != nil { + log.Fatal().Msgf("Error walking: %s", err) + } + return entries, emojies +} + +func parseEntry(content string) logEntry { + out := logEntry{} + subs := re.FindStringSubmatch(content) + if len(subs) == 0 { + return out + } + for i, n := range re.SubexpNames() { + switch n { + case "Who": + out.Who = subs[i] + case "Author": + out.Author = subs[i] + case "Body": + out.Body = subs[i] + case "Emojy": + out.Emojy = subs[i] + } + } + return out +} + +func bayes(logs logs, classes emojySet) (*text.NaiveBayes, MetaData) { + // create the channel of data and errors + stream := make(chan base.TextDatapoint, 100) + errors := make(chan error) + + nClasses := uint8(len(classes)) + + classMap := map[string]uint8{} + classList := []string{} + for k, _ := range classes { + classList = append(classList, k) + classMap[k] = uint8(len(classList) - 1) + } + + log.Debug().Strs("classList", classList).Interface("classMap", classMap).Int("nLogs", len(logs)).Msgf("about to train") + + // make a new NaiveBayes model with + // 2 classes expected (classes in + // datapoints will now expect {0,1}. + // in general, given n as the classes + // variable, the model will expect + // datapoint classes in {0,...,n-1}) + // + // Note that the model is filtering + // the text to omit anything except + // words and numbers (and spaces + // obviously) + model := text.NewNaiveBayes(stream, nClasses, base.OnlyWordsAndNumbers) + go model.OnlineLearn(errors) + + for _, l := range logs { + stream <- base.TextDatapoint{ + X: l.Body, + Y: classMap[l.Emojy], + } + } + + close(stream) + for { + err := <-errors + if err != nil { + log.Error().Err(err).Msg("Error passed") + } else { + // training is done! + break + } + } + // now you can predict like normal + in := "Should work properly once that number of documents increases." + class := model.Predict(in) // 0 + emojy := classList[class] + log.Debug().Msgf("Class prediction for %s: %v", in, emojy) + + meta := MetaData{ + NClasses: nClasses, + ClassList: classList, + } + + return model, meta +}