reaction: add emojy language model

This commit is contained in:
Chris Sexton 2020-07-24 12:31:57 -04:00 committed by Chris Sexton
parent 1ff79ce2c9
commit d69d708245
4 changed files with 318 additions and 19 deletions

View File

@ -0,0 +1,59 @@
package reaction
import (
"encoding/json"
"io/ioutil"
"strings"
"github.com/cdipaolo/goml/base"
"github.com/cdipaolo/goml/text"
"github.com/rs/zerolog/log"
)
// MetaData describes the class layout stored in the JSON file that
// accompanies a persisted model. newBayesReactor hands NClasses to
// text.NewNaiveBayes, and React indexes ClassList with the class number the
// model reports.
type MetaData struct {
	// NClasses is the class count passed to text.NewNaiveBayes.
	NClasses uint8
	// ClassList maps a class number (the slice index) to an emojy name.
	ClassList []string
}
// bayesReactor pairs a restored naive Bayes text model with the metadata
// needed to turn its class numbers into emojy names.
type bayesReactor struct {
	// model stays nil when loading failed; React checks for that.
	model *text.NaiveBayes
	// meta is the metadata decoded from the model's JSON file.
	meta MetaData
}
// newBayesReactor loads the emojy classifier described by jsonPath.
//
// jsonPath names the JSON metadata file (see MetaData); the serialized model
// itself is restored from the same path with the ".json" suffix removed.
// Loading is best effort: each failure is logged with the step that failed
// and a reactor without a model is returned, in which case React reports no
// reaction instead of failing.
func newBayesReactor(jsonPath string) *bayesReactor {
	reactor := &bayesReactor{}

	raw, err := ioutil.ReadFile(jsonPath)
	if err != nil {
		log.Error().Err(err).Str("path", jsonPath).Msg("error reading emojy model metadata")
		return reactor
	}

	var meta MetaData
	if err := json.Unmarshal(raw, &meta); err != nil {
		log.Error().Err(err).Str("path", jsonPath).Msg("error parsing emojy model metadata")
		return reactor
	}
	reactor.meta = meta

	// The constructor requires a datapoint stream, but nothing in this
	// function sends on it: the model state comes from RestoreFromFile.
	stream := make(chan base.TextDatapoint, 100)
	model := text.NewNaiveBayes(stream, meta.NClasses, base.OnlyWordsAndNumbers)

	modelPath := strings.TrimSuffix(jsonPath, ".json")
	if err := model.RestoreFromFile(modelPath); err != nil {
		log.Error().Err(err).Str("path", modelPath).Msg("error restoring emojy model")
		return reactor
	}

	reactor.model = model
	return reactor
}
// React returns an emojy and probability given an input.
//
// It returns ("", 0) when no model was loaded, and also when the model
// reports a class number that has no entry in the metadata class list.
func (b *bayesReactor) React(input string) (string, float64) {
	if b.model == nil {
		return "", 0.0
	}

	class, prob := b.model.Probability(input)

	// The class list and the model come from two separate files, so they can
	// disagree; refuse to index past the end of the list rather than panic.
	idx := int(class)
	if idx >= len(b.meta.ClassList) {
		log.Error().
			Int("class", idx).
			Int("classes", len(b.meta.ClassList)).
			Msg("model reported a class missing from the class list")
		return "", 0.0
	}

	return b.meta.ClassList[idx], prob
}

View File

@ -3,12 +3,12 @@
package reaction
import (
"math/rand"
"strings"
"github.com/rs/zerolog/log"
"github.com/chrissexton/sentiment"
"github.com/velour/catbase/bot"
"github.com/velour/catbase/bot/msg"
"github.com/velour/catbase/config"
@ -19,6 +19,7 @@ type ReactionPlugin struct {
config *config.Config
model sentiment.Models
br *bayesReactor
}
func New(b bot.Bot) *ReactionPlugin {
@ -26,35 +27,31 @@ func New(b bot.Bot) *ReactionPlugin {
if err != nil {
log.Fatal().Err(err).Msg("Couldn't restore sentiment model")
}
c := b.Config()
path := c.GetString("reaction.modelpath", "emojy.model.json")
rp := &ReactionPlugin{
bot: b,
config: b.Config(),
config: c,
model: model,
br: newBayesReactor(path),
}
b.Register(rp, bot.Message, rp.message)
return rp
}
func (p *ReactionPlugin) message(c bot.Connector, kind bot.Kind, message msg.Message, args ...interface{}) bool {
chance := p.config.GetFloat64("Reaction.GeneralChance", 0.01)
if rand.Float64() < chance {
analysis := p.model.SentimentAnalysis(message.Body, sentiment.English)
emojy, prob := p.br.React(message.Body)
target := p.config.GetFloat64("reaction.confidence", 0.5)
log.Debug().
Uint8("score", analysis.Score).
Str("body", message.Body).
Msg("sentiment of statement")
Float64("prob", prob).
Float64("target", target).
Bool("accept", prob > target).
Str("emojy", emojy).
Msgf("Reaction check")
var reactions []string
if analysis.Score > 0 {
reactions = p.config.GetArray("Reaction.PositiveReactions", []string{})
} else {
reactions = p.config.GetArray("Reaction.NegativeReactions", []string{})
}
reaction := reactions[rand.Intn(len(reactions))]
p.bot.Send(c, bot.Reaction, message.Channel, reaction, message)
if prob > target {
p.bot.Send(c, bot.Reaction, message.Channel, emojy, message)
}
p.checkReactions(c, message)

58
util/eval_emojy/main.go Normal file
View File

@ -0,0 +1,58 @@
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"os"
"strings"
"github.com/cdipaolo/goml/base"
"github.com/cdipaolo/goml/text"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
// MetaData is the structure decoded from the model's JSON file. main passes
// NClasses to text.NewNaiveBayes and uses ClassList to translate the
// predicted class number into an emojy name.
type MetaData struct {
	// NClasses is the class count passed to text.NewNaiveBayes.
	NClasses uint8
	// ClassList maps a class number (the slice index) to an emojy name.
	ClassList []string
}
// main loads the model described by the -path flag, classifies the text made
// of the remaining command-line arguments joined by spaces, and prints the
// input, the predicted emojy and its probability.
//
// -path names the JSON metadata file; the model itself is restored from the
// same path with the ".json" suffix removed. Any failure is fatal.
func main() {
	log.Logger = log.With().Caller().Stack().Logger()
	log.Logger = log.Logger.Output(zerolog.ConsoleWriter{Out: os.Stderr})

	jsonPath := flag.String("path", "", "path to model JSON")
	flag.Parse()

	if *jsonPath == "" {
		fmt.Fprintf(os.Stderr, "You must provide a model path.\n\n")
		flag.Usage()
		os.Exit(1)
	}

	input := strings.Join(flag.Args(), " ")

	raw, err := ioutil.ReadFile(*jsonPath)
	if err != nil {
		log.Fatal().Err(err).Str("path", *jsonPath).Msg("error reading model metadata")
	}

	var meta MetaData
	if err := json.Unmarshal(raw, &meta); err != nil {
		log.Fatal().Err(err).Str("path", *jsonPath).Msg("error parsing model metadata")
	}

	// The constructor requires a datapoint stream, but nothing here sends on
	// it: the model state comes from RestoreFromFile.
	stream := make(chan base.TextDatapoint, 100)
	model := text.NewNaiveBayes(stream, meta.NClasses, base.OnlyWordsAndNumbers)

	modelPath := strings.TrimSuffix(*jsonPath, ".json")
	if err := model.RestoreFromFile(modelPath); err != nil {
		log.Fatal().Err(err).Str("path", modelPath).Msg("error restoring model")
	}

	class, prob := model.Probability(input)

	// The class list and the model live in separate files and may disagree;
	// report that clearly instead of panicking on the index below.
	if int(class) >= len(meta.ClassList) {
		log.Fatal().
			Int("class", int(class)).
			Int("classes", len(meta.ClassList)).
			Msg("model reported a class missing from the class list")
	}

	emojy := meta.ClassList[class]
	fmt.Printf("%s: %s (%.2f)\n", input, emojy, prob)
}

185
util/learn_emojy/main.go Normal file
View File

@ -0,0 +1,185 @@
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/cdipaolo/goml/base"
"github.com/cdipaolo/goml/text"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
// logEntry holds the pieces of one reaction log line. Each field is filled by
// parseEntry from the capture group of the same name in re.
type logEntry struct {
	// Who is the name inside the angle brackets, before "reacted to".
	Who string
	// Author is the name between "reacted to" and the colon.
	Author string
	// Body is the text between the author and " with :<emojy>:".
	Body string
	// Emojy is the reaction name without its surrounding colons.
	Emojy string
}
// logs is the list of reaction entries collected by prepLogs.
type logs []logEntry

// emojySet is the set of distinct emojy names; prepLogs stores true for every
// name it encounters.
type emojySet map[string]bool
// MetaData is what main marshals to "<out>.json" next to the persisted
// model: the class count used to build the model and the class-number to
// emojy mapping produced by bayes.
type MetaData struct {
	// NClasses is the class count that was passed to text.NewNaiveBayes.
	NClasses uint8
	// ClassList maps a class number (the slice index) to an emojy name.
	ClassList []string
}
// main trains an emojy classifier from the reaction logs found under the
// -path directory, persists the model to the -out file and writes the
// matching metadata as JSON to "<out>.json". Every failure is fatal.
func main() {
	log.Logger = log.With().Caller().Stack().Logger().
		Output(zerolog.ConsoleWriter{Out: os.Stderr})

	var (
		logDir  = flag.String("path", "", "path to logs")
		outFile = flag.String("out", "emojy.model", "path to store model")
	)
	flag.Parse()

	if len(*logDir) == 0 {
		fmt.Fprintf(os.Stderr, "You must provide a log path.\n\n")
		flag.Usage()
		os.Exit(1)
	}

	// Collect the training entries and the set of classes, then train.
	entries, classSet := prepLogs(*logDir)
	model, meta := bayes(entries, classSet)

	if err := model.PersistToFile(*outFile); err != nil {
		log.Fatal().Err(err).Msgf("failed to save model")
	}

	encoded, err := json.Marshal(meta)
	if err != nil {
		log.Fatal().Err(err).Msgf("failed to save model")
	}

	metaPath := *outFile + ".json"
	if err := ioutil.WriteFile(metaPath, encoded, 0666); err != nil {
		log.Fatal().Err(err).Msgf("failed to save model")
	}
}
// re matches one complete reaction log line, case-insensitively, of the form
//
//	[<anything>] <Who> reacted to Author: Body with :Emojy:
//
// Who, Author and Emojy are runs of punctuation and alphanumeric characters
// (no whitespace); Body is any non-empty text. The named groups are read back
// by parseEntry.
var re = regexp.MustCompile(`(?i)^\[.+\] <(?P<Who>[[:punct:][:alnum:]]+)> reacted to (?P<Author>[[:punct:][:alnum:]]+): (?P<Body>.+) with :(?P<Emojy>[[:punct:][:alnum:]]+):$`)
// prepLogs walks every regular file under path and collects the reaction
// entries found in them.
//
// A line is kept when it does not contain "unknown event" and matches re. It
// returns the parsed entries in the order they were read, together with the
// set of distinct emojy names seen. Any error while walking or reading a file
// is fatal.
func prepLogs(path string) (logs, emojySet) {
	entries := logs{}
	emojies := emojySet{}

	// The callback parameter is named file so it does not shadow path.
	err := filepath.Walk(path, func(file string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}

		// Check the read error before touching the content.
		raw, err := ioutil.ReadFile(file)
		if err != nil {
			return err
		}

		for _, line := range strings.Split(string(raw), "\n") {
			// Logs with CRLF line endings leave a trailing "\r" after the
			// split, which would keep the end-anchored pattern from matching.
			line = strings.TrimSuffix(line, "\r")

			if strings.Contains(line, "unknown event") {
				continue
			}
			if !re.MatchString(line) {
				continue
			}

			entry := parseEntry(line)
			emojies[entry.Emojy] = true
			log.Debug().
				Interface("entry", entry).
				Str("line", line).
				Msgf("Found emojy reaction entry")
			entries = append(entries, entry)
		}
		return nil
	})
	if err != nil {
		log.Fatal().Msgf("Error walking: %s", err)
	}

	return entries, emojies
}
// parseEntry extracts the named capture groups of re from content.
//
// It returns a zero logEntry when content does not match; otherwise the Who,
// Author, Body and Emojy fields carry the text of the groups with the same
// names.
func parseEntry(content string) logEntry {
	var entry logEntry

	matches := re.FindStringSubmatch(content)
	if len(matches) == 0 {
		return entry
	}

	// Route each named group to the field it populates; the unnamed
	// whole-match group has no destination and is ignored.
	targets := map[string]*string{
		"Who":    &entry.Who,
		"Author": &entry.Author,
		"Body":   &entry.Body,
		"Emojy":  &entry.Emojy,
	}
	for idx, name := range re.SubexpNames() {
		if dst, ok := targets[name]; ok {
			*dst = matches[idx]
		}
	}

	return entry
}
// bayes trains a naive Bayes text classifier on the given log entries.
//
// Every emojy in classes is assigned a class number; each entry is then fed
// to the model as a datapoint whose text is the entry body and whose class is
// the number of the entry's emojy. It returns the trained model together with
// the metadata (class count and class-number to emojy list) needed to
// interpret its predictions.
//
// Class numbers are stored in a uint8 here, so more than 255 distinct emojy
// cannot be represented; that case is fatal instead of silently wrapping.
func bayes(logs logs, classes emojySet) (*text.NaiveBayes, MetaData) {
	const maxClasses = 255
	if len(classes) > maxClasses {
		log.Fatal().
			Int("classes", len(classes)).
			Int("max", maxClasses).
			Msg("too many distinct emojy for a uint8 class number")
	}
	nClasses := uint8(len(classes))

	// Assign class numbers in map iteration order. The order differs between
	// runs, which is fine because the resulting list is returned alongside
	// the model.
	classMap := make(map[string]uint8, len(classes))
	classList := make([]string, 0, len(classes))
	for k := range classes {
		classMap[k] = uint8(len(classList))
		classList = append(classList, k)
	}
	log.Debug().Strs("classList", classList).Interface("classMap", classMap).Int("nLogs", len(logs)).Msgf("about to train")

	// Datapoints go in through stream; learning errors come back on errc.
	stream := make(chan base.TextDatapoint, 100)
	errc := make(chan error)

	// The model is built for nClasses classes, and the datapoints sent below
	// carry class numbers taken from classMap. Text is filtered with
	// base.OnlyWordsAndNumbers.
	model := text.NewNaiveBayes(stream, nClasses, base.OnlyWordsAndNumbers)
	go model.OnlineLearn(errc)

	for _, l := range logs {
		stream <- base.TextDatapoint{
			X: l.Body,
			Y: classMap[l.Emojy],
		}
	}
	close(stream)

	// Drain learning errors. A nil error, or the channel being closed, marks
	// the end of training.
	for err := range errc {
		if err == nil {
			break
		}
		log.Error().Err(err).Msg("Error passed")
	}

	// Log one sample prediction as a sanity check. Skip it when there are no
	// classes, since there would be nothing to index.
	if len(classList) > 0 {
		in := "Should work properly once that number of documents increases."
		if class := int(model.Predict(in)); class < len(classList) {
			log.Debug().Msgf("Class prediction for %s: %v", in, classList[class])
		}
	}

	meta := MetaData{
		NClasses:  nClasses,
		ClassList: classList,
	}
	return model, meta
}