mirror of https://github.com/velour/catbase.git
reaction: add emojy language model
This commit is contained in:
parent
1ff79ce2c9
commit
d69d708245
|
@ -0,0 +1,59 @@
|
|||
package reaction
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
|
||||
"github.com/cdipaolo/goml/base"
|
||||
"github.com/cdipaolo/goml/text"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// MetaData is the JSON sidecar stored next to a persisted emojy model. It
// carries the information needed to rebuild the classifier and to translate
// its numeric class indices back into emojy names.
type MetaData struct {
	// NClasses is the class count handed to text.NewNaiveBayes.
	NClasses uint8
	// ClassList is indexed by class number and yields the emojy name.
	ClassList []string
}
|
||||
|
||||
type bayesReactor struct {
|
||||
model *text.NaiveBayes
|
||||
meta MetaData
|
||||
}
|
||||
|
||||
func newBayesReactor(jsonPath string) *bayesReactor {
|
||||
reactor := &bayesReactor{}
|
||||
f, err := ioutil.ReadFile(jsonPath)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("error reading json")
|
||||
return reactor
|
||||
}
|
||||
var meta MetaData
|
||||
err = json.Unmarshal(f, &meta)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("error reading json")
|
||||
return reactor
|
||||
}
|
||||
reactor.meta = meta
|
||||
|
||||
stream := make(chan base.TextDatapoint, 100)
|
||||
//errors := make(chan error)
|
||||
model := text.NewNaiveBayes(stream, meta.NClasses, base.OnlyWordsAndNumbers)
|
||||
err = model.RestoreFromFile(strings.TrimSuffix(jsonPath, ".json"))
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("error reading json")
|
||||
return reactor
|
||||
}
|
||||
reactor.model = model
|
||||
|
||||
return reactor
|
||||
}
|
||||
|
||||
// React returns an emojy and probability given an input
|
||||
func (b *bayesReactor) React(input string) (string, float64) {
|
||||
if b.model == nil {
|
||||
return "", 0.0
|
||||
}
|
||||
class, prob := b.model.Probability(input)
|
||||
emojy := b.meta.ClassList[class]
|
||||
return emojy, prob
|
||||
}
|
|
@ -3,12 +3,12 @@
|
|||
package reaction
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"strings"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/chrissexton/sentiment"
|
||||
|
||||
"github.com/velour/catbase/bot"
|
||||
"github.com/velour/catbase/bot/msg"
|
||||
"github.com/velour/catbase/config"
|
||||
|
@ -19,6 +19,7 @@ type ReactionPlugin struct {
|
|||
config *config.Config
|
||||
|
||||
model sentiment.Models
|
||||
br *bayesReactor
|
||||
}
|
||||
|
||||
func New(b bot.Bot) *ReactionPlugin {
|
||||
|
@ -26,35 +27,31 @@ func New(b bot.Bot) *ReactionPlugin {
|
|||
if err != nil {
|
||||
log.Fatal().Err(err).Msg("Couldn't restore sentiment model")
|
||||
}
|
||||
c := b.Config()
|
||||
path := c.GetString("reaction.modelpath", "emojy.model.json")
|
||||
rp := &ReactionPlugin{
|
||||
bot: b,
|
||||
config: b.Config(),
|
||||
config: c,
|
||||
model: model,
|
||||
br: newBayesReactor(path),
|
||||
}
|
||||
b.Register(rp, bot.Message, rp.message)
|
||||
return rp
|
||||
}
|
||||
|
||||
func (p *ReactionPlugin) message(c bot.Connector, kind bot.Kind, message msg.Message, args ...interface{}) bool {
|
||||
chance := p.config.GetFloat64("Reaction.GeneralChance", 0.01)
|
||||
if rand.Float64() < chance {
|
||||
analysis := p.model.SentimentAnalysis(message.Body, sentiment.English)
|
||||
emojy, prob := p.br.React(message.Body)
|
||||
target := p.config.GetFloat64("reaction.confidence", 0.5)
|
||||
|
||||
log.Debug().
|
||||
Uint8("score", analysis.Score).
|
||||
Str("body", message.Body).
|
||||
Msg("sentiment of statement")
|
||||
log.Debug().
|
||||
Float64("prob", prob).
|
||||
Float64("target", target).
|
||||
Bool("accept", prob > target).
|
||||
Str("emojy", emojy).
|
||||
Msgf("Reaction check")
|
||||
|
||||
var reactions []string
|
||||
if analysis.Score > 0 {
|
||||
reactions = p.config.GetArray("Reaction.PositiveReactions", []string{})
|
||||
} else {
|
||||
reactions = p.config.GetArray("Reaction.NegativeReactions", []string{})
|
||||
}
|
||||
|
||||
reaction := reactions[rand.Intn(len(reactions))]
|
||||
|
||||
p.bot.Send(c, bot.Reaction, message.Channel, reaction, message)
|
||||
if prob > target {
|
||||
p.bot.Send(c, bot.Reaction, message.Channel, emojy, message)
|
||||
}
|
||||
|
||||
p.checkReactions(c, message)
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/cdipaolo/goml/base"
|
||||
"github.com/cdipaolo/goml/text"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// MetaData is the JSON sidecar that accompanies a persisted model: how many
// classes the classifier was built with and which emojy each class stands for.
type MetaData struct {
	// NClasses is the class count passed to text.NewNaiveBayes.
	NClasses uint8
	// ClassList maps a class index to its emojy name.
	ClassList []string
}
|
||||
|
||||
func main() {
|
||||
log.Logger = log.With().Caller().Stack().Logger()
|
||||
log.Logger = log.Logger.Output(zerolog.ConsoleWriter{Out: os.Stderr})
|
||||
|
||||
jsonPath := flag.String("path", "", "path to model JSON")
|
||||
|
||||
flag.Parse()
|
||||
if *jsonPath == "" {
|
||||
fmt.Fprintf(os.Stderr, "You must provide a model path.\n\n")
|
||||
flag.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
input := strings.Join(flag.Args(), " ")
|
||||
|
||||
f, err := ioutil.ReadFile(*jsonPath)
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msgf("error reading json")
|
||||
}
|
||||
var meta MetaData
|
||||
err = json.Unmarshal(f, &meta)
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msgf("error reading json")
|
||||
}
|
||||
|
||||
stream := make(chan base.TextDatapoint, 100)
|
||||
//errors := make(chan error)
|
||||
model := text.NewNaiveBayes(stream, meta.NClasses, base.OnlyWordsAndNumbers)
|
||||
err = model.RestoreFromFile(strings.TrimSuffix(*jsonPath, ".json"))
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msgf("error reading json")
|
||||
}
|
||||
|
||||
class, prob := model.Probability(input)
|
||||
emojy := meta.ClassList[class]
|
||||
fmt.Printf("%s: %s (%.2f)\n", input, emojy, prob)
|
||||
}
|
|
@ -0,0 +1,185 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/cdipaolo/goml/base"
|
||||
"github.com/cdipaolo/goml/text"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// logEntry is one emojy reaction parsed out of a chat log line. Each field
// holds the text captured by the regex group of the same name.
type logEntry struct {
	Who    string // user who added the reaction
	Author string // user whose message was reacted to
	Body   string // text of the message that was reacted to
	Emojy  string // reaction name, without the surrounding colons
}
|
||||
|
||||
// logs is the list of reaction entries gathered from the log files; it is the
// training set handed to bayes.
type logs []logEntry
|
||||
|
||||
// emojySet is the set of distinct emojy names seen while parsing the logs.
// Only the keys matter; prepLogs stores true for every emojy it encounters.
type emojySet map[string]bool
|
||||
|
||||
// MetaData is written as JSON next to the persisted model. It records the
// class count used to build the classifier and the emojy name behind each
// class index.
type MetaData struct {
	// NClasses is the class count the model was created with.
	NClasses uint8
	// ClassList maps a class index to its emojy name.
	ClassList []string
}
|
||||
|
||||
func main() {
|
||||
log.Logger = log.With().Caller().Stack().Logger()
|
||||
log.Logger = log.Logger.Output(zerolog.ConsoleWriter{Out: os.Stderr})
|
||||
|
||||
logDir := flag.String("path", "", "path to logs")
|
||||
outFile := flag.String("out", "emojy.model", "path to store model")
|
||||
|
||||
flag.Parse()
|
||||
if *logDir == "" {
|
||||
fmt.Fprintf(os.Stderr, "You must provide a log path.\n\n")
|
||||
flag.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
logs, classes := prepLogs(*logDir)
|
||||
model, meta := bayes(logs, classes)
|
||||
err := model.PersistToFile(*outFile)
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msgf("failed to save model")
|
||||
}
|
||||
metaJSON, err := json.Marshal(meta)
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msgf("failed to save model")
|
||||
}
|
||||
err = ioutil.WriteFile(*outFile+".json", metaJSON, 0666)
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msgf("failed to save model")
|
||||
}
|
||||
}
|
||||
|
||||
// re matches, case-insensitively, a whole log line that records an emojy
// reaction:
//
//	[<prefix>] <Who> reacted to Author: Body with :Emojy:
//
// The bracketed prefix is presumably a timestamp (not verified here). The
// named groups Who, Author, Body and Emojy are read by parseEntry.
var re = regexp.MustCompile(`(?i)^\[.+\] <(?P<Who>[[:punct:][:alnum:]]+)> reacted to (?P<Author>[[:punct:][:alnum:]]+): (?P<Body>.+) with :(?P<Emojy>[[:punct:][:alnum:]]+):$`)
|
||||
|
||||
func prepLogs(path string) (logs, emojySet) {
|
||||
entries := logs{}
|
||||
emojies := emojySet{}
|
||||
err := filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
tmp, err := ioutil.ReadFile(path)
|
||||
content := string(tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, line := range strings.Split(content, "\n") {
|
||||
if strings.Contains(line, "unknown event") {
|
||||
continue
|
||||
}
|
||||
if !re.MatchString(line) {
|
||||
continue
|
||||
}
|
||||
entry := parseEntry(line)
|
||||
emojies[entry.Emojy] = true
|
||||
log.Debug().
|
||||
Interface("entry", entry).
|
||||
Str("line", line).
|
||||
Msgf("Found emojy reaction entry")
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatal().Msgf("Error walking: %s", err)
|
||||
}
|
||||
return entries, emojies
|
||||
}
|
||||
|
||||
func parseEntry(content string) logEntry {
|
||||
out := logEntry{}
|
||||
subs := re.FindStringSubmatch(content)
|
||||
if len(subs) == 0 {
|
||||
return out
|
||||
}
|
||||
for i, n := range re.SubexpNames() {
|
||||
switch n {
|
||||
case "Who":
|
||||
out.Who = subs[i]
|
||||
case "Author":
|
||||
out.Author = subs[i]
|
||||
case "Body":
|
||||
out.Body = subs[i]
|
||||
case "Emojy":
|
||||
out.Emojy = subs[i]
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func bayes(logs logs, classes emojySet) (*text.NaiveBayes, MetaData) {
|
||||
// create the channel of data and errors
|
||||
stream := make(chan base.TextDatapoint, 100)
|
||||
errors := make(chan error)
|
||||
|
||||
nClasses := uint8(len(classes))
|
||||
|
||||
classMap := map[string]uint8{}
|
||||
classList := []string{}
|
||||
for k, _ := range classes {
|
||||
classList = append(classList, k)
|
||||
classMap[k] = uint8(len(classList) - 1)
|
||||
}
|
||||
|
||||
log.Debug().Strs("classList", classList).Interface("classMap", classMap).Int("nLogs", len(logs)).Msgf("about to train")
|
||||
|
||||
// make a new NaiveBayes model with
|
||||
// 2 classes expected (classes in
|
||||
// datapoints will now expect {0,1}.
|
||||
// in general, given n as the classes
|
||||
// variable, the model will expect
|
||||
// datapoint classes in {0,...,n-1})
|
||||
//
|
||||
// Note that the model is filtering
|
||||
// the text to omit anything except
|
||||
// words and numbers (and spaces
|
||||
// obviously)
|
||||
model := text.NewNaiveBayes(stream, nClasses, base.OnlyWordsAndNumbers)
|
||||
go model.OnlineLearn(errors)
|
||||
|
||||
for _, l := range logs {
|
||||
stream <- base.TextDatapoint{
|
||||
X: l.Body,
|
||||
Y: classMap[l.Emojy],
|
||||
}
|
||||
}
|
||||
|
||||
close(stream)
|
||||
for {
|
||||
err := <-errors
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("Error passed")
|
||||
} else {
|
||||
// training is done!
|
||||
break
|
||||
}
|
||||
}
|
||||
// now you can predict like normal
|
||||
in := "Should work properly once that number of documents increases."
|
||||
class := model.Predict(in) // 0
|
||||
emojy := classList[class]
|
||||
log.Debug().Msgf("Class prediction for %s: %v", in, emojy)
|
||||
|
||||
meta := MetaData{
|
||||
NClasses: nClasses,
|
||||
ClassList: classList,
|
||||
}
|
||||
|
||||
return model, meta
|
||||
}
|
Loading…
Reference in New Issue