sensitive.go 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. package service
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/anknown/ahocorasick"
  6. "one-api/constant"
  7. "strings"
  8. )
  9. // SensitiveWordContains 是否包含敏感词,返回是否包含敏感词和敏感词列表
  10. func SensitiveWordContains(text string) (bool, []string) {
  11. checkText := strings.ToLower(text)
  12. // 构建一个AC自动机
  13. m := initAc()
  14. hits := m.MultiPatternSearch([]rune(checkText), false)
  15. if len(hits) > 0 {
  16. words := make([]string, 0)
  17. for _, hit := range hits {
  18. words = append(words, string(hit.Word))
  19. }
  20. return true, words
  21. }
  22. return false, nil
  23. }
  24. // SensitiveWordReplace 敏感词替换,返回是否包含敏感词和替换后的文本
  25. func SensitiveWordReplace(text string, returnImmediately bool) (bool, []string, string) {
  26. checkText := strings.ToLower(text)
  27. m := initAc()
  28. hits := m.MultiPatternSearch([]rune(checkText), returnImmediately)
  29. if len(hits) > 0 {
  30. words := make([]string, 0)
  31. for _, hit := range hits {
  32. pos := hit.Pos
  33. word := string(hit.Word)
  34. text = text[:pos] + "*###*" + text[pos+len(word):]
  35. words = append(words, word)
  36. }
  37. return true, words, text
  38. }
  39. return false, nil, text
  40. }
  41. func initAc() *goahocorasick.Machine {
  42. m := new(goahocorasick.Machine)
  43. dict := readRunes()
  44. if err := m.Build(dict); err != nil {
  45. fmt.Println(err)
  46. return nil
  47. }
  48. return m
  49. }
  50. func readRunes() [][]rune {
  51. var dict [][]rune
  52. for _, word := range constant.SensitiveWords {
  53. word = strings.ToLower(word)
  54. l := bytes.TrimSpace([]byte(word))
  55. dict = append(dict, bytes.Runes(l))
  56. }
  57. return dict
  58. }