sensitive.go 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. package service
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/anknown/ahocorasick"
  6. "one-api/constant"
  7. "strings"
  8. )
  9. // SensitiveWordContains 是否包含敏感词,返回是否包含敏感词和敏感词列表
  10. func SensitiveWordContains(text string) (bool, []string) {
  11. // 构建一个AC自动机
  12. m := initAc()
  13. hits := m.MultiPatternSearch([]rune(text), false)
  14. if len(hits) > 0 {
  15. words := make([]string, 0)
  16. for _, hit := range hits {
  17. words = append(words, string(hit.Word))
  18. }
  19. return true, words
  20. }
  21. return false, nil
  22. }
  23. // SensitiveWordReplace 敏感词替换,返回是否包含敏感词和替换后的文本
  24. func SensitiveWordReplace(text string) (bool, string) {
  25. m := initAc()
  26. hits := m.MultiPatternSearch([]rune(text), false)
  27. if len(hits) > 0 {
  28. for _, hit := range hits {
  29. pos := hit.Pos
  30. word := string(hit.Word)
  31. text = text[:pos] + strings.Repeat("*", len(word)) + text[pos+len(word):]
  32. }
  33. return true, text
  34. }
  35. return false, text
  36. }
  37. func initAc() *goahocorasick.Machine {
  38. m := new(goahocorasick.Machine)
  39. dict := readRunes()
  40. if err := m.Build(dict); err != nil {
  41. fmt.Println(err)
  42. return nil
  43. }
  44. return m
  45. }
  46. func readRunes() [][]rune {
  47. var dict [][]rune
  48. for _, word := range constant.SensitiveWords {
  49. l := bytes.TrimSpace([]byte(word))
  50. dict = append(dict, bytes.Runes(l))
  51. }
  52. return dict
  53. }