Trie.php 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. <?php
  2. declare(strict_types=1);
  3. namespace catcher\library;
  4. use catcher\CatchCacheKeys;
  5. use think\facade\Cache;
  /**
   * Prefix tree (trie) of sensitive words, used to find and mask those words
   * inside arbitrary text.
   *
   * The tree is a nested array: every key is a single (multibyte) character
   * leading to a child node, and a node that completes a word additionally
   * carries the end-marker key (see $end) with the value true.
   */
  class Trie
  {
      /**
       * Root node of the trie.
       *
       * @var array
       */
      protected $tree = [];

      /**
       * Key that marks a node as the end of a complete word. It is longer than
       * one character, so it can never collide with a character key.
       *
       * @var string
       */
      protected $end = 'end';

      /**
       * First word found by the latest getSensitiveWords() call made with
       * $all = false ('' when nothing was found or $all was true).
       *
       * @var string
       */
      protected $sensitiveWord = '';

      /**
       * Words found by the latest getSensitiveWords() call made with
       * $all = true (empty when $all was false).
       *
       * @var string[]
       */
      protected $sensitiveWords = [];

      /**
       * Add a word to the trie.
       *
       * The word is inserted character by character directly into the tree.
       * (Merging a separately built branch with array_merge_recursive() would
       * renumber integer keys - which is what digit characters become - and
       * would turn the end marker into a list when a word is added twice.)
       *
       * @time 2020-06-17
       * @param string $word word to register; an empty string is ignored
       * @return $this
       */
      public function add(string $word)
      {
          if ($word === '') {
              return $this;
          }

          $node = &$this->tree;
          foreach (mb_str_split($word) as $char) {
              if (!isset($node[$char]) || !is_array($node[$char])) {
                  $node[$char] = [];
              }
              $node = &$node[$char];
          }
          $node[$this->end] = true;
          // Drop the reference so later writes to $node cannot touch the tree.
          unset($node);

          return $this;
      }

      /**
       * Get the trie.
       *
       * Returns the in-memory tree when it is not empty; otherwise returns
       * whatever the "redis" cache store holds under CatchCacheKeys::TRIE_TREE.
       *
       * @time 2020-06-17
       * @throws \Psr\SimpleCache\InvalidArgumentException
       * @return array|bool
       */
      public function getTries()
      {
          if (!empty($this->tree)) {
              return $this->tree;
          }

          return Cache::store('redis')->get(CatchCacheKeys::TRIE_TREE);
      }

      /**
       * Find the sensitive words contained in a piece of text.
       *
       * The text is scanned from left to right. With $all = true, the longest
       * word starting at each position is recorded and scanning resumes right
       * after it, so the reported matches never overlap; a word that occurs
       * several times is reported once per occurrence. With $all = false the
       * scan stops at the first position where a word starts and returns the
       * shortest word beginning there.
       *
       * Every call starts from a clean state: results of earlier calls are
       * never carried over.
       *
       * @time 2020-06-17
       * @param array $trieTree trie in the format produced by add()
       * @param string $content text to inspect
       * @param bool $all true to collect every match, false to return only the first one
       * @return array|string list of matched words when $all is truthy,
       *                      otherwise the first matched word ('' when none)
       */
      public function getSensitiveWords(array $trieTree, string $content, $all = true)
      {
          $chars = mb_str_split($content);
          $length = count($chars);
          $found = [];
          $first = '';

          $start = 0;
          while ($start < $length) {
              $matched = $this->matchLengthAt($trieTree, $chars, $start, $length, (bool) $all);
              if ($matched === 0) {
                  // No word starts here; try the very next character so that no
                  // possible start position is skipped.
                  $start++;
                  continue;
              }

              $word = implode('', array_slice($chars, $start, $matched));
              if (!$all) {
                  $first = $word;
                  break;
              }

              $found[] = $word;
              $start += $matched;
          }

          $this->sensitiveWords = $found;
          $this->sensitiveWord = $first;

          return $all ? $found : $first;
      }

      /**
       * Replace every sensitive word found in the text with asterisks, one
       * asterisk per character of the word.
       *
       * @time 2020-06-17
       * @param array $tree trie in the format produced by add()
       * @param string $content text to mask
       * @return string|string[]
       */
      public function replace($tree, string $content)
      {
          $words = array_unique($this->getSensitiveWords($tree, $content));
          if ($words === []) {
              return $content;
          }

          // str_replace() applies the pairs one after another, so longer words
          // must come first; otherwise a shorter word contained in a longer one
          // would be masked first and the longer one would no longer be found.
          usort($words, static fn (string $a, string $b): int => mb_strlen($b) <=> mb_strlen($a));

          $masks = array_map(
              static fn (string $word): string => str_repeat('*', mb_strlen($word)),
              $words
          );

          return str_replace($words, $masks, $content);
      }

      /**
       * Store the in-memory tree in the "redis" cache store under
       * CatchCacheKeys::TRIE_TREE and return the result of that call.
       *
       * @time 2020-06-17
       */
      public function cached()
      {
          return Cache::store('redis')->set(CatchCacheKeys::TRIE_TREE, $this->tree);
      }

      /**
       * Length (in characters) of the word that starts at $start, or 0 when no
       * word starts there.
       *
       * @param array $tree trie root
       * @param string[] $chars text split into characters
       * @param int $start index of the first character to test
       * @param int $length number of entries in $chars
       * @param bool $longest true to keep walking for the longest word,
       *                      false to stop at the first (shortest) one
       * @return int
       */
      private function matchLengthAt(array $tree, array $chars, int $start, int $length, bool $longest): int
      {
          $node = $tree;
          $matched = 0;

          for ($i = $start; $i < $length; $i++) {
              $next = $node[$chars[$i]] ?? null;
              if (!is_array($next)) {
                  break;
              }

              $node = $next;
              if (isset($node[$this->end])) {
                  $matched = $i - $start + 1;
                  if (!$longest) {
                      break;
                  }
              }
          }

          return $matched;
      }
  }