trie !== null) {
            return;
        }

        $this->trie = [];
        foreach ($this->badWords as $word) {
            $temp = &$this->trie;
            $len = mb_strlen($word);
            for ($i = 0; $i < $len; $i++) {
                $char = mb_substr($word, $i, 1);
                if (! isset($temp[$char])) {
                    $temp[$char] = [];
                }
                $temp = &$temp[$char];
            }
            $temp['is_end'] = true;
        }
    }

    /**
     * Sanitizes user-submitted content.
     *
     * Processing order:
     *  1. HTML/PHP tags are removed with strip_tags().
     *  2. The text is scanned against the bad-word trie; at every position the
     *     longest dictionary word found is replaced by one '*' per character.
     *  3. Every run of whitespace is collapsed into a single ASCII space and
     *     the result is trimmed.
     *
     * NOTE(review): strip_tags() removes markup but does not HTML-escape the
     * remaining text; presumably the caller still escapes on output — confirm.
     *
     * @param string $content Raw content sent by the user.
     * @return string The sanitized content.
     */
    public function filter(string $content): string
    {
        if ($content === '') {
            return '';
        }

        // 1. Drop all HTML tags.
        $content = strip_tags($content);

        // 2. Lazily build the trie (buildTrie() returns early once it exists).
        $this->buildTrie();

        // 3. Split into multibyte characters, the same way buildTrie() splits
        //    the dictionary words, so both sides agree on what a character is.
        $len = mb_strlen($content);
        $chars = [];
        for ($i = 0; $i < $len; $i++) {
            $chars[] = mb_substr($content, $i, 1);
        }

        $result = [];
        $i = 0;
        while ($i < $len) {
            // The walk only reads the trie, so a plain (copy-on-write)
            // assignment is enough; no references are needed.
            $node = $this->trie;
            $matchLength = 0;

            for ($j = $i; $j < $len && isset($node[$chars[$j]]); $j++) {
                $node = $node[$chars[$j]];
                if (($node['is_end'] ?? false) === true) {
                    // Keep going after a hit so the longest word wins.
                    $matchLength = $j - $i + 1;
                }
            }

            if ($matchLength > 0) {
                // Mask the word with as many '*' as it has characters, then skip it.
                $result[] = str_repeat('*', $matchLength);
                $i += $matchLength;
            } else {
                $result[] = $chars[$i];
                $i++;
            }
        }

        $content = implode('', $result);

        // 4. Collapse whitespace runs into one real space. The `u` modifier
        //    makes PCRE treat the subject as UTF-8, so `\s` can never match a
        //    single byte inside a multibyte character and Unicode whitespace
        //    (e.g. the full-width space U+3000) is folded as well.
        $collapsed = preg_replace('/\s+/u', ' ', $content);
        if ($collapsed === null) {
            // preg_replace() yields null on error (e.g. the subject is not
            // valid UTF-8). Fall back to an explicit ASCII class, which is
            // byte-safe because these bytes never occur inside a UTF-8 sequence.
            $collapsed = preg_replace('/[ \t\n\r\f\x0B]+/', ' ', $content) ?? $content;
        }

        return trim($collapsed);
    }
}