perf: 重构敏感词过滤为DFA算法,并引入WebSocket房间频道鉴权Redis缓存

This commit is contained in:
pllx
2026-06-30 11:32:55 +08:00
parent b19073cf85
commit 83192ffcce
3 changed files with 85 additions and 8 deletions
+66 -7
View File
@@ -20,6 +20,35 @@ class MessageFilterService
'外挂', '刷单', '脚本', // 示例黑名单
];
/**
 * Trie (prefix tree) used for DFA-style bad-word filtering.
 *
 * Stays null until buildTrie() populates it. Once built, every node is an
 * array keyed by a single character, and a node that completes a bad word
 * additionally carries the key 'is_end' set to true.
 */
private ?array $trie = null;
/**
 * Lazily build the trie (prefix tree) that backs DFA-style bad-word matching.
 *
 * Every node is an array keyed by a single multibyte character; a node that
 * completes a bad word additionally carries the key 'is_end' set to true.
 * The trie is built at most once per instance: when it already exists the
 * method returns immediately.
 */
private function buildTrie(): void
{
    if ($this->trie !== null) {
        return;
    }

    $this->trie = [];

    foreach ($this->badWords as $word) {
        // An empty entry has no characters to walk and would only flag the
        // root node itself as the end of a word, so it is ignored.
        if ($word === '') {
            continue;
        }

        $node = &$this->trie;

        // mb_str_split() decodes the word in a single pass, whereas calling
        // mb_substr($word, $i, 1) per index re-scans the string every time.
        foreach (mb_str_split($word) as $char) {
            if (! isset($node[$char])) {
                $node[$char] = [];
            }
            $node = &$node[$char];
        }

        $node['is_end'] = true;

        // Drop the reference so nothing keeps pointing into the trie.
        unset($node);
    }
}
/**
* 执行过滤净化,保障入库和显示安全。
*
@@ -35,16 +64,46 @@ class MessageFilterService
// 1. HTML 标签全量脱除,阻绝任意 XSS/HTML 注入
$content = strip_tags($content);
// 2. 敏感词替换
foreach ($this->badWords as $word) {
if (mb_strpos($content, $word) !== false) {
// 将脏字替换为相同长度的 星号 或 提示
$replacement = str_repeat('*', mb_strlen($word));
$content = str_replace($word, $replacement, $content);
// 2. 惰性初始化并构建 DFA 字典树
$this->buildTrie();
// 3. 将字符串转为多字节字符数组,进行 DFA 扫描与替换
$len = mb_strlen($content);
$chars = [];
for ($i = 0; $i < $len; $i++) {
$chars[] = mb_substr($content, $i, 1);
}
$result = [];
$i = 0;
while ($i < $len) {
$temp = &$this->trie;
$matchLength = 0;
$j = $i;
while ($j < $len && isset($temp[$chars[$j]])) {
$temp = &$temp[$chars[$j]];
if (isset($temp['is_end']) && $temp['is_end'] === true) {
$matchLength = $j - $i + 1; // 匹配到最长敏感词
}
$j++;
}
if ($matchLength > 0) {
// 替换为相同长度的 *
for ($k = 0; $k < $matchLength; $k++) {
$result[] = '*';
}
$i += $matchLength; // 跳过敏感词
} else {
$result[] = $chars[$i];
$i++;
}
}
// 3. 将连续的空格去重,只保留一个真正的空格
$content = implode('', $result);
// 4. 将连续的空格去重,只保留一个真正的空格
$content = preg_replace('/\s+/', ' ', $content);
return trim($content);