perf: 重构敏感词过滤为DFA算法,并引入WebSocket房间频道鉴权Redis缓存
This commit is contained in:
@@ -20,6 +20,35 @@ class MessageFilterService
|
||||
'外挂', '刷单', '脚本', // 示例黑名单
|
||||
];
|
||||
|
||||
/**
|
||||
* Trie 字典树实例,用于 DFA 过滤
|
||||
*/
|
||||
private ?array $trie = null;
|
||||
|
||||
/**
 * Build the trie (prefix tree) that backs the DFA sensitive-word scan.
 *
 * Lazily initialised: returns immediately when $this->trie is already set.
 * Each entry of $this->badWords is inserted one multibyte character per
 * level, and the node reached by the word's last character is flagged with
 * the 'is_end' key. Empty entries are ignored.
 */
private function buildTrie(): void
{
    if ($this->trie !== null) {
        return;
    }

    $this->trie = [];

    foreach ($this->badWords as $word) {
        // An empty word would otherwise flag the root node itself as a word end.
        if ($word === '') {
            continue;
        }

        $node = &$this->trie;

        // Split the word into characters in a single pass instead of
        // calling mb_substr() once per index.
        foreach (mb_str_split($word) as $char) {
            if (! isset($node[$char])) {
                $node[$char] = [];
            }
            $node = &$node[$char];
        }

        // 'is_end' is a multi-character key, so it cannot collide with a
        // single-character child key.
        $node['is_end'] = true;

        // Release the reference so the cursor does not stay bound to this
        // word's leaf node.
        unset($node);
    }
}
|
||||
|
||||
/**
|
||||
* 执行过滤净化,保障入库和显示安全。
|
||||
*
|
||||
@@ -35,16 +64,46 @@ class MessageFilterService
|
||||
// 1. HTML 标签全量脱除,阻绝任意 XSS/HTML 注入
|
||||
$content = strip_tags($content);
|
||||
|
||||
// 2. 敏感词替换
|
||||
foreach ($this->badWords as $word) {
|
||||
if (mb_strpos($content, $word) !== false) {
|
||||
// 将脏字替换为相同长度的 星号 或 提示
|
||||
$replacement = str_repeat('*', mb_strlen($word));
|
||||
$content = str_replace($word, $replacement, $content);
|
||||
// 2. 惰性初始化并构建 DFA 字典树
|
||||
$this->buildTrie();
|
||||
|
||||
// 3. 将字符串转为多字节字符数组,进行 DFA 扫描与替换
|
||||
$len = mb_strlen($content);
|
||||
$chars = [];
|
||||
for ($i = 0; $i < $len; $i++) {
|
||||
$chars[] = mb_substr($content, $i, 1);
|
||||
}
|
||||
|
||||
$result = [];
|
||||
$i = 0;
|
||||
while ($i < $len) {
|
||||
$temp = &$this->trie;
|
||||
$matchLength = 0;
|
||||
$j = $i;
|
||||
|
||||
while ($j < $len && isset($temp[$chars[$j]])) {
|
||||
$temp = &$temp[$chars[$j]];
|
||||
if (isset($temp['is_end']) && $temp['is_end'] === true) {
|
||||
$matchLength = $j - $i + 1; // 匹配到最长敏感词
|
||||
}
|
||||
$j++;
|
||||
}
|
||||
|
||||
if ($matchLength > 0) {
|
||||
// 替换为相同长度的 *
|
||||
for ($k = 0; $k < $matchLength; $k++) {
|
||||
$result[] = '*';
|
||||
}
|
||||
$i += $matchLength; // 跳过敏感词
|
||||
} else {
|
||||
$result[] = $chars[$i];
|
||||
$i++;
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 将连续的空格去重,只保留一个真正的空格
|
||||
$content = implode('', $result);
|
||||
|
||||
// 4. 将连续的空格去重,只保留一个真正的空格
|
||||
$content = preg_replace('/\s+/', ' ', $content);
|
||||
|
||||
return trim($content);
|
||||
|
||||
Reference in New Issue
Block a user