112 lines
2.9 KiB
PHP
112 lines
2.9 KiB
PHP
<?php
|
|
|
|
/**
 * File purpose: chat content filter (sensitive-word masking and HTML sanitization).
 * Replaces the assorted filtering functions of the legacy ASP version, such as TrStr() / SHTM().
 *
 * @author ChatRoom Laravel
 *
 * @version 1.0.0
 */
|
|
|
|
namespace App\Services;
|
|
|
|
class MessageFilterService
{
    /**
     * Key stored on a trie node to mark that a blacklisted word ends there.
     * It is longer than one character, so it can never collide with the
     * single-character keys used for child nodes.
     */
    private const END_MARKER = 'is_end';

    /**
     * Simple keyword blacklist; may later be loaded dynamically from a
     * database or Redis.
     *
     * @var list<string>
     */
    private array $badWords = [
        '外挂', '刷单', '脚本', // sample blacklist
    ];

    /**
     * Trie built from $badWords and used for the DFA-style scan.
     * Stays null until the first call that needs it.
     *
     * @var array<int|string, mixed>|null
     */
    private ?array $trie = null;

    /**
     * Build the trie from the blacklist. Does nothing if it was already built.
     */
    private function buildTrie(): void
    {
        if ($this->trie !== null) {
            return;
        }

        $root = [];
        foreach ($this->badWords as $word) {
            if ($word === '') {
                // An empty entry can never match anything; skip it.
                continue;
            }

            $node = &$root;
            foreach (mb_str_split($word, 1, 'UTF-8') as $char) {
                $node[$char] ??= [];
                $node = &$node[$char];
            }
            $node[self::END_MARKER] = true;

            // Break the reference so the next word starts from a clean binding.
            unset($node);
        }

        $this->trie = $root;
    }

    /**
     * Sanitize a chat message before it is stored or displayed.
     *
     * Steps, in order: strip all HTML tags, mask every blacklisted word with
     * one '*' per character, collapse each run of whitespace (including
     * Unicode whitespace such as the ideographic space U+3000) into a single
     * ASCII space, and trim both ends.
     *
     * @param string $content Raw content sent by the user
     *
     * @return string Sanitized content
     */
    public function filter(string $content): string
    {
        if ($content === '') {
            return '';
        }

        // 1. Remove every HTML tag.
        $content = strip_tags($content);

        // 2. Mask blacklisted words.
        $content = $this->maskBadWords($content);

        // 3. Collapse whitespace runs. The `u` modifier makes \s Unicode-aware
        //    and keeps the engine from matching bytes inside multibyte characters.
        $collapsed = preg_replace('/\s+/u', ' ', $content);
        if ($collapsed === null) {
            // The `u` modifier rejects malformed UTF-8; fall back to the
            // byte-mode pattern, and to the unmodified text if that fails too.
            $collapsed = preg_replace('/\s+/', ' ', $content) ?? $content;
        }

        return trim($collapsed);
    }

    /**
     * Replace every blacklisted word in $content with '*' characters,
     * one per character of the matched word. At each position the longest
     * matching word wins.
     */
    private function maskBadWords(string $content): string
    {
        $this->buildTrie();
        $root = $this->trie ?? [];

        // Split once up front; calling mb_substr() per index is quadratic on UTF-8.
        $chars = mb_str_split($content, 1, 'UTF-8');
        $total = count($chars);

        $masked = '';
        $pos = 0;
        while ($pos < $total) {
            // Walk the trie by value (copy-on-write, nothing is copied or mutated).
            $node = $root;
            $matchLength = 0;

            for ($cursor = $pos; $cursor < $total && isset($node[$chars[$cursor]]); $cursor++) {
                $node = $node[$chars[$cursor]];
                if (isset($node[self::END_MARKER])) {
                    // Keep going: a longer word may still match from $pos.
                    $matchLength = $cursor - $pos + 1;
                }
            }

            if ($matchLength > 0) {
                $masked .= str_repeat('*', $matchLength);
                $pos += $matchLength;
            } else {
                $masked .= $chars[$pos];
                $pos++;
            }
        }

        return $masked;
    }
}
|