whale-town/scripts/DialogueFilter.gd

extends Node
class_name DialogueFilter
## Dialogue filter.
## Provides content moderation, filtering and safety checks for chat messages.

# Feature toggles, one per filter stage
var enable_profanity_filter: bool = true
var enable_spam_detection: bool = true
var enable_length_limit: bool = true
var enable_rate_limiting: bool = true

# Length limits (character counts)
var max_message_length: int = 500
var min_message_length: int = 1

# Spam detection
var spam_detection_window: float = 10.0  # 10-second window
var max_messages_per_window: int = 5
var max_duplicate_messages: int = 3

# Banned word list (sample only; real deployments should load it from a config file)
var profanity_words: Array[String] = [
	# Basic banned words (sample)
	"垃圾", "废物", "白痴", "蠢货", "混蛋",
	# Add more entries as needed
]

# Replacement text substituted for banned words
var profanity_replacement: String = "***"

# Message history (used for spam detection)
var message_history: Dictionary = {}  # user_id -> Array[Dictionary]

# Filter statistics counters
var filter_stats: Dictionary = {
	"total_messages": 0,
	"filtered_messages": 0,
	"profanity_blocked": 0,
	"spam_blocked": 0,
	"length_violations": 0,
	"rate_limit_violations": 0
}

# Signals
signal message_filtered(user_id: String, original_message: String, filtered_message: String, reason: String)
signal message_blocked(user_id: String, message: String, reason: String)

func _ready():
	"""Set up the dialogue filter: restore any persisted configuration, then log readiness."""
	load_filter_config()
	print("DialogueFilter initialized")

## Filter a message
func filter_message(user_id: String, message: String) -> Dictionary:
	"""
	Validate and filter a chat message.
	Stages run in order: length check, rate limit, spam detection, banned-word
	masking. The first failing check blocks the message and emits
	`message_blocked`; banned words are masked (not blocked) and emit
	`message_filtered`.
	@param user_id: ID of the sending user
	@param message: raw message text
	@return: filter result {allowed: bool, filtered_message: String, reason: String}
	"""
	filter_stats.total_messages += 1

	var result = {
		"allowed": true,
		"filtered_message": message,
		"reason": ""
	}

	# 1. Length check
	if enable_length_limit:
		var length_check = _check_message_length(message)
		if not length_check.valid:
			result.allowed = false
			result.reason = length_check.reason
			filter_stats.length_violations += 1
			message_blocked.emit(user_id, message, result.reason)
			return result

	# 2. Rate-limit check
	if enable_rate_limiting:
		var rate_check = _check_rate_limit(user_id, message)
		if not rate_check.valid:
			result.allowed = false
			result.reason = rate_check.reason
			filter_stats.rate_limit_violations += 1
			message_blocked.emit(user_id, message, result.reason)
			return result

	# 3. Spam detection
	if enable_spam_detection:
		var spam_check = _check_spam(user_id, message)
		if not spam_check.valid:
			result.allowed = false
			result.reason = spam_check.reason
			filter_stats.spam_blocked += 1
			message_blocked.emit(user_id, message, result.reason)
			return result

	# 4. Banned-word masking (the message is still allowed through)
	if enable_profanity_filter:
		var profanity_result = _filter_profanity(message)
		if profanity_result.has_profanity:
			result.filtered_message = profanity_result.filtered_message
			filter_stats.profanity_blocked += 1
			message_filtered.emit(user_id, message, result.filtered_message, "违禁词过滤")

	# 5. Record the message for later rate-limit / duplicate checks.
	# Store the RAW text: _check_spam compares the incoming raw message against
	# this history, so storing the masked text would let a repeated message that
	# contains banned words slip past duplicate detection.
	_record_message(user_id, message)

	if result.filtered_message != message:
		filter_stats.filtered_messages += 1

	return result

## Check message length
func _check_message_length(message: String) -> Dictionary:
	"""
	Verify that the message, with surrounding whitespace stripped, has a
	character count within [min_message_length, max_message_length].
	@param message: message text
	@return: {valid: bool, reason: String}; reason is empty when valid
	"""
	var char_count = message.strip_edges().length()
	var verdict = {"valid": true, "reason": ""}

	if char_count < min_message_length:
		verdict.valid = false
		verdict.reason = "消息不能为空"
	elif char_count > max_message_length:
		verdict.valid = false
		verdict.reason = "消息长度超过限制（最多%d个字符）" % max_message_length

	return verdict

## Check the rate limit
func _check_rate_limit(user_id: String, _message: String) -> Dictionary:
	"""
	Check whether the user has already reached the per-window message quota.
	Only messages recorded within the last `spam_detection_window` seconds count.
	@param user_id: user ID
	@param _message: message text (currently unused)
	@return: {valid: bool, reason: String}
	"""
	# Users with no recorded history can never be over the limit.
	if not message_history.has(user_id):
		return {"valid": true, "reason": ""}

	var now = Time.get_unix_time_from_system()
	var recent_count = 0

	# Count the records that fall inside the time window
	for entry in message_history[user_id]:
		if now - entry.timestamp <= spam_detection_window:
			recent_count += 1

	if recent_count < max_messages_per_window:
		return {"valid": true, "reason": ""}

	return {
		"valid": false,
		"reason": "发送消息过于频繁，请稍后再试"
	}

## Check for spam
func _check_spam(user_id: String, message: String) -> Dictionary:
	"""
	Check whether a message looks like spam. Three rules are applied in order:
	too many recent duplicates, text dominated by one repeated character,
	and long all-uppercase text.
	@param user_id: user ID
	@param message: message text
	@return: {valid: bool, reason: String}
	"""
	var lowered = message.to_lower()
	var current_time = Time.get_unix_time_from_system()
	var duplicate_count = 0

	# Count case-insensitive duplicates inside the detection window.
	# A user without history has no duplicates, but the content checks below
	# must still run for their first message.
	for msg_record in message_history.get(user_id, []):
		if current_time - msg_record.timestamp <= spam_detection_window:
			if msg_record.message.to_lower() == lowered:
				duplicate_count += 1

	if duplicate_count >= max_duplicate_messages:
		return {
			"valid": false,
			"reason": "请不要重复发送相同的消息"
		}

	# Reject text that is mostly one repeated character
	if _is_repetitive_text(message):
		return {
			"valid": false,
			"reason": "请发送有意义的消息内容"
		}

	# Reject long all-uppercase text (likely shouting/flooding).
	# Upper- and lower-casing must differ, i.e. the text must contain cased
	# letters; otherwise caseless text such as Chinese, digits or punctuation
	# would equal its own to_upper() and be rejected by mistake.
	var uppered = message.to_upper()
	var has_cased_letters = uppered != lowered
	if message.length() > 10 and has_cased_letters and message == uppered:
		return {
			"valid": false,
			"reason": "请不要使用全大写字母"
		}

	return {"valid": true, "reason": ""}

## Mask banned words
func _filter_profanity(message: String) -> Dictionary:
	"""
	Replace every case-insensitive occurrence of each banned word with
	`profanity_replacement`.
	@param message: raw message text
	@return: {has_profanity: bool, filtered_message: String}
	"""
	var filtered_message = message
	var has_profanity = false

	for word in profanity_words:
		# Skip empty entries: an empty needle would match at every position.
		if word.is_empty():
			continue

		# Plain case-insensitive search/replace treats the word literally, so
		# entries containing regex metacharacters (e.g. "*", "(", "+") are
		# handled safely and no pattern has to be compiled per word.
		if filtered_message.findn(word) != -1:
			filtered_message = filtered_message.replacen(word, profanity_replacement)
			has_profanity = true

	return {
		"has_profanity": has_profanity,
		"filtered_message": filtered_message
	}

## Check for repeated-character text
func _is_repetitive_text(text: String) -> bool:
	"""
	Report whether a single character makes up more than 70% of the text.
	Texts shorter than 5 characters are never considered repetitive.
	@param text: input text
	@return: true when the text is dominated by one character
	"""
	var total = text.length()
	if total < 5:
		return false

	# A character is "dominant" once its count exceeds 70% of the length
	var limit = total * 0.7
	var tally = {}

	for i in range(total):
		var ch = text[i]
		var seen = tally.get(ch, 0) + 1
		tally[ch] = seen
		# Counts only ever grow, so we can stop as soon as one crosses the limit
		if seen > limit:
			return true

	return false

## Record message history
func _record_message(user_id: String, message: String) -> void:
	"""
	Append a message to the user's history (used for spam detection) and
	drop that user's records older than one hour.
	@param user_id: user ID
	@param message: message text to store
	"""
	var now = Time.get_unix_time_from_system()
	var cutoff = now - 3600  # keep only the last hour

	var entries = message_history.get(user_id, [])
	entries.append({
		"message": message,
		"timestamp": now
	})

	# Store a fresh array containing only the non-expired records
	message_history[user_id] = entries.filter(func(entry): return entry.timestamp > cutoff)

## Add a banned word
func add_profanity_word(word: String) -> void:
	"""
	Add a word to the banned list and persist the configuration.
	The word is trimmed and lower-cased first; empty or already-present
	words are ignored.
	@param word: banned word
	"""
	var normalized = word.strip_edges().to_lower()
	if normalized.is_empty() or profanity_words.has(normalized):
		return

	profanity_words.append(normalized)
	save_filter_config()

## Remove a banned word
func remove_profanity_word(word: String) -> void:
	"""
	Remove a word from the banned list and persist the configuration.
	The word is trimmed and lower-cased before lookup; nothing happens
	when it is not in the list.
	@param word: banned word
	"""
	var normalized = word.strip_edges().to_lower()
	if not profanity_words.has(normalized):
		return

	profanity_words.erase(normalized)
	save_filter_config()

## Apply filter configuration
func set_filter_config(config: Dictionary) -> void:
	"""
	Apply the settings present in `config` and persist the result.
	Keys that are absent leave the current value untouched; unknown keys
	are ignored.
	@param config: settings dictionary; recognised keys are
		enable_profanity_filter, enable_spam_detection, enable_length_limit,
		enable_rate_limiting, max_message_length, min_message_length,
		max_messages_per_window, max_duplicate_messages, spam_detection_window
	"""
	if config.has("enable_profanity_filter"):
		enable_profanity_filter = config.enable_profanity_filter

	if config.has("enable_spam_detection"):
		enable_spam_detection = config.enable_spam_detection

	if config.has("enable_length_limit"):
		enable_length_limit = config.enable_length_limit

	if config.has("enable_rate_limiting"):
		enable_rate_limiting = config.enable_rate_limiting

	# Numeric values loaded from JSON arrive as floats, so coerce explicitly.
	if config.has("max_message_length"):
		max_message_length = int(config.max_message_length)

	# min_message_length and spam_detection_window are written out by
	# get_filter_config(), so they must be accepted here as well; otherwise
	# saved values are silently dropped on the next load.
	if config.has("min_message_length"):
		min_message_length = int(config.min_message_length)

	if config.has("max_messages_per_window"):
		max_messages_per_window = int(config.max_messages_per_window)

	if config.has("max_duplicate_messages"):
		max_duplicate_messages = int(config.max_duplicate_messages)

	if config.has("spam_detection_window"):
		spam_detection_window = float(config.spam_detection_window)

	save_filter_config()

## Get filter configuration
func get_filter_config() -> Dictionary:
	"""
	Return a snapshot of the current filter settings.
	`profanity_words_count` is informational only (the size of the word list).
	@return: settings dictionary
	"""
	return {
		"enable_profanity_filter": enable_profanity_filter,
		"enable_spam_detection": enable_spam_detection,
		"enable_length_limit": enable_length_limit,
		"enable_rate_limiting": enable_rate_limiting,
		"max_message_length": max_message_length,
		"min_message_length": min_message_length,
		"max_messages_per_window": max_messages_per_window,
		# Included so that every spam-detection tunable is reported and persisted
		"max_duplicate_messages": max_duplicate_messages,
		"spam_detection_window": spam_detection_window,
		"profanity_words_count": profanity_words.size()
	}

## Get filter statistics
func get_filter_statistics() -> Dictionary:
	"""
	Return a copy of the counters plus two derived ratios:
	`filter_rate` (filtered / total) and `block_rate`
	(profanity + spam + length + rate-limit counts / total).
	Both ratios are 0.0 when no message has been processed.
	@return: statistics dictionary
	"""
	var snapshot = filter_stats.duplicate()
	var total = snapshot.total_messages
	var filter_rate = 0.0
	var block_rate = 0.0

	if total > 0:
		# NOTE(review): profanity_blocked counts messages that filter_message
		# masks but still allows, so block_rate includes them - confirm intent.
		var blocked = snapshot.profanity_blocked + snapshot.spam_blocked + snapshot.length_violations + snapshot.rate_limit_violations
		filter_rate = float(snapshot.filtered_messages) / float(total)
		block_rate = float(blocked) / float(total)

	snapshot["filter_rate"] = filter_rate
	snapshot["block_rate"] = block_rate
	return snapshot

## Reset statistics
func reset_statistics() -> void:
	"""Replace the statistics dictionary with a fresh one whose counters are all zero."""
	var zeroed = {}
	for counter in [
		"total_messages",
		"filtered_messages",
		"profanity_blocked",
		"spam_blocked",
		"length_violations",
		"rate_limit_violations",
	]:
		zeroed[counter] = 0

	filter_stats = zeroed

## Clear a user's history
func clear_user_history(user_id: String) -> void:
	"""
	Drop all recorded messages for the given user.
	@param user_id: user ID
	"""
	# Dictionary.erase is a no-op (returns false) when the key is absent
	message_history.erase(user_id)

## Save filter configuration
func save_filter_config() -> void:
	"""
	Write the current settings and banned-word list as JSON to
	user://dialogue_filter_config.json. A failure to open the file is
	reported as a warning instead of being silently ignored.
	"""
	var config = {
		"filter_settings": get_filter_config(),
		"profanity_words": profanity_words
	}

	var file = FileAccess.open("user://dialogue_filter_config.json", FileAccess.WRITE)
	if file == null:
		push_warning("Failed to save filter config (error %d)" % FileAccess.get_open_error())
		return

	file.store_string(JSON.stringify(config))
	file.close()
	print("Filter config saved")

## Load filter configuration
func load_filter_config() -> void:
	"""
	Load settings and the banned-word list from
	user://dialogue_filter_config.json. Defaults are kept when the file is
	missing, unreadable, or does not contain a JSON object.
	"""
	var config_path = "user://dialogue_filter_config.json"

	if not FileAccess.file_exists(config_path):
		print("No filter config found, using defaults")
		return

	var file = FileAccess.open(config_path, FileAccess.READ)
	if file == null:
		push_warning("Failed to open filter config (error %d)" % FileAccess.get_open_error())
		return

	var json_string = file.get_as_text()
	file.close()

	var json = JSON.new()
	# The root must be a JSON object; anything else is treated as a parse failure.
	if json.parse(json_string) != OK or not (json.data is Dictionary):
		print("Failed to parse filter config")
		return

	var config: Dictionary = json.data

	# Restore the word list BEFORE applying settings: set_filter_config() saves
	# to disk, and doing that first would overwrite the stored custom words
	# with the built-in defaults.
	if config.get("profanity_words") is Array:
		# JSON yields an untyped Array; rebuild it as Array[String] so it can be
		# assigned to the typed member, dropping non-string or empty entries.
		var loaded_words: Array[String] = []
		for entry in config["profanity_words"]:
			if entry is String and not entry.is_empty():
				loaded_words.append(entry)
		profanity_words = loaded_words

	if config.get("filter_settings") is Dictionary:
		set_filter_config(config["filter_settings"])

	print("Filter config loaded")

## Periodic cleanup of expired data
func _on_cleanup_timer():
	"""
	Purge message records older than one hour for every user, removing users
	whose history becomes empty.
	NOTE(review): presumably a Timer timeout handler - no connection is
	visible in this file; confirm where it is wired up.
	"""
	var cutoff = Time.get_unix_time_from_system() - 3600

	# keys() returns a separate array, so erasing entries while looping is safe
	for user_id in message_history.keys():
		var fresh = message_history[user_id].filter(func(entry): return entry.timestamp > cutoff)

		if fresh.is_empty():
			message_history.erase(user_id)
		else:
			message_history[user_id] = fresh