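/**
 * Health check controller
 *
 * Description:
 * - Exposes endpoints for checking the overall health of the system
 * - Monitors the running state of the individual components
 * - Provides performance metrics and statistics
 * - Supports health probes issued by load balancers
 *
 * Separation of responsibilities:
 * - Health checks: inspect the state of each system component
 * - Performance monitoring: provide real-time performance metrics
 * - Statistics reporting: produce runtime statistics for the system
 * - Diagnostics: provide information for troubleshooting
 *
 * Implementation:
 * - HTTP interface: a RESTful health check API
 * - Component checks: verify dependencies such as Redis and the database
 * - Performance metrics: collect and expose key metrics
 * - Caching: avoid running checks so often that they affect performance
 *
 * Recent changes:
 * - 2026-01-08: Bug fix - removed unused imports, improved code quality (modified by: moyin)
 *
 * @author moyin
 * @version 1.0.1
 * @since 2026-01-08
 * @lastModified 2026-01-08
 */
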
import {
  Controller,
  Get,
  HttpException,
  HttpStatus,
  Inject,
  Logger,
} from '@nestjs/common';
import { ApiTags, ApiOperation, ApiResponse } from '@nestjs/swagger';

// Middleware and services
import { PerformanceMonitorMiddleware } from './performance_monitor.middleware';
import { RateLimitMiddleware } from './rate_limit.middleware';

/**
 * Health states reported for a single component and for the system as a whole.
 *
 * The string values are part of the HTTP response payload, so they must not be
 * renamed without coordinating with API consumers.
 */
enum HealthStatus {
  /** No problem detected. */
  HEALTHY = 'healthy',
  /** Still working, but at least one indicator crossed a warning threshold. */
  DEGRADED = 'degraded',
  /** Failed, or an indicator crossed a critical threshold. */
  UNHEALTHY = 'unhealthy',
}

/**
 * Health record for a single component (dependency or internal subsystem).
 */
interface ComponentHealth {
  /** Component name, e.g. `redis`, `database`, `core_service`. */
  name: string;
  /** Health state of the component. */
  status: HealthStatus;
  /** Time the check took, in milliseconds (only set by checks that measure it). */
  responseTime?: number;
  /** Error message; set when the check failed. */
  error?: string;
  /**
   * Free-form, check-specific details.
   * Typed as `unknown` because each check supplies a different shape; consumers
   * must narrow before reading.
   */
  details?: unknown;
  /** Time the check was performed, in milliseconds since the Unix epoch. */
  timestamp: number;
}

/**
 * Payload returned by the system-wide health check.
 */
interface HealthCheckResponse {
  /** Overall state derived from the component states. */
  status: HealthStatus;
  /** Time the check was performed, in milliseconds since the Unix epoch. */
  timestamp: number;
  /** System version string. */
  version: string;
  /** Uptime in milliseconds. */
  uptime: number;
  /** Per-component health records. */
  components: ComponentHealth[];
  /** Performance metrics; omitted when the check itself failed. */
  metrics?: {
    /** Number of active connections. */
    activeConnections: number;
    /** Total number of events. */
    totalEvents: number;
    /** Average response time (presumably milliseconds — confirm with the performance monitor). */
    avgResponseTime: number;
    /** Error rate (presumably a percentage — confirm with the performance monitor). */
    errorRate: number;
    /** Memory usage. */
    memoryUsage: {
      used: number;
      total: number;
      percentage: number;
    };
  };
}

/**
 * Detailed health report: the basic health payload plus runtime, performance
 * and configuration information.
 */
interface DetailedHealthReport extends HealthCheckResponse {
  /** Information about the running process and its host platform. */
  system: {
    /** Node.js version. */
    nodeVersion: string;
    /** Platform identifier. */
    platform: string;
    /** CPU architecture. */
    arch: string;
    /** Process ID. */
    pid: number;
  };
  /**
   * Performance statistics. The payloads are produced by the middlewares and
   * passed through untouched, hence `unknown` rather than a concrete shape.
   */
  performance: {
    /** Per-event statistics. */
    eventStats: unknown[];
    /** Rate-limit statistics. */
    rateLimitStats: unknown;
    /** System-level performance snapshot. */
    systemPerformance: unknown;
  };
  /** Configuration information. */
  configuration: {
    /** Name of the runtime environment. */
    environment: string;
    /** Feature switches. */
    features: {
      rateLimitEnabled: boolean;
      performanceMonitorEnabled: boolean;
    };
  };
}

@ApiTags('健康检查')
@Controller('health')
export class HealthController {
  /** Names of the components whose state decides the readiness probe. */
  private static readonly CRITICAL_COMPONENTS: readonly string[] = [
    'redis',
    'database',
    'core_service',
  ];

  private readonly logger = new Logger(HealthController.name);

  /** Creation time of this controller (ms since epoch); baseline for `uptime`. */
  private readonly startTime = Date.now();

  /** Last result of the full health check, reused by `GET /health` until it expires. */
  private healthCache: HealthCheckResponse | null = null;

  /** Time (ms since epoch) after which `healthCache` is considered stale. */
  private cacheExpiry = 0;

  /** Cache lifetime: 30 seconds. */
  private readonly cacheTimeout = 30000;

  constructor(
    // Only checked for presence in this class, so no concrete type is required.
    @Inject('ILocationBroadcastCore')
    private readonly locationBroadcastCore: unknown,
    private readonly performanceMonitor: PerformanceMonitorMiddleware,
    private readonly rateLimitMiddleware: RateLimitMiddleware,
  ) {}

  /**
   * Basic health check, suitable for load balancers.
   *
   * Serves the cached result when it is still fresh; otherwise runs the full
   * check and caches it for `cacheTimeout` milliseconds.
   *
   * @returns The health payload when the overall status is healthy or degraded.
   * @throws HttpException with status 503 and the health payload as the body
   *   when the overall status is unhealthy or the check itself failed.
   */
  @Get()
  @ApiOperation({ summary: '基础健康检查' })
  @ApiResponse({
    status: HttpStatus.OK,
    description: '系统健康',
    schema: {
      type: 'object',
      properties: {
        status: { type: 'string', enum: ['healthy', 'degraded', 'unhealthy'] },
        timestamp: { type: 'number' },
        uptime: { type: 'number' },
      },
    },
  })
  @ApiResponse({
    status: HttpStatus.SERVICE_UNAVAILABLE,
    description: '系统不健康',
  })
  async getHealth(): Promise<HealthCheckResponse> {
    const health = await this.resolveHealth();

    // The 503 is signalled here, outside any try/catch, so it cannot be
    // swallowed and re-reported as a check failure.
    return this.formatHealthResponse(health, this.statusCodeFor(health.status));
  }

  /**
   * Detailed health check.
   *
   * Always runs a fresh check (the cache is not consulted) and adds process,
   * performance and configuration information.
   *
   * @returns The detailed health report.
   * @throws Re-throws any error raised while building the report, after logging it.
   */
  @Get('detailed')
  @ApiOperation({ summary: '详细健康检查' })
  @ApiResponse({
    status: HttpStatus.OK,
    description: '详细健康报告',
  })
  async getDetailedHealth(): Promise<DetailedHealthReport> {
    try {
      const basicHealth = await this.performHealthCheck();
      const systemPerformance = this.performanceMonitor.getSystemPerformance();
      const eventStats = this.performanceMonitor.getEventStats();
      const rateLimitStats = this.rateLimitMiddleware.getStats();

      const detailedReport: DetailedHealthReport = {
        ...basicHealth,
        system: {
          nodeVersion: process.version,
          platform: process.platform,
          arch: process.arch,
          pid: process.pid,
        },
        performance: {
          eventStats,
          rateLimitStats,
          systemPerformance,
        },
        configuration: {
          environment: process.env.NODE_ENV || 'development',
          features: {
            // Hard-coded: both middlewares are injected unconditionally.
            rateLimitEnabled: true,
            performanceMonitorEnabled: true,
          },
        },
      };

      return detailedReport;
    } catch (error) {
      this.logger.error('详细健康检查失败', {
        error: this.describeError(error),
        timestamp: new Date().toISOString(),
      });

      throw error;
    }
  }

  /**
   * Performance metrics endpoint.
   *
   * @returns A snapshot of system, event and rate-limit statistics plus uptime (ms).
   * @throws Re-throws any error raised while collecting the metrics, after logging it.
   */
  @Get('metrics')
  @ApiOperation({ summary: '获取性能指标' })
  @ApiResponse({
    status: HttpStatus.OK,
    description: '性能指标数据',
  })
  async getMetrics() {
    try {
      const systemPerformance = this.performanceMonitor.getSystemPerformance();
      const eventStats = this.performanceMonitor.getEventStats();
      const rateLimitStats = this.rateLimitMiddleware.getStats();

      return {
        timestamp: Date.now(),
        system: systemPerformance,
        events: eventStats,
        rateLimit: rateLimitStats,
        uptime: Date.now() - this.startTime,
      };
    } catch (error) {
      this.logger.error('获取性能指标失败', {
        error: this.describeError(error),
        timestamp: new Date().toISOString(),
      });

      throw error;
    }
  }

  /**
   * Readiness check: is the system ready to receive requests?
   *
   * Only the critical components (`redis`, `database`, `core_service`) are
   * considered, and every one of them must be exactly `healthy`.
   *
   * @returns The readiness payload when all critical components are healthy.
   * @throws HttpException with status 503 and the readiness payload as the body
   *   when a critical component is not healthy or the check itself failed.
   */
  @Get('ready')
  @ApiOperation({ summary: '就绪检查' })
  @ApiResponse({
    status: HttpStatus.OK,
    description: '系统就绪',
  })
  @ApiResponse({
    status: HttpStatus.SERVICE_UNAVAILABLE,
    description: '系统未就绪',
  })
  async getReadiness(): Promise<
    Pick<HealthCheckResponse, 'status' | 'timestamp' | 'components'>
  > {
    let readiness: Pick<HealthCheckResponse, 'status' | 'timestamp' | 'components'>;

    try {
      const components = await this.checkComponents();
      const criticalComponents = components.filter((c) =>
        HealthController.CRITICAL_COMPONENTS.includes(c.name),
      );
      const allCriticalHealthy = criticalComponents.every(
        (c) => c.status === HealthStatus.HEALTHY,
      );

      readiness = {
        status: allCriticalHealthy ? HealthStatus.HEALTHY : HealthStatus.UNHEALTHY,
        timestamp: Date.now(),
        components: criticalComponents,
      };
    } catch (error) {
      const message = this.describeError(error);
      this.logger.error('就绪检查失败', {
        error: message,
        timestamp: new Date().toISOString(),
      });

      readiness = {
        status: HealthStatus.UNHEALTHY,
        timestamp: Date.now(),
        components: [this.systemFailureComponent(message)],
      };
    }

    // Signalled outside the try/catch so the 503 is not caught and re-wrapped.
    return this.formatHealthResponse(readiness, this.statusCodeFor(readiness.status));
  }

  /**
   * Liveness check: a trivial probe that only proves the process can respond.
   *
   * @returns Status `alive`, the current timestamp, uptime (ms) and process ID.
   */
  @Get('live')
  @ApiOperation({ summary: '存活检查' })
  @ApiResponse({
    status: HttpStatus.OK,
    description: '系统存活',
  })
  async getLiveness() {
    return {
      status: 'alive',
      timestamp: Date.now(),
      uptime: Date.now() - this.startTime,
      pid: process.pid,
    };
  }

  /**
   * Returns the cached health result if it is still fresh, otherwise runs the
   * full check and caches it. Never throws: a failure of the check itself is
   * logged and converted into an unhealthy payload (which is not cached).
   */
  private async resolveHealth(): Promise<HealthCheckResponse> {
    try {
      const now = Date.now();

      if (this.healthCache && now < this.cacheExpiry) {
        return this.healthCache;
      }

      const healthCheck = await this.performHealthCheck();
      this.healthCache = healthCheck;
      this.cacheExpiry = now + this.cacheTimeout;

      return healthCheck;
    } catch (error) {
      const message = this.describeError(error);
      this.logger.error('健康检查失败', {
        error: message,
        timestamp: new Date().toISOString(),
      });

      return {
        status: HealthStatus.UNHEALTHY,
        timestamp: Date.now(),
        version: process.env.npm_package_version || '1.0.0',
        uptime: Date.now() - this.startTime,
        components: [this.systemFailureComponent(message)],
      };
    }
  }

  /**
   * Runs every component check and derives the overall status:
   * any unhealthy component makes the system unhealthy, otherwise any degraded
   * component makes it degraded, otherwise it is healthy.
   *
   * @returns The full health payload including performance metrics.
   */
  private async performHealthCheck(): Promise<HealthCheckResponse> {
    const components = await this.checkComponents();
    const systemPerformance = this.performanceMonitor.getSystemPerformance();

    let overallStatus = HealthStatus.HEALTHY;
    if (components.some((c) => c.status === HealthStatus.UNHEALTHY)) {
      overallStatus = HealthStatus.UNHEALTHY;
    } else if (components.some((c) => c.status === HealthStatus.DEGRADED)) {
      overallStatus = HealthStatus.DEGRADED;
    }

    return {
      status: overallStatus,
      timestamp: Date.now(),
      version: process.env.npm_package_version || '1.0.0',
      uptime: Date.now() - this.startTime,
      components,
      metrics: {
        activeConnections: systemPerformance.activeConnections,
        totalEvents: systemPerformance.totalEvents,
        avgResponseTime: systemPerformance.avgResponseTime,
        errorRate: systemPerformance.errorRate,
        memoryUsage: systemPerformance.memoryUsage,
      },
    };
  }

  /**
   * Checks every component.
   *
   * The checks are independent and each one converts its own failures into an
   * unhealthy record, so they are started together; the result order is fixed:
   * redis, database, core service, performance monitor, rate limiter.
   *
   * @returns One health record per component.
   */
  private async checkComponents(): Promise<ComponentHealth[]> {
    return Promise.all([
      this.checkRedis(),
      this.checkDatabase(),
      this.checkCoreService(),
      this.checkPerformanceMonitor(),
      this.checkRateLimitMiddleware(),
    ]);
  }

  /**
   * Checks the Redis connection.
   *
   * TODO: placeholder — no Redis call is made yet, so this always reports
   * healthy. The real probe belongs inside the `try` block.
   *
   * @returns The Redis health record.
   */
  private async checkRedis(): Promise<ComponentHealth> {
    const startTime = Date.now();

    try {
      const responseTime = Date.now() - startTime;

      return {
        name: 'redis',
        status: HealthStatus.HEALTHY,
        responseTime,
        timestamp: Date.now(),
        details: {
          connected: true,
          responseTime,
        },
      };
    } catch (error) {
      return {
        name: 'redis',
        status: HealthStatus.UNHEALTHY,
        error: this.describeError(error),
        timestamp: Date.now(),
      };
    }
  }

  /**
   * Checks the database connection.
   *
   * TODO: placeholder — no database call is made yet, so this always reports
   * healthy. The real probe belongs inside the `try` block.
   *
   * @returns The database health record.
   */
  private async checkDatabase(): Promise<ComponentHealth> {
    const startTime = Date.now();

    try {
      const responseTime = Date.now() - startTime;

      return {
        name: 'database',
        status: HealthStatus.HEALTHY,
        responseTime,
        timestamp: Date.now(),
        details: {
          connected: true,
          responseTime,
        },
      };
    } catch (error) {
      return {
        name: 'database',
        status: HealthStatus.UNHEALTHY,
        error: this.describeError(error),
        timestamp: Date.now(),
      };
    }
  }

  /**
   * Checks the core service. Only verifies that the injected instance is
   * present; no call is made on it.
   *
   * @returns The core service health record.
   */
  private async checkCoreService(): Promise<ComponentHealth> {
    try {
      if (!this.locationBroadcastCore) {
        return {
          name: 'core_service',
          status: HealthStatus.UNHEALTHY,
          error: 'Core service not available',
          timestamp: Date.now(),
        };
      }

      return {
        name: 'core_service',
        status: HealthStatus.HEALTHY,
        timestamp: Date.now(),
        details: {
          available: true,
        },
      };
    } catch (error) {
      return {
        name: 'core_service',
        status: HealthStatus.UNHEALTHY,
        error: this.describeError(error),
        timestamp: Date.now(),
      };
    }
  }

  /**
   * Derives a health state from the performance monitor's snapshot:
   * unhealthy when `errorRate > 25` or `avgResponseTime > 2000`,
   * degraded when `errorRate > 10`, healthy otherwise.
   * (Units are presumably percent and milliseconds — confirm with the monitor.)
   *
   * @returns The performance monitor health record.
   */
  private checkPerformanceMonitor(): ComponentHealth {
    try {
      const systemPerf = this.performanceMonitor.getSystemPerformance();

      let status = HealthStatus.HEALTHY;
      if (systemPerf.errorRate > 25 || systemPerf.avgResponseTime > 2000) {
        status = HealthStatus.UNHEALTHY;
      } else if (systemPerf.errorRate > 10) {
        status = HealthStatus.DEGRADED;
      }

      return {
        name: 'performance_monitor',
        status,
        timestamp: Date.now(),
        details: {
          avgResponseTime: systemPerf.avgResponseTime,
          errorRate: systemPerf.errorRate,
          throughput: systemPerf.throughput,
        },
      };
    } catch (error) {
      return {
        name: 'performance_monitor',
        status: HealthStatus.UNHEALTHY,
        error: this.describeError(error),
        timestamp: Date.now(),
      };
    }
  }

  /**
   * Derives a health state from the rate limiter's statistics:
   * unhealthy when `limitRate > 50`, degraded when `limitRate > 20`,
   * healthy otherwise. (`limitRate` is presumably a percentage — confirm.)
   *
   * @returns The rate limiter health record.
   */
  private checkRateLimitMiddleware(): ComponentHealth {
    try {
      const stats = this.rateLimitMiddleware.getStats();

      let status = HealthStatus.HEALTHY;
      if (stats.limitRate > 50) {
        status = HealthStatus.UNHEALTHY;
      } else if (stats.limitRate > 20) {
        status = HealthStatus.DEGRADED;
      }

      return {
        name: 'rate_limit',
        status,
        timestamp: Date.now(),
        details: {
          limitRate: stats.limitRate,
          activeUsers: stats.activeUsers,
          totalRequests: stats.totalRequests,
        },
      };
    } catch (error) {
      return {
        name: 'rate_limit',
        status: HealthStatus.UNHEALTHY,
        error: this.describeError(error),
        timestamp: Date.now(),
      };
    }
  }

  /**
   * Turns a health payload into the handler's result.
   *
   * A handler's return value is always sent with the route's default status,
   * so a 503 cannot be expressed by returning an object. It is signalled by
   * throwing an `HttpException` whose body is the payload itself.
   *
   * @param health The payload to send.
   * @param statusCode Optional HTTP status; only 503 changes the outcome.
   * @returns `health` unchanged when no 503 was requested.
   * @throws HttpException with status 503 when `statusCode` is 503.
   */
  private formatHealthResponse<T extends object>(health: T, statusCode?: number): T {
    if (statusCode === HttpStatus.SERVICE_UNAVAILABLE) {
      throw new HttpException(health, HttpStatus.SERVICE_UNAVAILABLE);
    }

    return health;
  }

  /** Maps a health state to the HTTP status to force: 503 for unhealthy, none otherwise. */
  private statusCodeFor(status: HealthStatus): number | undefined {
    return status === HealthStatus.UNHEALTHY ? HttpStatus.SERVICE_UNAVAILABLE : undefined;
  }

  /** Extracts a printable message from a caught value of unknown type. */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Builds the synthetic `system` record reported when a check itself throws. */
  private systemFailureComponent(message: string): ComponentHealth {
    return {
      name: 'system',
      status: HealthStatus.UNHEALTHY,
      error: message,
      timestamp: Date.now(),
    };
  }
}