如何定位线上慢请求?

1. 概述

1.1 慢请求定位的重要性

慢请求定位是性能优化的关键环节,通过快速准确地定位线上慢请求,可以及时发现问题、优化性能,提升用户体验和系统稳定性。

本文内容

  • 慢请求定义:什么是慢请求,如何定义
  • 定位方法:多种定位慢请求的方法
  • 监控工具:APM、日志分析等工具使用
  • 代码级定位:代码层面的慢请求定位
  • 数据库定位:数据库慢查询定位
  • 网络定位:网络慢请求定位
  • 实战案例:慢请求定位实践案例

1.2 本文内容结构

本文将从以下几个方面深入探讨慢请求定位:

  1. 慢请求定义:慢请求的定义和标准
  2. 监控方法:监控和告警机制
  3. 日志分析:日志分析和慢请求识别
  4. APM工具:APM工具的使用
  5. 代码级定位:代码层面的定位方法
  6. 数据库定位:数据库慢查询定位
  7. 网络定位:网络慢请求定位
  8. 实战案例:慢请求定位实践案例

2. 慢请求定义

2.1 慢请求标准

2.1.1 慢请求定义

慢请求定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// 慢请求定义
public class SlowRequestDefinition {

// 慢请求阈值
private static final long SLOW_REQUEST_THRESHOLD = 1000; // 1秒
private static final long VERY_SLOW_REQUEST_THRESHOLD = 3000; // 3秒

// 慢请求级别
public enum SlowRequestLevel {
NORMAL, // 正常:< 1秒
SLOW, // 慢:1-3秒
VERY_SLOW, // 很慢:> 3秒
TIMEOUT // 超时
}

public SlowRequestLevel classifyRequest(long responseTime) {
if (responseTime < SLOW_REQUEST_THRESHOLD) {
return SlowRequestLevel.NORMAL;
} else if (responseTime < VERY_SLOW_REQUEST_THRESHOLD) {
return SlowRequestLevel.SLOW;
} else if (responseTime < 30000) { // 30秒超时
return SlowRequestLevel.VERY_SLOW;
} else {
return SlowRequestLevel.TIMEOUT;
}
}

// 不同接口的慢请求阈值
public long getThreshold(String endpoint) {
switch (endpoint) {
case "/api/search":
return 2000; // 搜索接口2秒
case "/api/export":
return 10000; // 导出接口10秒
case "/api/report":
return 5000; // 报表接口5秒
default:
return SLOW_REQUEST_THRESHOLD; // 默认1秒
}
}
}

2.2 慢请求影响

2.2.1 慢请求影响分析

慢请求影响

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// 慢请求影响分析
public class SlowRequestImpact {

public ImpactAnalysis analyzeImpact(List<SlowRequest> slowRequests) {
ImpactAnalysis analysis = new ImpactAnalysis();

// 1. 用户影响
analysis.setAffectedUsers(calculateAffectedUsers(slowRequests));

// 2. 业务影响
analysis.setBusinessImpact(calculateBusinessImpact(slowRequests));

// 3. 系统影响
analysis.setSystemImpact(calculateSystemImpact(slowRequests));

return analysis;
}

private int calculateAffectedUsers(List<SlowRequest> slowRequests) {
// 计算受影响用户数
return slowRequests.stream()
.map(SlowRequest::getUserId)
.distinct()
.collect(Collectors.toList())
.size();
}

private BusinessImpact calculateBusinessImpact(List<SlowRequest> slowRequests) {
BusinessImpact impact = new BusinessImpact();

// 计算失败请求数
long failedRequests = slowRequests.stream()
.filter(req -> req.getStatus() >= 500)
.count();

impact.setFailedRequests(failedRequests);
impact.setFailureRate((double) failedRequests / slowRequests.size());

return impact;
}
}

3. 监控方法

3.1 监控指标

3.1.1 请求监控

请求监控

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// 请求监控
public class RequestMonitoring {

private MetricsCollector metricsCollector;
private AlertManager alertManager;

public void monitorRequest(Request request) {
long startTime = System.currentTimeMillis();

try {
// 处理请求
Response response = processRequest(request);

long responseTime = System.currentTimeMillis() - startTime;

// 记录指标
recordMetrics(request, response, responseTime);

// 检查慢请求
if (isSlowRequest(responseTime, request.getEndpoint())) {
handleSlowRequest(request, response, responseTime);
}

} catch (Exception e) {
long responseTime = System.currentTimeMillis() - startTime;
recordError(request, e, responseTime);
}
}

private void recordMetrics(Request request, Response response, long responseTime) {
// 记录请求指标
metricsCollector.record("request.count", 1);
metricsCollector.record("request.duration", responseTime);
metricsCollector.record("request.status." + response.getStatus(), 1);

// 按接口记录
metricsCollector.record("request.endpoint." + request.getEndpoint() + ".duration", responseTime);
}

private boolean isSlowRequest(long responseTime, String endpoint) {
SlowRequestDefinition definition = new SlowRequestDefinition();
long threshold = definition.getThreshold(endpoint);
return responseTime > threshold;
}

private void handleSlowRequest(Request request, Response response, long responseTime) {
// 1. 记录慢请求日志
logSlowRequest(request, response, responseTime);

// 2. 发送告警
if (responseTime > 3000) {
alertManager.sendAlert("慢请求告警",
"接口: " + request.getEndpoint() +
", 响应时间: " + responseTime + "ms");
}

// 3. 记录详细追踪信息
recordTrace(request, response, responseTime);
}

private void logSlowRequest(Request request, Response response, long responseTime) {
SlowRequestLog log = new SlowRequestLog();
log.setTimestamp(System.currentTimeMillis());
log.setEndpoint(request.getEndpoint());
log.setMethod(request.getMethod());
log.setResponseTime(responseTime);
log.setStatus(response.getStatus());
log.setRequestId(request.getRequestId());
log.setUserId(request.getUserId());

logger.warn("慢请求: {}", log);
}
}

3.2 告警机制

3.2.1 慢请求告警

慢请求告警

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
// 慢请求告警
public class SlowRequestAlert {

private AlertManager alertManager;
private MetricsCollector metricsCollector;

public void setupAlerts() {
// 1. 慢请求率告警
scheduler.scheduleAtFixedRate(() -> {
double slowRequestRate = calculateSlowRequestRate();
if (slowRequestRate > 0.05) { // 慢请求率 > 5%
alertManager.sendAlert("慢请求率过高",
"当前慢请求率: " + slowRequestRate * 100 + "%");
}
}, 0, 60, TimeUnit.SECONDS);

// 2. 平均响应时间告警
scheduler.scheduleAtFixedRate(() -> {
double avgResponseTime = calculateAvgResponseTime();
if (avgResponseTime > 2000) { // 平均响应时间 > 2秒
alertManager.sendAlert("平均响应时间过长",
"当前平均响应时间: " + avgResponseTime + "ms");
}
}, 0, 60, TimeUnit.SECONDS);

// 3. P99响应时间告警
scheduler.scheduleAtFixedRate(() -> {
long p99ResponseTime = calculateP99ResponseTime();
if (p99ResponseTime > 5000) { // P99响应时间 > 5秒
alertManager.sendAlert("P99响应时间过长",
"当前P99响应时间: " + p99ResponseTime + "ms");
}
}, 0, 60, TimeUnit.SECONDS);
}

private double calculateSlowRequestRate() {
long totalRequests = metricsCollector.getCounter("request.count");
long slowRequests = metricsCollector.getCounter("request.slow.count");

if (totalRequests == 0) {
return 0;
}

return (double) slowRequests / totalRequests;
}

private double calculateAvgResponseTime() {
return metricsCollector.getHistogram("request.duration").getMean();
}

private long calculateP99ResponseTime() {
return metricsCollector.getHistogram("request.duration").getValue(0.99);
}
}

4. 日志分析

4.1 日志记录

4.1.1 请求日志

请求日志记录

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// 请求日志记录
public class RequestLogging {

// 请求拦截器
@Component
public class RequestLoggingInterceptor implements HandlerInterceptor {

private ThreadLocal<Long> startTime = new ThreadLocal<>();

@Override
public boolean preHandle(HttpServletRequest request,
HttpServletResponse response,
Object handler) {
startTime.set(System.currentTimeMillis());
return true;
}

@Override
public void afterCompletion(HttpServletRequest request,
HttpServletResponse response,
Object handler,
Exception ex) {
long duration = System.currentTimeMillis() - startTime.get();

// 记录请求日志
RequestLog log = new RequestLog();
log.setTimestamp(System.currentTimeMillis());
log.setMethod(request.getMethod());
log.setUri(request.getRequestURI());
log.setQueryString(request.getQueryString());
log.setStatus(response.getStatus());
log.setDuration(duration);
log.setRemoteAddr(request.getRemoteAddr());
log.setUserAgent(request.getHeader("User-Agent"));
log.setRequestId(getRequestId(request));

// 如果是慢请求,记录详细信息
if (duration > 1000) {
log.setLevel("WARN");
log.setRequestBody(getRequestBody(request));
log.setResponseBody(getResponseBody(response));
}

logger.info("请求日志: {}", log);

startTime.remove();
}
}
}

4.2 日志分析

4.2.1 慢请求分析

日志分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// 日志分析
public class LogAnalysis {

public List<SlowRequest> analyzeSlowRequests(Date startTime, Date endTime) {
// 1. 查询慢请求日志
List<RequestLog> logs = logService.queryLogs(startTime, endTime,
log -> log.getDuration() > 1000);

// 2. 按接口分组统计
Map<String, List<RequestLog>> groupedByEndpoint = logs.stream()
.collect(Collectors.groupingBy(RequestLog::getUri));

// 3. 分析每个接口的慢请求
List<SlowRequest> slowRequests = new ArrayList<>();
for (Map.Entry<String, List<RequestLog>> entry : groupedByEndpoint.entrySet()) {
SlowRequest slowRequest = analyzeEndpoint(entry.getKey(), entry.getValue());
slowRequests.add(slowRequest);
}

// 4. 按响应时间排序
slowRequests.sort(Comparator.comparing(SlowRequest::getAvgResponseTime).reversed());

return slowRequests;
}

private SlowRequest analyzeEndpoint(String endpoint, List<RequestLog> logs) {
SlowRequest slowRequest = new SlowRequest();
slowRequest.setEndpoint(endpoint);
slowRequest.setCount(logs.size());

// 计算平均响应时间
double avgResponseTime = logs.stream()
.mapToLong(RequestLog::getDuration)
.average()
.orElse(0);
slowRequest.setAvgResponseTime(avgResponseTime);

// 计算最大响应时间
long maxResponseTime = logs.stream()
.mapToLong(RequestLog::getDuration)
.max()
.orElse(0);
slowRequest.setMaxResponseTime(maxResponseTime);

// 计算P99响应时间
long p99ResponseTime = calculateP99(logs);
slowRequest.setP99ResponseTime(p99ResponseTime);

// 分析慢请求模式
slowRequest.setPattern(analyzePattern(logs));

return slowRequest;
}

private long calculateP99(List<RequestLog> logs) {
List<Long> durations = logs.stream()
.map(RequestLog::getDuration)
.sorted()
.collect(Collectors.toList());

int index = (int) (durations.size() * 0.99);
return durations.get(index);
}

private String analyzePattern(List<RequestLog> logs) {
// 分析慢请求模式
// 1. 是否集中在特定时间段
// 2. 是否与特定参数相关
// 3. 是否与特定用户相关

return "需要进一步分析";
}
}

5. APM工具

5.1 APM使用

5.1.1 APM工具定位

APM工具使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// APM工具定位
public class APMRequestLocation {

// 使用APM工具定位慢请求
public class APMIntegration {

// 1. 慢事务分析
public List<SlowTransaction> findSlowTransactions() {
// 使用APM工具查询慢事务
// 例如:New Relic, AppDynamics, Datadog
return apmService.getSlowTransactions(1000); // 响应时间 > 1秒
}

// 2. 事务详情分析
public TransactionDetail analyzeTransaction(String transactionId) {
TransactionDetail detail = apmService.getTransactionDetail(transactionId);

// 分析事务时间分布
detail.setTimeDistribution(analyzeTimeDistribution(detail));

// 识别瓶颈
detail.setBottlenecks(identifyBottlenecks(detail));

return detail;
}

// 3. 数据库查询分析
public List<SlowQuery> findSlowQueries() {
// 查找慢数据库查询
return apmService.getSlowQueries(500); // 查询时间 > 500ms
}

// 4. 外部服务调用分析
public List<SlowExternalCall> findSlowExternalCalls() {
// 查找慢外部服务调用
return apmService.getSlowExternalCalls(1000); // 调用时间 > 1秒
}
}

// 时间分布分析
private TimeDistribution analyzeTimeDistribution(TransactionDetail detail) {
TimeDistribution distribution = new TimeDistribution();

distribution.setTotalTime(detail.getDuration());
distribution.setDatabaseTime(detail.getDatabaseTime());
distribution.setExternalTime(detail.getExternalTime());
distribution.setApplicationTime(detail.getApplicationTime());

return distribution;
}

// 瓶颈识别
private List<Bottleneck> identifyBottlenecks(TransactionDetail detail) {
List<Bottleneck> bottlenecks = new ArrayList<>();

TimeDistribution distribution = analyzeTimeDistribution(detail);

// 数据库瓶颈
if (distribution.getDatabaseTime() > distribution.getTotalTime() * 0.5) {
bottlenecks.add(new Bottleneck("Database",
"数据库耗时占比: " +
(distribution.getDatabaseTime() / distribution.getTotalTime() * 100) + "%"));
}

// 外部服务瓶颈
if (distribution.getExternalTime() > distribution.getTotalTime() * 0.3) {
bottlenecks.add(new Bottleneck("External Service",
"外部服务耗时占比: " +
(distribution.getExternalTime() / distribution.getTotalTime() * 100) + "%"));
}

return bottlenecks;
}
}

6. 代码级定位

6.1 代码追踪

6.1.1 代码级追踪

代码级定位

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
// 代码级定位
public class CodeLevelLocation {

// 使用AOP进行方法追踪
@Aspect
@Component
public class MethodTracingAspect {

@Around("@annotation(org.springframework.web.bind.annotation.RequestMapping)")
public Object traceMethod(ProceedingJoinPoint joinPoint) throws Throwable {
String methodName = joinPoint.getSignature().toShortString();
long startTime = System.currentTimeMillis();

try {
Object result = joinPoint.proceed();
long duration = System.currentTimeMillis() - startTime;

// 记录方法执行时间
if (duration > 100) { // 方法执行时间 > 100ms
logMethodExecution(methodName, duration, joinPoint.getArgs());
}

return result;
} catch (Throwable e) {
long duration = System.currentTimeMillis() - startTime;
logMethodError(methodName, duration, e);
throw e;
}
}

private void logMethodExecution(String methodName, long duration, Object[] args) {
MethodExecutionLog log = new MethodExecutionLog();
log.setMethodName(methodName);
log.setDuration(duration);
log.setArgs(Arrays.toString(args));
log.setTimestamp(System.currentTimeMillis());

logger.warn("慢方法执行: {}", log);
}
}

// 使用ThreadLocal追踪调用链
public class CallChainTracker {
private static final ThreadLocal<CallChain> callChain = new ThreadLocal<>();

public void startCall(String methodName) {
CallChain chain = callChain.get();
if (chain == null) {
chain = new CallChain();
callChain.set(chain);
}

CallNode node = new CallNode();
node.setMethodName(methodName);
node.setStartTime(System.currentTimeMillis());
chain.addNode(node);
}

public void endCall(String methodName) {
CallChain chain = callChain.get();
if (chain != null) {
CallNode node = chain.getLastNode();
if (node != null && node.getMethodName().equals(methodName)) {
node.setEndTime(System.currentTimeMillis());
node.setDuration(node.getEndTime() - node.getStartTime());

// 如果是慢方法,记录调用链
if (node.getDuration() > 100) {
logCallChain(chain);
}
}
}
}

private void logCallChain(CallChain chain) {
logger.warn("慢方法调用链: {}", chain);
}
}
}

6.2 性能分析

6.2.1 代码性能分析

代码性能分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// 代码性能分析
public class CodePerformanceAnalysis {

// 使用Profiler分析代码性能
public class CodeProfiler {

public PerformanceProfile profileMethod(String className, String methodName) {
// 使用JProfiler、VisualVM等工具分析
return profilerService.profile(className, methodName);
}

public List<HotMethod> findHotMethods() {
// 找出热点方法(CPU时间最长的)
return profilerService.getHotMethods(10); // Top 10
}

public List<SlowMethod> findSlowMethods() {
// 找出慢方法(执行时间最长的)
return profilerService.getSlowMethods(10); // Top 10
}
}

// 方法级性能统计
public class MethodPerformanceStats {
private Map<String, MethodStats> stats = new ConcurrentHashMap<>();

public void recordMethodExecution(String methodName, long duration) {
stats.computeIfAbsent(methodName, k -> new MethodStats())
.record(duration);
}

public List<MethodStats> getSlowMethods(long threshold) {
return stats.values().stream()
.filter(stat -> stat.getAvgDuration() > threshold)
.sorted(Comparator.comparing(MethodStats::getAvgDuration).reversed())
.collect(Collectors.toList());
}
}
}

7. 数据库定位

7.1 慢查询定位

7.1.1 数据库慢查询

数据库慢查询定位

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// 数据库慢查询定位
public class DatabaseSlowQueryLocation {

// 慢查询日志分析
public class SlowQueryAnalysis {

public List<SlowQuery> findSlowQueries(Date startTime, Date endTime) {
// 1. 查询慢查询日志
List<SlowQueryLog> logs = databaseService.getSlowQueryLog(startTime, endTime);

// 2. 分析慢查询
return logs.stream()
.map(this::analyzeSlowQuery)
.collect(Collectors.toList());
}

private SlowQuery analyzeSlowQuery(SlowQueryLog log) {
SlowQuery query = new SlowQuery();
query.setSql(log.getSql());
query.setExecutionTime(log.getExecutionTime());
query.setRowsExamined(log.getRowsExamined());
query.setRowsReturned(log.getRowsReturned());

// 分析SQL
query.setAnalysis(analyzeSQL(log.getSql()));

return query;
}

private SQLAnalysis analyzeSQL(String sql) {
SQLAnalysis analysis = new SQLAnalysis();

// 1. 检查是否使用索引
analysis.setUsesIndex(checkIndexUsage(sql));

// 2. 检查是否有全表扫描
analysis.setFullTableScan(checkFullTableScan(sql));

// 3. 检查是否有子查询
analysis.setHasSubquery(checkSubquery(sql));

// 4. 检查JOIN数量
analysis.setJoinCount(countJoins(sql));

return analysis;
}
}

// 实时慢查询监控
public class RealTimeSlowQueryMonitor {

public void monitorSlowQueries() {
// 使用数据库监控工具实时监控慢查询
// 例如:MySQL的Performance Schema, PostgreSQL的pg_stat_statements

scheduler.scheduleAtFixedRate(() -> {
List<SlowQuery> slowQueries = databaseService.getCurrentSlowQueries();

for (SlowQuery query : slowQueries) {
if (query.getExecutionTime() > 1000) {
alertManager.sendAlert("数据库慢查询",
"SQL: " + query.getSql() +
", 执行时间: " + query.getExecutionTime() + "ms");
}
}
}, 0, 10, TimeUnit.SECONDS);
}
}
}

8. 网络定位

8.1 网络慢请求

8.1.1 网络问题定位

网络慢请求定位

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
// 网络慢请求定位
public class NetworkSlowRequestLocation {

// 网络延迟分析
public class NetworkLatencyAnalysis {

public NetworkLatency analyzeLatency(String endpoint) {
NetworkLatency latency = new NetworkLatency();

// 1. DNS解析时间
latency.setDnsTime(measureDNSTime(endpoint));

// 2. TCP连接时间
latency.setTcpTime(measureTCPTime(endpoint));

// 3. TLS握手时间
latency.setTlsTime(measureTLSTime(endpoint));

// 4. 数据传输时间
latency.setTransferTime(measureTransferTime(endpoint));

return latency;
}

private long measureDNSTime(String endpoint) {
long startTime = System.currentTimeMillis();
InetAddress.getByName(endpoint);
return System.currentTimeMillis() - startTime;
}

private long measureTCPTime(String endpoint) {
// 测量TCP连接建立时间
long startTime = System.currentTimeMillis();
try (Socket socket = new Socket(endpoint, 80)) {
return System.currentTimeMillis() - startTime;
}
}
}

// 外部服务调用分析
public class ExternalServiceAnalysis {

public List<SlowExternalCall> findSlowExternalCalls() {
// 分析外部服务调用
return externalCallLogs.stream()
.filter(call -> call.getDuration() > 1000)
.collect(Collectors.toList());
}

public void analyzeExternalCall(SlowExternalCall call) {
// 分析外部服务调用慢的原因
// 1. 网络延迟
// 2. 服务端处理慢
// 3. 超时设置不合理

if (call.getNetworkTime() > call.getDuration() * 0.5) {
// 网络延迟是主要原因
logger.warn("外部服务调用慢,网络延迟: {}ms", call.getNetworkTime());
}
}
}
}

9. 实战案例

9.1 完整定位流程

9.1.1 慢请求定位案例

完整慢请求定位案例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// 完整慢请求定位案例
public class CompleteSlowRequestLocationCase {

public SlowRequestLocationResult locateSlowRequest(String requestId) {
SlowRequestLocationResult result = new SlowRequestLocationResult();

// 1. 从监控系统获取请求信息
RequestInfo requestInfo = monitoringService.getRequestInfo(requestId);
result.setRequestInfo(requestInfo);

// 2. 分析请求时间分布
TimeDistribution distribution = analyzeTimeDistribution(requestInfo);
result.setTimeDistribution(distribution);

// 3. 识别瓶颈
List<Bottleneck> bottlenecks = identifyBottlenecks(distribution);
result.setBottlenecks(bottlenecks);

// 4. 深入分析瓶颈
for (Bottleneck bottleneck : bottlenecks) {
DetailedAnalysis analysis = analyzeBottleneck(bottleneck, requestInfo);
result.addDetailedAnalysis(analysis);
}

// 5. 生成优化建议
List<OptimizationSuggestion> suggestions = generateSuggestions(result);
result.setSuggestions(suggestions);

return result;
}

private TimeDistribution analyzeTimeDistribution(RequestInfo requestInfo) {
TimeDistribution distribution = new TimeDistribution();

distribution.setTotalTime(requestInfo.getDuration());
distribution.setDatabaseTime(requestInfo.getDatabaseTime());
distribution.setCacheTime(requestInfo.getCacheTime());
distribution.setExternalTime(requestInfo.getExternalTime());
distribution.setApplicationTime(requestInfo.getApplicationTime());

return distribution;
}

private List<Bottleneck> identifyBottlenecks(TimeDistribution distribution) {
List<Bottleneck> bottlenecks = new ArrayList<>();

// 数据库瓶颈
if (distribution.getDatabaseTime() > distribution.getTotalTime() * 0.5) {
bottlenecks.add(new Bottleneck("Database",
"数据库耗时: " + distribution.getDatabaseTime() + "ms"));
}

// 外部服务瓶颈
if (distribution.getExternalTime() > distribution.getTotalTime() * 0.3) {
bottlenecks.add(new Bottleneck("External Service",
"外部服务耗时: " + distribution.getExternalTime() + "ms"));
}

return bottlenecks;
}

private DetailedAnalysis analyzeBottleneck(Bottleneck bottleneck, RequestInfo requestInfo) {
DetailedAnalysis analysis = new DetailedAnalysis();
analysis.setBottleneck(bottleneck);

switch (bottleneck.getType()) {
case "Database":
analysis.setDetails(analyzeDatabaseBottleneck(requestInfo));
break;
case "External Service":
analysis.setDetails(analyzeExternalServiceBottleneck(requestInfo));
break;
}

return analysis;
}

private List<OptimizationSuggestion> generateSuggestions(SlowRequestLocationResult result) {
List<OptimizationSuggestion> suggestions = new ArrayList<>();

for (Bottleneck bottleneck : result.getBottlenecks()) {
switch (bottleneck.getType()) {
case "Database":
suggestions.add(new OptimizationSuggestion(
"优化数据库查询",
"检查慢查询日志,优化SQL,添加索引"
));
break;
case "External Service":
suggestions.add(new OptimizationSuggestion(
"优化外部服务调用",
"增加超时时间,使用异步调用,添加缓存"
));
break;
}
}

return suggestions;
}
}

10. 总结

10.1 核心要点

  1. 慢请求定义:根据业务场景定义慢请求阈值
  2. 监控告警:建立完善的监控和告警机制
  3. 日志分析:通过日志分析定位慢请求
  4. APM工具:使用APM工具快速定位问题
  5. 代码级定位:代码层面的性能分析
  6. 数据库定位:数据库慢查询定位
  7. 网络定位:网络慢请求定位

10.2 关键理解

  1. 多维度定位:从多个维度定位慢请求
  2. 工具辅助:使用专业工具提高定位效率
  3. 数据驱动:基于监控数据进行分析
  4. 持续优化:定位后持续优化
  5. 预防为主:建立预防机制,减少慢请求

10.3 最佳实践

  1. 完善监控:建立完善的监控体系
  2. 及时告警:设置合理的告警阈值
  3. 日志规范:规范日志格式,便于分析
  4. 工具使用:熟练使用APM等工具
  5. 定期分析:定期分析慢请求,持续优化
  6. 知识沉淀:将定位经验沉淀为知识库

相关文章