1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
|
@Service @Slf4j public class JenkinsMonitorService { @Value("${jenkins.url}") private String jenkinsUrl; @Value("${jenkins.username}") private String jenkinsUsername; @Value("${jenkins.password}") private String jenkinsPassword; private JenkinsServer jenkinsServer; @PostConstruct public void init() { try { jenkinsServer = new JenkinsServer( new URI(jenkinsUrl), jenkinsUsername, jenkinsPassword ); log.info("Jenkins客户端初始化成功"); } catch (Exception e) { log.error("Jenkins客户端初始化失败", e); } }
@Scheduled(fixedRate = 60000) public void monitorJenkinsStatus() { try { if (!jenkinsServer.isRunning()) { log.error("Jenkins服务未运行"); sendAlert("Jenkins服务未运行", "critical"); return; } ComputerSet computerSet = jenkinsServer.getComputerSet(); int totalExecutors = computerSet.getTotalExecutors(); int busyExecutors = computerSet.getBusyExecutors(); int idleExecutors = totalExecutors - busyExecutors; log.info("Jenkins执行器状态: 总数={}, 忙碌={}, 空闲={}", totalExecutors, busyExecutors, idleExecutors); recordMetrics("jenkins.executors.total", totalExecutors); recordMetrics("jenkins.executors.busy", busyExecutors); recordMetrics("jenkins.executors.idle", idleExecutors); double usageRate = (double) busyExecutors / totalExecutors; if (usageRate > 0.8) { log.warn("Jenkins执行器使用率过高: {}%", usageRate * 100); sendAlert("Jenkins执行器使用率过高", "warning"); } Queue queue = jenkinsServer.getQueue(); int queueLength = queue.getItems().size(); log.info("Jenkins构建队列长度: {}", queueLength); recordMetrics("jenkins.queue.length", queueLength); if (queueLength > 10) { log.warn("Jenkins构建队列过长: {}", queueLength); sendAlert("Jenkins构建队列过长", "warning"); } } catch (Exception e) { log.error("监控Jenkins状态失败", e); } }
@Scheduled(fixedRate = 120000) public void monitorBuildJobs() { try { Map<String, Job> jobs = jenkinsServer.getJobs(); int totalJobs = jobs.size(); int successJobs = 0; int failedJobs = 0; int unstableJobs = 0; for (Map.Entry<String, Job> entry : jobs.entrySet()) { String jobName = entry.getKey(); Job job = entry.getValue(); Build lastBuild = job.getLastBuild(); if (lastBuild != null) { BuildResult result = lastBuild.details().getResult(); if (result == BuildResult.SUCCESS) { successJobs++; } else if (result == BuildResult.FAILURE) { failedJobs++; log.warn("构建失败: {}", jobName); } else if (result == BuildResult.UNSTABLE) { unstableJobs++; log.warn("构建不稳定: {}", jobName); } long duration = lastBuild.details().getDuration(); recordMetrics("jenkins.build.duration." + jobName, duration); } } log.info("Jenkins任务统计: 总数={}, 成功={}, 失败={}, 不稳定={}", totalJobs, successJobs, failedJobs, unstableJobs); recordMetrics("jenkins.jobs.total", totalJobs); recordMetrics("jenkins.jobs.success", successJobs); recordMetrics("jenkins.jobs.failed", failedJobs); recordMetrics("jenkins.jobs.unstable", unstableJobs); double failureRate = (double) failedJobs / totalJobs; if (failureRate > 0.1) { log.error("Jenkins构建失败率过高: {}%", failureRate * 100); sendAlert("Jenkins构建失败率过高", "critical"); } } catch (Exception e) { log.error("监控构建任务失败", e); } }
@Scheduled(fixedRate = 180000) public void monitorNodes() { try { Map<String, Computer> computers = jenkinsServer.getComputers(); int totalNodes = computers.size(); int onlineNodes = 0; int offlineNodes = 0; for (Map.Entry<String, Computer> entry : computers.entrySet()) { String nodeName = entry.getKey(); Computer computer = entry.getValue(); ComputerWithDetails details = computer.details(); if (details.isOffline()) { offlineNodes++; log.warn("Jenkins节点离线: {}", nodeName); sendAlert("Jenkins节点离线: " + nodeName, "warning"); } else { onlineNodes++; } recordMetrics("jenkins.node.executors." + nodeName, details.getNumExecutors()); recordMetrics("jenkins.node.busy." + nodeName, details.getExecutors().size()); } log.info("Jenkins节点统计: 总数={}, 在线={}, 离线={}", totalNodes, onlineNodes, offlineNodes); recordMetrics("jenkins.nodes.total", totalNodes); recordMetrics("jenkins.nodes.online", onlineNodes); recordMetrics("jenkins.nodes.offline", offlineNodes); } catch (Exception e) { log.error("监控节点状态失败", e); } }
private void recordMetrics(String metricName, Number value) { log.debug("记录指标: {}={}", metricName, value); }
private void sendAlert(String message, String level) { log.info("发送告警: message={}, level={}", message, level); } }
|