# XXL-JOB架构源码解析
XXL开源社区 (opens new window)|Gitee (opens new window)|Github (opens new window)|总体设计 (opens new window)
# 架构图
# 故障转移 & 失败重试
一次完整任务流程包括”调度(调度中心) + 执行(执行器)”两个阶段。
- “故障转移”发生在调度阶段,在执行器集群部署时,如果某一台执行器发生故障,该策略支持自动进行Failover切换到一台正常的执行器机器并且完成调度请求流程。
- “失败重试”发生在”调度 + 执行”两个阶段,支持通过自定义任务失败重试次数,当任务失败时将会按照预设的失败重试次数主动进行重试;
# 失败任务线程 源码分析
/**
* job monitor instance
*
* @author xuxueli 2015-9-1 18:05:56
*/
public class JobFailMonitorHelper {
private static Logger logger = LoggerFactory.getLogger(JobFailMonitorHelper.class);
private static JobFailMonitorHelper instance = new JobFailMonitorHelper();
public static JobFailMonitorHelper getInstance(){
return instance;
}
// ---------------------- monitor ----------------------
private Thread monitorThread;
private volatile boolean toStop = false;
public void start(){
monitorThread = new Thread(new Runnable() {
@Override
public void run() {
// monitor
while (!toStop) {
try {
List<Long> failLogIds = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().findFailJobLogIds(1000);
if (failLogIds!=null && !failLogIds.isEmpty()) {
for (long failLogId: failLogIds) {
// lock log
int lockRet = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateAlarmStatus(failLogId, 0, -1);
if (lockRet < 1) {
continue;
}
XxlJobLog log = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().load(failLogId);
XxlJobInfo info = XxlJobAdminConfig.getAdminConfig().getXxlJobInfoDao().loadById(log.getJobId());
// 1、fail retry monitor
if (log.getExecutorFailRetryCount() > 0) {
JobTriggerPoolHelper.trigger(log.getJobId(), TriggerTypeEnum.RETRY, (log.getExecutorFailRetryCount()-1), log.getExecutorShardingParam(), log.getExecutorParam(), null);
String retryMsg = "<br><br><span style=\"color:#F39C12;\" > >>>>>>>>>>>"+ I18nUtil.getString("jobconf_trigger_type_retry") +"<<<<<<<<<<< </span><br>";
log.setTriggerMsg(log.getTriggerMsg() + retryMsg);
XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateTriggerInfo(log);
}
// 2、fail alarm monitor
int newAlarmStatus = 0; // 告警状态:0-默认、-1=锁定状态、1-无需告警、2-告警成功、3-告警失败
if (info != null) {
boolean alarmResult = XxlJobAdminConfig.getAdminConfig().getJobAlarmer().alarm(info, log);
newAlarmStatus = alarmResult?2:3;
} else {
newAlarmStatus = 1;
}
XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateAlarmStatus(failLogId, -1, newAlarmStatus);
}
}
} catch (Exception e) {
if (!toStop) {
logger.error(">>>>>>>>>>> xxl-job, job fail monitor thread error:{}", e);
}
}
try {
TimeUnit.SECONDS.sleep(10);
} catch (Exception e) {
if (!toStop) {
logger.error(e.getMessage(), e);
}
}
}
logger.info(">>>>>>>>>>> xxl-job, job fail monitor thread stop");
}
});
monitorThread.setDaemon(true);
monitorThread.setName("xxl-job, admin JobFailMonitorHelper");
monitorThread.start();
}
public void toStop(){
toStop = true;
// interrupt and wait
monitorThread.interrupt();
try {
monitorThread.join();
} catch (InterruptedException e) {
logger.error(e.getMessage(), e);
}
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
- 从
xxl_job_log
表获取1000
条未处理的失败日志; - 通过乐观锁获取日志进行操作,如果获取失败,则直接跳过本条日志,继续下一条;
- 判断是否还有剩余失败重试次数,有则重试,并将次数-1;
- 对于本次失败的日志,触发邮件报警;
- 乐观锁更新当前失败日志的锁定状态。
知识点
这里可以使用Spring
的getBeansOfType
实现接口多实现类的动态调用,满足失败告警同时可以发送邮件,短信,钉钉消息等的功能实现。
@Component
public class JobAlarmer implements ApplicationContextAware, InitializingBean {
private static Logger logger = LoggerFactory.getLogger(JobAlarmer.class);
private ApplicationContext applicationContext;
private List<JobAlarm> jobAlarmList;
@Override
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
this.applicationContext = applicationContext;
}
@Override
public void afterPropertiesSet() throws Exception {
Map<String, JobAlarm> serviceBeanMap = applicationContext.getBeansOfType(JobAlarm.class);
if (serviceBeanMap != null && serviceBeanMap.size() > 0) {
jobAlarmList = new ArrayList<JobAlarm>(serviceBeanMap.values());
}
}
/**
* job alarm
*
* @param info
* @param jobLog
* @return
*/
public boolean alarm(XxlJobInfo info, XxlJobLog jobLog) {
boolean result = false;
if (jobAlarmList!=null && jobAlarmList.size()>0) {
result = true; // success means all-success
for (JobAlarm alarm: jobAlarmList) {
boolean resultItem = false;
try {
resultItem = alarm.doAlarm(info, jobLog);
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
if (!resultItem) {
result = false;
}
}
}
return result;
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49