Skip to content

Commit 66486d4

Browse files
committed
BUG-ID: CS-19697: Deal with a possible DB deadlock issue; return the job to the queue for the next turn
1 parent 66bece5 commit 66486d4

2 files changed

Lines changed: 34 additions & 8 deletions

File tree

framework/ipc/src/org/apache/cloudstack/framework/messagebus/MessageDetector.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@ public MessageDetector() {
3131
}
3232

3333
public boolean waitAny(long timeoutInMiliseconds) {
34-
_signalled = false;
3534
synchronized (this) {
35+
if (_signalled)
36+
return true;
37+
3638
try {
3739
wait(timeoutInMiliseconds);
3840
} catch (InterruptedException e) {

framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -445,9 +445,9 @@ public void run() {
445445
CallContext.registerPlaceHolderContext();
446446

447447
if (job.getRelated() != null && !job.getRelated().isEmpty())
448-
NDC.push("Job-" + job.getRelated() + "/" + "Job-" + job.getId());
448+
NDC.push("job-" + job.getRelated() + "/" + "job-" + job.getId());
449449
else
450-
NDC.push("Job-" + job.getId());
450+
NDC.push("job-" + job.getId());
451451
try {
452452
super.run();
453453
} finally {
@@ -560,17 +560,41 @@ private void executeQueueItem(SyncQueueItemVO item, boolean fromPreviousSession)
560560

561561
job.setSyncSource(item);
562562

563-
job.setExecutingMsid(getMsid());
564-
_jobDao.update(job.getId(), job);
563+
//
564+
// TODO: a temporary solution to work around the DB deadlock situation
565+
//
566+
// to live with DB deadlocks, we give the job a chance to be rescheduled
567+
// in case of exceptions (most likely DB deadlock exceptions)
568+
try {
569+
job.setExecutingMsid(getMsid());
570+
_jobDao.update(job.getId(), job);
571+
} catch (Exception e) {
572+
s_logger.warn("Unexpected exception while dispatching job-" + item.getContentId(), e);
573+
574+
try {
575+
_queueMgr.returnItem(item.getId());
576+
} catch (Throwable thr) {
577+
s_logger.error("Unexpected exception while returning job-" + item.getContentId() + " to queue", thr);
578+
}
579+
}
565580

566581
try {
567582
scheduleExecution(job);
568583
} catch (RejectedExecutionException e) {
569584
s_logger.warn("Execution for job-" + job.getId() + " is rejected, return it to the queue for next turn");
570-
_queueMgr.returnItem(item.getId());
571585

572-
job.setExecutingMsid(null);
573-
_jobDao.update(job.getId(), job);
586+
try {
587+
_queueMgr.returnItem(item.getId());
588+
} catch (Exception e2) {
589+
s_logger.error("Unexpected exception while returning job-" + item.getContentId() + " to queue", e2);
590+
}
591+
592+
try {
593+
job.setExecutingMsid(null);
594+
_jobDao.update(job.getId(), job);
595+
} catch (Exception e3) {
596+
s_logger.warn("Unexpected exception while update job-" + item.getContentId() + " msid for bookkeeping");
597+
}
574598
}
575599

576600
} else {

0 commit comments

Comments
 (0)