feat: Add sidekiq jobs to monitor applied SLAs (#8828)

Fixes: https://linear.app/chatwoot/issue/CW-2983/sidekiq-jobservice-to-monitor-sla-breach

Co-authored-by: Sojan <sojan@pepalo.com>
This commit is contained in:
Vishnu Narayanan
2024-02-07 23:14:56 +05:30
committed by GitHub
parent 98eddd0532
commit c1d07a5471
17 changed files with 371 additions and 29 deletions

View File

@@ -24,3 +24,5 @@ class TriggerScheduledItemsJob < ApplicationJob
Notification::RemoveOldNotificationJob.perform_later
end
end
TriggerScheduledItemsJob.prepend_mod_with('TriggerScheduledItemsJob')

View File

@@ -0,0 +1,6 @@
# Replaces the string column applied_slas.sla_status with an integer column
# defaulting to 0, so the column can back an integer-valued enum.
# NOTE(review): the column is dropped and re-created rather than altered in place,
# so values stored in the old string column are not carried over — confirm that is acceptable.
class ChangeAppliedSlaSlaStatusToEnum < ActiveRecord::Migration[7.0]
def change
# The column type is passed to remove_column so the removal can be reversed on rollback.
remove_column :applied_slas, :sla_status, :string
add_column :applied_slas, :sla_status, :integer, default: 0
end
end

View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2024_01_31_040316) do
ActiveRecord::Schema[7.0].define(version: 2024_02_07_103014) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_stat_statements"
enable_extension "pg_trgm"
@@ -119,9 +119,9 @@ ActiveRecord::Schema[7.0].define(version: 2024_01_31_040316) do
t.bigint "account_id", null: false
t.bigint "sla_policy_id", null: false
t.bigint "conversation_id", null: false
t.string "sla_status"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.integer "sla_status", default: 0
t.index ["account_id"], name: "index_applied_slas_on_account_id"
t.index ["conversation_id"], name: "index_applied_slas_on_conversation_id"
t.index ["sla_policy_id"], name: "index_applied_slas_on_sla_policy_id"

View File

@@ -0,0 +1,11 @@
# Enterprise-only extension of TriggerScheduledItemsJob. The base job pulls this in
# through `prepend_mod_with('TriggerScheduledItemsJob')`, so `super` below runs the
# base job's perform before the enterprise jobs are enqueued.
module Enterprise::TriggerScheduledItemsJob
def perform
super
## Enterprise specific jobs are triggered below
###############################################
# Enqueue the job that fans SLA processing out across accounts
Sla::TriggerSlasForAccountsJob.perform_later
end
end

View File

@@ -0,0 +1,9 @@
# Fans out SLA evaluation for a single account by enqueuing one
# Sla::ProcessAppliedSlaJob per applied SLA whose status is still 'active'.
class Sla::ProcessAccountAppliedSlasJob < ApplicationJob
  queue_as :medium

  # @param account [Account] account whose active applied SLAs should be evaluated
  def perform(account)
    # find_each walks the relation in batches, so an account with a large number
    # of active applied SLAs is not loaded into memory in one go.
    account.applied_slas.where(sla_status: 'active').find_each do |applied_sla|
      Sla::ProcessAppliedSlaJob.perform_later(applied_sla)
    end
  end
end

View File

@@ -0,0 +1,7 @@
# Background job that evaluates a single applied SLA by handing it to
# Sla::EvaluateAppliedSlaService.
class Sla::ProcessAppliedSlaJob < ApplicationJob
  queue_as :medium

  # @param applied_sla [AppliedSla] the applied SLA record to evaluate
  def perform(applied_sla)
    evaluator = Sla::EvaluateAppliedSlaService.new(applied_sla: applied_sla)
    evaluator.perform
  end
end

View File

@@ -0,0 +1,10 @@
# Scheduled entry point for SLA monitoring: walks every account and enqueues a
# per-account job that processes that account's applied SLAs.
class Sla::TriggerSlasForAccountsJob < ApplicationJob
  queue_as :scheduled_jobs

  def perform
    Account.find_each { |account| enqueue_for(account) }
  end

  private

  # Logs the hand-off and enqueues the per-account processing job.
  def enqueue_for(account)
    Rails.logger.info "Enqueuing ProcessAccountAppliedSlasJob for account #{account.id}"
    Sla::ProcessAccountAppliedSlasJob.perform_later(account)
  end
end

View File

@@ -3,7 +3,7 @@
# Table name: applied_slas
#
# id :bigint not null, primary key
# sla_status :string
# sla_status :integer default("active")
# created_at :datetime not null
# updated_at :datetime not null
# account_id :bigint not null
@@ -20,4 +20,6 @@ class AppliedSla < ApplicationRecord
belongs_to :account
belongs_to :sla_policy
belongs_to :conversation
enum sla_status: { active: 0, hit: 1, missed: 2 }
end

View File

@@ -3,6 +3,7 @@ module Enterprise::Concerns::Account
included do
has_many :sla_policies, dependent: :destroy_async
has_many :applied_slas, dependent: :destroy_async
def self.add_response_related_associations
has_many :response_sources, dependent: :destroy_async

View File

@@ -0,0 +1,78 @@
# Evaluates one applied SLA against the thresholds configured on its SLA policy.
#
# The record is marked 'missed' as soon as a configured threshold is found to be
# breached, and 'hit' once the conversation is resolved while the record is still active.
class Sla::EvaluateAppliedSlaService
  pattr_initialize [:applied_sla!]

  # Runs every configured threshold check, then closes the SLA out as hit when the
  # conversation is resolved and nothing was missed. An unresolved conversation is
  # left as-is and gets evaluated again on the next run.
  def perform
    check_sla_thresholds

    handle_hit_sla(applied_sla) if applied_sla.conversation.resolved? && applied_sla.active?
  end

  private

  # Dispatches to the matching check_* method for each threshold set on the policy;
  # thresholds left blank on the policy are skipped.
  def check_sla_thresholds
    %i[first_response_time_threshold next_response_time_threshold resolution_time_threshold].each do |threshold|
      configured_value = applied_sla.sla_policy.send(threshold)
      next if configured_value.blank?

      send("check_#{threshold}", applied_sla, applied_sla.conversation, applied_sla.sla_policy)
    end
  end

  # True while the current time is still before the given deadline (epoch seconds).
  def still_within_threshold?(threshold)
    threshold > Time.zone.now.to_i
  end

  # Missed when the first reply arrived after the deadline, or when there is still
  # no first reply and the deadline has passed.
  def check_first_response_time_threshold(applied_sla, conversation, sla_policy)
    deadline = conversation.created_at.to_i + sla_policy.first_response_time_threshold.to_i
    return if first_reply_was_within_threshold?(conversation, deadline) || still_within_threshold?(deadline)

    handle_missed_sla(applied_sla)
  end

  def first_reply_was_within_threshold?(conversation, threshold)
    replied_at = conversation.first_reply_created_at
    replied_at.present? && replied_at.to_i <= threshold
  end

  # Only relevant once a first reply exists (before that, the first response check
  # covers the conversation) and while waiting_since is set (blank means we are
  # waiting on the customer).
  def check_next_response_time_threshold(applied_sla, conversation, sla_policy)
    return if conversation.first_reply_created_at.blank? || conversation.waiting_since.blank?

    deadline = conversation.waiting_since.to_i + sla_policy.next_response_time_threshold.to_i
    handle_missed_sla(applied_sla) unless still_within_threshold?(deadline)
  end

  # A conversation that is already resolved is never marked as missed by this check.
  def check_resolution_time_threshold(applied_sla, conversation, sla_policy)
    return if conversation.resolved?

    deadline = conversation.created_at.to_i + sla_policy.resolution_time_threshold.to_i
    handle_missed_sla(applied_sla) unless still_within_threshold?(deadline)
  end

  # Flips an active applied SLA to 'missed' and logs a warning; no-op for any other status,
  # so several breached thresholds produce a single update and a single log line.
  def handle_missed_sla(applied_sla)
    return unless applied_sla.active?

    applied_sla.update!(sla_status: 'missed')
    Rails.logger.warn "SLA missed for conversation #{applied_sla.conversation.id} " \
                      "in account #{applied_sla.account_id} " \
                      "for sla_policy #{applied_sla.sla_policy.id}"
  end

  # Flips an active applied SLA to 'hit' and logs it; no-op for any other status.
  def handle_hit_sla(applied_sla)
    return unless applied_sla.active?

    applied_sla.update!(sla_status: 'hit')
    Rails.logger.info "SLA hit for conversation #{applied_sla.conversation.id} " \
                      "in account #{applied_sla.account_id} " \
                      "for sla_policy #{applied_sla.sla_policy.id}"
  end
end

View File

@@ -0,0 +1,10 @@
require 'rails_helper'
RSpec.describe TriggerScheduledItemsJob do
  subject(:job) { described_class.perform_later }

  # With the enterprise extension in place, running the job must also enqueue the SLA trigger job.
  it 'triggers Sla::TriggerSlasForAccountsJob' do
    allow(Sla::TriggerSlasForAccountsJob).to receive(:perform_later)

    described_class.perform_now

    expect(Sla::TriggerSlasForAccountsJob).to have_received(:perform_later).once
  end
end

View File

@@ -0,0 +1,27 @@
require 'rails_helper'
# Checks that Sla::ProcessAccountAppliedSlasJob enqueues Sla::ProcessAppliedSlaJob
# only for the account's applied SLAs that are still active.
RSpec.describe Sla::ProcessAccountAppliedSlasJob do
context 'when perform is called' do
# let! is eager, so the account and all three applied SLAs (one per status)
# exist before the job queries the account.
let!(:account) { create(:account) }
let!(:sla_policy) { create(:sla_policy, first_response_time_threshold: 1.hour) }
let!(:applied_sla) { create(:applied_sla, account: account, sla_policy: sla_policy, sla_status: 'active') }
let!(:hit_applied_sla) { create(:applied_sla, account: account, sla_policy: sla_policy, sla_status: 'hit') }
let!(:miss_applied_sla) { create(:applied_sla, account: account, sla_policy: sla_policy, sla_status: 'missed') }
# Only asserts queue routing: the job is enqueued without an account argument and is not performed here.
it 'enqueues the job' do
expect { described_class.perform_later }.to have_enqueued_job(described_class)
.on_queue('medium')
end
# The active applied SLA must be handed to the per-SLA job.
it 'calls the ProcessAppliedSlaJob' do
expect(Sla::ProcessAppliedSlaJob).to receive(:perform_later).with(applied_sla).and_call_original
described_class.perform_now(account)
end
# Applied SLAs that are already hit or missed must not be enqueued again.
it 'does not call the ProcessAppliedSlaJob for not active applied slas' do
expect(Sla::ProcessAppliedSlaJob).not_to receive(:perform_later).with(hit_applied_sla)
expect(Sla::ProcessAppliedSlaJob).not_to receive(:perform_later).with(miss_applied_sla)
described_class.perform_now(account)
end
end
end

View File

@@ -0,0 +1,18 @@
require 'rails_helper'
# Covers queue routing and delegation for Sla::ProcessAppliedSlaJob.
RSpec.describe Sla::ProcessAppliedSlaJob do
  context 'when perform is called' do
    let(:account) { create(:account) }

    it 'enqueues the job' do
      expect { described_class.perform_later }.to have_enqueued_job(described_class).on_queue('medium')
    end

    # The job should build the evaluation service with the applied SLA it was given.
    it 'calls the EvaluateAppliedSlaService' do
      applied_sla = create(:applied_sla)
      allow(Sla::EvaluateAppliedSlaService).to receive(:new).and_call_original

      described_class.perform_now(applied_sla)

      expect(Sla::EvaluateAppliedSlaService).to have_received(:new).with(applied_sla: applied_sla)
    end
  end
end

View File

@@ -0,0 +1,17 @@
require 'rails_helper'
# Covers queue routing and the per-account fan-out of Sla::TriggerSlasForAccountsJob.
RSpec.describe Sla::TriggerSlasForAccountsJob do
context 'when perform is called' do
# Lazy let: the account is only created when `account` is first referenced in an example.
let(:account) { create(:account) }
it 'enqueues the job' do
expect { described_class.perform_later }.to have_enqueued_job(described_class)
.on_queue('scheduled_jobs')
end
it 'calls the ProcessAccountAppliedSlasJob' do
# Referencing `account` inside `with(...)` creates the record before perform_now runs,
# so the job has an account to iterate over; keep this expectation ahead of perform_now.
expect(Sla::ProcessAccountAppliedSlasJob).to receive(:perform_later).with(account).and_call_original
described_class.perform_now
end
end
end

View File

@@ -0,0 +1,141 @@
require 'rails_helper'
# Specs for Sla::EvaluateAppliedSlaService.
# Baseline fixture: a conversation created 6 hours ago, an SLA policy with every threshold
# set to nil, and an active applied SLA linking the two. Each context enables only the
# threshold(s) it exercises.
RSpec.describe Sla::EvaluateAppliedSlaService do
let!(:conversation) { create(:conversation, created_at: 6.hours.ago) }
let!(:sla_policy) do
create(:sla_policy, account: conversation.account,
first_response_time_threshold: nil,
next_response_time_threshold: nil,
resolution_time_threshold: nil)
end
let!(:applied_sla) { create(:applied_sla, conversation: conversation, sla_policy: sla_policy, sla_status: 'active') }
describe '#perform - SLA misses' do
# No first reply exists and the 1 hour threshold elapsed long before now (conversation is 6 hours old).
context 'when first response SLA is missed' do
before { sla_policy.update(first_response_time_threshold: 1.hour) }
it 'updates the SLA status to missed and logs a warning' do
allow(Rails.logger).to receive(:warn)
described_class.new(applied_sla: applied_sla).perform
expect(Rails.logger).to have_received(:warn).with("SLA missed for conversation #{conversation.id} in account " \
"#{applied_sla.account_id} for sla_policy #{sla_policy.id}")
expect(applied_sla.reload.sla_status).to eq('missed')
end
end
# A first reply exists and the conversation has been waiting for 5 hours against a 1 hour threshold.
context 'when next response SLA is missed' do
before do
sla_policy.update(next_response_time_threshold: 1.hour)
conversation.update(first_reply_created_at: 5.hours.ago, waiting_since: 5.hours.ago)
end
it 'updates the SLA status to missed and logs a warning' do
allow(Rails.logger).to receive(:warn)
described_class.new(applied_sla: applied_sla).perform
expect(Rails.logger).to have_received(:warn).with("SLA missed for conversation #{conversation.id} in account " \
"#{applied_sla.account_id} for sla_policy #{sla_policy.id}")
expect(applied_sla.reload.sla_status).to eq('missed')
end
end
# The conversation is still unresolved 6 hours after creation against a 1 hour threshold.
context 'when resolution time SLA is missed' do
before { sla_policy.update(resolution_time_threshold: 1.hour) }
it 'updates the SLA status to missed and logs a warning' do
allow(Rails.logger).to receive(:warn)
described_class.new(applied_sla: applied_sla).perform
expect(Rails.logger).to have_received(:warn).with("SLA missed for conversation #{conversation.id} in account " \
"#{applied_sla.account_id} for sla_policy #{sla_policy.id}")
expect(applied_sla.reload.sla_status).to eq('missed')
end
end
# A resolution-time miss is only recorded while the conversation is still unresolved when the SLA is processed.
# If the conversation is already resolved by then, an overrun of the resolution threshold is not marked as missed.
context 'when resolved conversation with resolution time SLA is missed' do
before do
conversation.resolved!
sla_policy.update(resolution_time_threshold: 1.hour)
end
it 'does not update the SLA status to missed' do
described_class.new(applied_sla: applied_sla).perform
expect(applied_sla.reload.sla_status).to eq('hit')
end
end
# Several thresholds are breached at once; the warning must still be logged exactly once.
context 'when multiple SLAs are missed' do
before do
sla_policy.update(first_response_time_threshold: 1.hour, next_response_time_threshold: 1.hour, resolution_time_threshold: 1.hour)
conversation.update(first_reply_created_at: 5.hours.ago, waiting_since: 5.hours.ago)
end
it 'updates the SLA status to missed and logs a warning' do
allow(Rails.logger).to receive(:warn)
described_class.new(applied_sla: applied_sla).perform
expect(Rails.logger).to have_received(:warn).with("SLA missed for conversation #{conversation.id} in account " \
"#{applied_sla.account_id} for sla_policy #{sla_policy.id}").exactly(1).time
expect(applied_sla.reload.sla_status).to eq('missed')
end
end
end
describe '#perform - SLA hits' do
# First reply arrived 30 minutes ago, inside the 6 hour window measured from conversation creation.
context 'when first response SLA is hit' do
before do
sla_policy.update(first_response_time_threshold: 6.hours)
conversation.update(first_reply_created_at: 30.minutes.ago)
end
it 'sla remains active until conversation is resolved' do
described_class.new(applied_sla: applied_sla).perform
expect(applied_sla.reload.sla_status).to eq('active')
end
it 'updates the SLA status to hit and logs an info when conversations is resolved' do
conversation.resolved!
allow(Rails.logger).to receive(:info)
described_class.new(applied_sla: applied_sla).perform
expect(Rails.logger).to have_received(:info).with("SLA hit for conversation #{conversation.id} in account " \
"#{applied_sla.account_id} for sla_policy #{sla_policy.id}")
expect(applied_sla.reload.sla_status).to eq('hit')
end
end
# waiting_since is nil, so the next response check is skipped entirely.
context 'when next response SLA is hit' do
before do
sla_policy.update(next_response_time_threshold: 6.hours)
conversation.update(first_reply_created_at: 30.minutes.ago, waiting_since: nil)
end
it 'sla remains active until conversation is resolved' do
described_class.new(applied_sla: applied_sla).perform
expect(applied_sla.reload.sla_status).to eq('active')
end
it 'updates the SLA status to hit and logs an info when conversations is resolved' do
conversation.resolved!
allow(Rails.logger).to receive(:info)
described_class.new(applied_sla: applied_sla).perform
expect(Rails.logger).to have_received(:info).with("SLA hit for conversation #{conversation.id} in account " \
"#{applied_sla.account_id} for sla_policy #{sla_policy.id}")
expect(applied_sla.reload.sla_status).to eq('hit')
end
end
# The conversation is resolved and its 8 hour resolution window (from creation 6 hours ago) has not elapsed.
context 'when resolution time SLA is hit' do
before do
sla_policy.update(resolution_time_threshold: 8.hours)
conversation.resolved!
end
it 'updates the SLA status to hit and logs an info' do
allow(Rails.logger).to receive(:info)
described_class.new(applied_sla: applied_sla).perform
expect(Rails.logger).to have_received(:info).with("SLA hit for conversation #{conversation.id} in account " \
"#{applied_sla.account_id} for sla_policy #{sla_policy.id}")
expect(applied_sla.reload.sla_status).to eq('hit')
end
end
end
end

View File

@@ -1,8 +1,11 @@
# Factory for applied SLA records: associates an account, an SLA policy and a
# conversation, and defaults sla_status to 'active'.
FactoryBot.define do
factory :applied_sla do
account
sla_policy
conversation
sla_status { 'active' }
# Fallback for a missing account: reuse the conversation's account, otherwise create a new one.
# NOTE(review): the `account` association above presumably already populates the account
# before this hook runs, in which case the fallback only applies when account is explicitly
# passed as nil and the record's account can differ from the conversation's — confirm.
after(:build) do |applied_sla|
applied_sla.account ||= applied_sla.conversation&.account || create(:account)
end
end
end

View File

@@ -10,6 +10,31 @@ RSpec.describe TriggerScheduledItemsJob do
.on_queue('scheduled_jobs')
end
it 'triggers Conversations::ReopenSnoozedConversationsJob' do
expect(Conversations::ReopenSnoozedConversationsJob).to receive(:perform_later).once
described_class.perform_now
end
it 'triggers Notification::ReopenSnoozedNotificationsJob' do
expect(Notification::ReopenSnoozedNotificationsJob).to receive(:perform_later).once
described_class.perform_now
end
it 'triggers Account::ConversationsResolutionSchedulerJob' do
expect(Account::ConversationsResolutionSchedulerJob).to receive(:perform_later).once
described_class.perform_now
end
it 'triggers Channels::Whatsapp::TemplatesSyncSchedulerJob' do
expect(Channels::Whatsapp::TemplatesSyncSchedulerJob).to receive(:perform_later).once
described_class.perform_now
end
it 'triggers Notification::RemoveOldNotificationJob' do
expect(Notification::RemoveOldNotificationJob).to receive(:perform_later).once
described_class.perform_now
end
context 'when unexecuted Scheduled campaign jobs' do
let!(:twilio_sms) { create(:channel_twilio_sms) }
let!(:twilio_inbox) { create(:inbox, channel: twilio_sms) }
@@ -20,30 +45,5 @@ RSpec.describe TriggerScheduledItemsJob do
expect(Campaigns::TriggerOneoffCampaignJob).to receive(:perform_later).with(campaign).once
described_class.perform_now
end
it 'triggers Conversations::ReopenSnoozedConversationsJob' do
expect(Conversations::ReopenSnoozedConversationsJob).to receive(:perform_later).once
described_class.perform_now
end
it 'triggers Notification::ReopenSnoozedNotificationsJob' do
expect(Notification::ReopenSnoozedNotificationsJob).to receive(:perform_later).once
described_class.perform_now
end
it 'triggers Account::ConversationsResolutionSchedulerJob' do
expect(Account::ConversationsResolutionSchedulerJob).to receive(:perform_later).once
described_class.perform_now
end
it 'triggers Channels::Whatsapp::TemplatesSyncSchedulerJob' do
expect(Channels::Whatsapp::TemplatesSyncSchedulerJob).to receive(:perform_later).once
described_class.perform_now
end
it 'triggers Notification::RemoveOldNotificationJob' do
expect(Notification::RemoveOldNotificationJob).to receive(:perform_later).once
described_class.perform_now
end
end
end