From ab93821d2b210d60d7cd6b0cd1ff0410458c2cf7 Mon Sep 17 00:00:00 2001 From: Sojan Jose Date: Mon, 2 Mar 2026 02:18:29 -0800 Subject: [PATCH] fix(agent-bot): stabilize webhook delivery for transient upstream failures (#13521) This fixes the agent-bot webhook delivery path so transient upstream failures follow the expected delivery lifecycle. Existing fallback behavior is preserved, and fallback actions are applied only after delivery attempts are exhausted. To reproduce, configure an agent-bot webhook endpoint to return 429/500 for message events. Before this fix, failure handling could be applied too early; after this fix, delivery attempts complete first and then existing fallback handling runs. Tested with: - bundle exec rspec spec/jobs/agent_bots/webhook_job_spec.rb spec/lib/webhooks/trigger_spec.rb - bundle exec rubocop spec/jobs/agent_bots/webhook_job_spec.rb spec/lib/webhooks/trigger_spec.rb --------- Co-authored-by: Muhsin Keloth --- app/jobs/agent_bots/webhook_job.rb | 7 ++++ lib/webhooks/trigger.rb | 12 +++++- spec/jobs/agent_bots/webhook_job_spec.rb | 29 ++++++++++++++ spec/lib/webhooks/trigger_spec.rb | 50 ++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 2 deletions(-) diff --git a/app/jobs/agent_bots/webhook_job.rb b/app/jobs/agent_bots/webhook_job.rb index b3a3d6cc1..2786ce70e 100644 --- a/app/jobs/agent_bots/webhook_job.rb +++ b/app/jobs/agent_bots/webhook_job.rb @@ -1,7 +1,14 @@ class AgentBots::WebhookJob < WebhookJob queue_as :high + retry_on RestClient::TooManyRequests, RestClient::InternalServerError, wait: 3.seconds, attempts: 3 do |job, error| + url, payload, webhook_type = job.arguments + Webhooks::Trigger.new(url, payload, webhook_type || :agent_bot_webhook).handle_failure(error) + end def perform(url, payload, webhook_type = :agent_bot_webhook) super(url, payload, webhook_type) + rescue RestClient::TooManyRequests, RestClient::InternalServerError => e + Rails.logger.warn("[AgentBots::WebhookJob] attempt #{executions} failed #{e.class.name}") + raise end end diff --git a/lib/webhooks/trigger.rb b/lib/webhooks/trigger.rb index 456e186ca..7cb15c836 100644 --- a/lib/webhooks/trigger.rb +++ b/lib/webhooks/trigger.rb @@ -15,9 +15,17 @@ class Webhooks::Trigger def execute perform_request + rescue RestClient::TooManyRequests, RestClient::InternalServerError => e + raise if @webhook_type == :agent_bot_webhook + + handle_failure(e) rescue StandardError => e - handle_error(e) - Rails.logger.warn "Exception: Invalid webhook URL #{@url} : #{e.message}" + handle_failure(e) + end + + def handle_failure(error) + handle_error(error) + Rails.logger.warn "Exception: Invalid webhook URL #{@url} : #{error.message}" end private diff --git a/spec/jobs/agent_bots/webhook_job_spec.rb b/spec/jobs/agent_bots/webhook_job_spec.rb index 346d85e83..c14c46cb3 100644 --- a/spec/jobs/agent_bots/webhook_job_spec.rb +++ b/spec/jobs/agent_bots/webhook_job_spec.rb @@ -8,6 +8,16 @@ RSpec.describe AgentBots::WebhookJob do let(:url) { 'https://test.com' } let(:payload) { { name: 'test' } } let(:webhook_type) { :agent_bot_webhook } + let(:retryable_error) { RestClient::InternalServerError.new(nil, 500) } + + before do + ActiveJob::Base.queue_adapter = :test + end + + after do + clear_enqueued_jobs + clear_performed_jobs + end it 'queues the job' do expect { job }.to have_enqueued_job(described_class) @@ -19,4 +29,23 @@ RSpec.describe AgentBots::WebhookJob do expect(Webhooks::Trigger).to receive(:execute).with(url, payload, webhook_type, secret: nil, delivery_id: nil) perform_enqueued_jobs { job } end + + it 'configures retry handlers for 429 and 500 errors' do + handlers = described_class.rescue_handlers.map(&:first) + + expect(handlers).to include('RestClient::TooManyRequests', 'RestClient::InternalServerError') + end + + it 'retries 3 times and handles failure after retries are exhausted' do + allow(Webhooks::Trigger).to receive(:execute).and_raise(retryable_error) + trigger_instance = instance_double(Webhooks::Trigger, handle_failure: true) + allow(Webhooks::Trigger).to receive(:new).and_return(trigger_instance) + allow(Rails.logger).to receive(:warn) + + expect(Webhooks::Trigger).to receive(:execute).exactly(3).times + expect(trigger_instance).to receive(:handle_failure).with(instance_of(RestClient::InternalServerError)).once + expect(Rails.logger).to receive(:warn).with(/AgentBots::WebhookJob/).exactly(3).times + + perform_enqueued_jobs { job } + end end diff --git a/spec/lib/webhooks/trigger_spec.rb b/spec/lib/webhooks/trigger_spec.rb index 1e047b557..90d1ce7f8 100644 --- a/spec/lib/webhooks/trigger_spec.rb +++ b/spec/lib/webhooks/trigger_spec.rb @@ -77,6 +77,40 @@ describe Webhooks::Trigger do let!(:pending_conversation) { create(:conversation, inbox: inbox, status: :pending, account: account) } let!(:pending_message) { create(:message, account: account, inbox: inbox, conversation: pending_conversation) } + it 'raises 500 errors for retry and does not reopen conversation immediately' do + payload = { event: 'message_created', id: pending_message.id } + + expect(RestClient::Request).to receive(:execute) + .with( + method: :post, + url: url, + payload: payload.to_json, + headers: { content_type: :json, accept: :json }, + timeout: webhook_timeout + ).and_raise(RestClient::InternalServerError.new(nil, 500)).once + + expect { trigger.execute(url, payload, webhook_type) }.to raise_error(RestClient::InternalServerError) + expect(pending_conversation.reload.status).to eq('pending') + expect(Conversations::ActivityMessageJob).not_to have_been_enqueued + end + + it 'raises 429 errors for retry and does not reopen conversation immediately' do + payload = { event: 'message_created', id: pending_message.id } + + expect(RestClient::Request).to receive(:execute) + .with( + method: :post, + url: url, + payload: payload.to_json, + headers: { content_type: :json, accept: :json }, + timeout: webhook_timeout + ).and_raise(RestClient::TooManyRequests.new(nil, 429)).once + + expect { trigger.execute(url, payload, webhook_type) }.to raise_error(RestClient::TooManyRequests) + expect(pending_conversation.reload.status).to eq('pending') + expect(Conversations::ActivityMessageJob).not_to have_been_enqueued + end + it 'reopens conversation and enqueues activity message if pending' do payload = { event: 'message_created', id: pending_message.id } @@ -166,6 +200,22 @@ describe Webhooks::Trigger do expect(activity_message.content).to eq(agent_bot_error_content) end end + + it 'handles 500 without raising for non-agent webhooks' do + payload = { event: 'message_created', conversation: { id: conversation.id }, id: message.id } + + expect(RestClient::Request).to receive(:execute) + .with( + method: :post, + url: url, + payload: payload.to_json, + headers: { content_type: :json, accept: :json }, + timeout: webhook_timeout + ).and_raise(RestClient::InternalServerError.new(nil, 500)).once + + expect { trigger.execute(url, payload, webhook_type) }.not_to raise_error + expect(message.reload.status).to eq('failed') + end end describe 'request headers' do