fix(agent-bot): stabilize webhook delivery for transient upstream failures (#13521)

This fixes the agent-bot webhook delivery path so transient upstream
failures follow the expected delivery lifecycle. Existing fallback
behavior is preserved, and fallback actions are applied only after
delivery attempts are exhausted.

To reproduce, configure an agent-bot webhook endpoint to return 429/500
for message events. Before this fix, failure handling could be applied
too early; after this fix, delivery attempts complete first and then
existing fallback handling runs.

Tested with:
- bundle exec rspec spec/jobs/agent_bots/webhook_job_spec.rb
spec/lib/webhooks/trigger_spec.rb
- bundle exec rubocop spec/jobs/agent_bots/webhook_job_spec.rb
spec/lib/webhooks/trigger_spec.rb

---------

Co-authored-by: Muhsin Keloth <muhsinkeramam@gmail.com>
This commit is contained in:
Sojan Jose
2026-03-02 02:18:29 -08:00
committed by GitHub
parent 8d48e05283
commit ab93821d2b
4 changed files with 96 additions and 2 deletions

View File

@@ -1,7 +1,14 @@
class AgentBots::WebhookJob < WebhookJob
queue_as :high
retry_on RestClient::TooManyRequests, RestClient::InternalServerError, wait: 3.seconds, attempts: 3 do |job, error|
url, payload, webhook_type = job.arguments
Webhooks::Trigger.new(url, payload, webhook_type || :agent_bot_webhook).handle_failure(error)
end
def perform(url, payload, webhook_type = :agent_bot_webhook)
super(url, payload, webhook_type)
rescue RestClient::TooManyRequests, RestClient::InternalServerError => e
Rails.logger.warn("[AgentBots::WebhookJob] attempt #{executions} failed #{e.class.name}")
raise
end
end

View File

@@ -15,9 +15,17 @@ class Webhooks::Trigger
def execute
perform_request
rescue RestClient::TooManyRequests, RestClient::InternalServerError => e
raise if @webhook_type == :agent_bot_webhook
handle_failure(e)
rescue StandardError => e
handle_error(e)
Rails.logger.warn "Exception: Invalid webhook URL #{@url} : #{e.message}"
handle_failure(e)
end
def handle_failure(error)
handle_error(error)
Rails.logger.warn "Exception: Invalid webhook URL #{@url} : #{error.message}"
end
private

View File

@@ -8,6 +8,16 @@ RSpec.describe AgentBots::WebhookJob do
let(:url) { 'https://test.com' }
let(:payload) { { name: 'test' } }
let(:webhook_type) { :agent_bot_webhook }
let(:retryable_error) { RestClient::InternalServerError.new(nil, 500) }
before do
ActiveJob::Base.queue_adapter = :test
end
after do
clear_enqueued_jobs
clear_performed_jobs
end
it 'queues the job' do
expect { job }.to have_enqueued_job(described_class)
@@ -19,4 +29,23 @@ RSpec.describe AgentBots::WebhookJob do
expect(Webhooks::Trigger).to receive(:execute).with(url, payload, webhook_type, secret: nil, delivery_id: nil)
perform_enqueued_jobs { job }
end
it 'configures retry handlers for 429 and 500 errors' do
handlers = described_class.rescue_handlers.map(&:first)
expect(handlers).to include('RestClient::TooManyRequests', 'RestClient::InternalServerError')
end
it 'retries 3 times and handles failure after retries are exhausted' do
allow(Webhooks::Trigger).to receive(:execute).and_raise(retryable_error)
trigger_instance = instance_double(Webhooks::Trigger, handle_failure: true)
allow(Webhooks::Trigger).to receive(:new).and_return(trigger_instance)
allow(Rails.logger).to receive(:warn)
expect(Webhooks::Trigger).to receive(:execute).exactly(3).times
expect(trigger_instance).to receive(:handle_failure).with(instance_of(RestClient::InternalServerError)).once
expect(Rails.logger).to receive(:warn).with(/AgentBots::WebhookJob/).exactly(3).times
perform_enqueued_jobs { job }
end
end

View File

@@ -77,6 +77,40 @@ describe Webhooks::Trigger do
let!(:pending_conversation) { create(:conversation, inbox: inbox, status: :pending, account: account) }
let!(:pending_message) { create(:message, account: account, inbox: inbox, conversation: pending_conversation) }
it 'raises 500 errors for retry and does not reopen conversation immediately' do
payload = { event: 'message_created', id: pending_message.id }
expect(RestClient::Request).to receive(:execute)
.with(
method: :post,
url: url,
payload: payload.to_json,
headers: { content_type: :json, accept: :json },
timeout: webhook_timeout
).and_raise(RestClient::InternalServerError.new(nil, 500)).once
expect { trigger.execute(url, payload, webhook_type) }.to raise_error(RestClient::InternalServerError)
expect(pending_conversation.reload.status).to eq('pending')
expect(Conversations::ActivityMessageJob).not_to have_been_enqueued
end
it 'raises 429 errors for retry and does not reopen conversation immediately' do
payload = { event: 'message_created', id: pending_message.id }
expect(RestClient::Request).to receive(:execute)
.with(
method: :post,
url: url,
payload: payload.to_json,
headers: { content_type: :json, accept: :json },
timeout: webhook_timeout
).and_raise(RestClient::TooManyRequests.new(nil, 429)).once
expect { trigger.execute(url, payload, webhook_type) }.to raise_error(RestClient::TooManyRequests)
expect(pending_conversation.reload.status).to eq('pending')
expect(Conversations::ActivityMessageJob).not_to have_been_enqueued
end
it 'reopens conversation and enqueues activity message if pending' do
payload = { event: 'message_created', id: pending_message.id }
@@ -166,6 +200,22 @@ describe Webhooks::Trigger do
expect(activity_message.content).to eq(agent_bot_error_content)
end
end
it 'handles 500 without raising for non-agent webhooks' do
payload = { event: 'message_created', conversation: { id: conversation.id }, id: message.id }
expect(RestClient::Request).to receive(:execute)
.with(
method: :post,
url: url,
payload: payload.to_json,
headers: { content_type: :json, accept: :json },
timeout: webhook_timeout
).and_raise(RestClient::InternalServerError.new(nil, 500)).once
expect { trigger.execute(url, payload, webhook_type) }.not_to raise_error
expect(message.reload.status).to eq('failed')
end
end
describe 'request headers' do