fix: call authorization_error! on IMAP auth failures (#13560)
## Notion document https://www.notion.so/chatwoot/Email-IMAP-Issue-30aa5f274c928062aa6bddc2e5877a63?showMoveTo=true&saveParent=true ## Description PLAIN IMAP channels (non-OAuth) were silently retrying failed authentication every minute, forever. When credentials are wrong/expired, Net::IMAP::NoResponseError was caught and logged but channel.authorization_error! was never called — so the Redis error counter never incremented, reauthorization_required? was never set, and admins were never notified. OAuth channels already had this handled correctly via the Reauthorizable concern. Additionally, Net::IMAP::ResponseParseError (raised by non-RFC-compliant IMAP servers) was falling through to the StandardError catch-all, flooding Estimated impact before fix: ~70–75 broken IMAP inboxes generating ~700k–750k wasted Sidekiq jobs/week. ## Type of change - [ ] Bug fix (non-breaking change which fixes an issue) ## Checklist: - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented on my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [ ] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged and published in downstream modules
This commit is contained in:
committed by
GitHub
parent
9ca03c1af3
commit
7acd239c70
@@ -15,6 +15,7 @@ class Inboxes::FetchImapEmailInboxesJob < ApplicationJob
|
||||
return false if inbox.account.suspended?
|
||||
return false unless inbox.channel.imap_enabled
|
||||
return false if inbox.channel.reauthorization_required?
|
||||
return false if inbox.channel.in_backoff?
|
||||
|
||||
return true unless ChatwootApp.chatwoot_cloud?
|
||||
return false if default_plan?(inbox.account)
|
||||
|
||||
@@ -6,26 +6,29 @@ class Inboxes::FetchImapEmailsJob < MutexApplicationJob
|
||||
def perform(channel, interval = 1)
|
||||
return unless should_fetch_email?(channel)
|
||||
|
||||
key = format(::Redis::Alfred::EMAIL_MESSAGE_MUTEX, inbox_id: channel.inbox.id)
|
||||
|
||||
with_lock(key, 5.minutes) do
|
||||
process_email_for_channel(channel, interval)
|
||||
end
|
||||
rescue *ExceptionList::IMAP_EXCEPTIONS => e
|
||||
Rails.logger.error "Authorization error for email channel - #{channel.inbox.id} : #{e.message}"
|
||||
rescue EOFError, OpenSSL::SSL::SSLError, Net::IMAP::NoResponseError, Net::IMAP::BadResponseError, Net::IMAP::InvalidResponseError,
|
||||
Net::IMAP::ResponseParseError, Net::IMAP::ResponseReadError, Net::IMAP::ResponseTooLargeError => e
|
||||
Rails.logger.error "Error for email channel - #{channel.inbox.id} : #{e.message}"
|
||||
rescue LockAcquisitionError
|
||||
Rails.logger.error "Lock failed for #{channel.inbox.id}"
|
||||
fetch_emails_with_backoff(channel, interval)
|
||||
rescue StandardError => e
|
||||
ChatwootExceptionTracker.new(e, account: channel.account).capture_exception
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def fetch_emails_with_backoff(channel, interval)
|
||||
key = format(::Redis::Alfred::EMAIL_MESSAGE_MUTEX, inbox_id: channel.inbox.id)
|
||||
with_lock(key, 5.minutes) { process_email_for_channel(channel, interval) }
|
||||
channel.clear_backoff!
|
||||
rescue Imap::AuthenticationError => e
|
||||
Rails.logger.error "#{channel.backoff_log_identifier} authentication error : #{e.message}"
|
||||
channel.authorization_error!
|
||||
rescue *ExceptionList::IMAP_TRANSIENT_EXCEPTIONS => e
|
||||
Rails.logger.error "#{channel.backoff_log_identifier} transient error : #{e.message}"
|
||||
channel.apply_backoff!
|
||||
rescue LockAcquisitionError
|
||||
Rails.logger.error "Lock failed for #{channel.inbox.id}"
|
||||
end
|
||||
|
||||
def should_fetch_email?(channel)
|
||||
channel.imap_enabled? && !channel.reauthorization_required?
|
||||
channel.imap_enabled? && !channel.reauthorization_required? && !channel.in_backoff?
|
||||
end
|
||||
|
||||
def process_email_for_channel(channel, interval)
|
||||
|
||||
@@ -72,6 +72,10 @@ class Channel::Email < ApplicationRecord
|
||||
imap_enabled && imap_address == 'imap.gmail.com'
|
||||
end
|
||||
|
||||
def backoff_log_identifier
|
||||
"Error for email channel - #{inbox.id}"
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def ensure_forward_to_email
|
||||
|
||||
70
app/models/concerns/backoffable.rb
Normal file
70
app/models/concerns/backoffable.rb
Normal file
@@ -0,0 +1,70 @@
|
||||
# Backoffable provides transient-error retry backoff for models that depend on external services.
|
||||
#
|
||||
# When a transient error occurs (network hiccup, SSL failure, etc.) call apply_backoff!.
|
||||
# The wait time ramps from 1 minute up to BACKOFF_MAX_INTERVAL_MINUTES, then holds at that
|
||||
# ceiling for BACKOFF_MAX_INTERVAL_COUNT more attempts before calling prompt_reauthorization!.
|
||||
#
|
||||
# Call clear_backoff! after a successful operation to reset the counter.
|
||||
|
||||
module Backoffable
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
def backoff_log_identifier
|
||||
inbox_id = respond_to?(:inbox) && inbox&.id
|
||||
inbox_id ? "#{self.class.name} - #{inbox_id}" : "#{self.class.name}##{id}"
|
||||
end
|
||||
|
||||
def backoff_retry_count
|
||||
::Redis::Alfred.get(backoff_retry_count_key).to_i
|
||||
end
|
||||
|
||||
def in_backoff?
|
||||
val = ::Redis::Alfred.get(backoff_retry_after_key)
|
||||
val.present? && Time.zone.at(val.to_f) > Time.current
|
||||
end
|
||||
|
||||
def apply_backoff!
|
||||
new_count = backoff_retry_count + 1
|
||||
max_interval, max_retries = backoff_limits
|
||||
|
||||
if new_count > max_retries
|
||||
exhaust_backoff(new_count)
|
||||
else
|
||||
schedule_backoff_retry(new_count, max_interval, max_retries)
|
||||
end
|
||||
end
|
||||
|
||||
def clear_backoff!
|
||||
::Redis::Alfred.delete(backoff_retry_count_key)
|
||||
::Redis::Alfred.delete(backoff_retry_after_key)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def backoff_limits
|
||||
max_interval = GlobalConfigService.load('BACKOFF_MAX_INTERVAL_MINUTES', 5).to_i
|
||||
max_count = GlobalConfigService.load('BACKOFF_MAX_INTERVAL_COUNT', 10).to_i
|
||||
[max_interval, (max_interval - 1) + max_count]
|
||||
end
|
||||
|
||||
def exhaust_backoff(new_count)
|
||||
Rails.logger.warn "#{backoff_log_identifier} backoff exhausted (#{new_count} failures), prompting reauthorization"
|
||||
clear_backoff!
|
||||
prompt_reauthorization!
|
||||
end
|
||||
|
||||
def schedule_backoff_retry(new_count, max_interval, max_retries)
|
||||
wait_minutes = [new_count, max_interval].min
|
||||
::Redis::Alfred.set(backoff_retry_count_key, new_count.to_s, ex: 24.hours)
|
||||
::Redis::Alfred.set(backoff_retry_after_key, wait_minutes.minutes.from_now.to_f.to_s, ex: 24.hours)
|
||||
Rails.logger.warn "#{backoff_log_identifier} backoff retry #{new_count}/#{max_retries}, next attempt in #{wait_minutes}m"
|
||||
end
|
||||
|
||||
def backoff_retry_count_key
|
||||
format(::Redis::Alfred::BACKOFF_RETRY_COUNT, obj_type: self.class.table_name.singularize, obj_id: id)
|
||||
end
|
||||
|
||||
def backoff_retry_after_key
|
||||
format(::Redis::Alfred::BACKOFF_RETRY_AFTER, obj_type: self.class.table_name.singularize, obj_id: id)
|
||||
end
|
||||
end
|
||||
@@ -13,6 +13,8 @@
|
||||
module Reauthorizable
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
include Backoffable
|
||||
|
||||
AUTHORIZATION_ERROR_THRESHOLD = 2
|
||||
|
||||
# model attribute
|
||||
@@ -65,6 +67,7 @@ module Reauthorizable
|
||||
def reauthorized!
|
||||
::Redis::Alfred.delete(authorization_error_count_key)
|
||||
::Redis::Alfred.delete(reauthorization_required_key)
|
||||
clear_backoff!
|
||||
|
||||
invalidate_inbox_cache unless instance_of?(::AutomationRule)
|
||||
end
|
||||
|
||||
1
app/services/imap/authentication_error.rb
Normal file
1
app/services/imap/authentication_error.rb
Normal file
@@ -0,0 +1 @@
|
||||
class Imap::AuthenticationError < StandardError; end
|
||||
@@ -107,7 +107,11 @@ class Imap::BaseFetchEmailService
|
||||
|
||||
def build_imap_client
|
||||
imap = Net::IMAP.new(channel.imap_address, port: channel.imap_port, ssl: true)
|
||||
imap.authenticate(authentication_type, channel.imap_login, imap_password)
|
||||
begin
|
||||
imap.authenticate(authentication_type, channel.imap_login, imap_password)
|
||||
rescue Net::IMAP::NoResponseError => e
|
||||
raise Imap::AuthenticationError, e.message
|
||||
end
|
||||
imap.select('INBOX')
|
||||
imap
|
||||
end
|
||||
|
||||
@@ -96,6 +96,19 @@
|
||||
locked: false
|
||||
# ------- End of Account Related Config ------- #
|
||||
|
||||
# ------- Transient Error Backoff Config ------- #
|
||||
- name: BACKOFF_MAX_INTERVAL_MINUTES
|
||||
display_title: 'Backoff Max Interval (minutes)'
|
||||
description: 'Maximum wait time in minutes between retry attempts before the backoff plateaus'
|
||||
value: 5
|
||||
locked: false
|
||||
- name: BACKOFF_MAX_INTERVAL_COUNT
|
||||
display_title: 'Backoff Max Retry Count'
|
||||
description: 'Number of additional retries at the maximum interval before prompting reauthorization'
|
||||
value: 10
|
||||
locked: false
|
||||
# ------- End of Transient Error Backoff Config ------- #
|
||||
|
||||
# ------- Email Related Config ------- #
|
||||
- name: MAILER_INBOUND_EMAIL_DOMAIN
|
||||
display_title: 'Inbound Email Domain'
|
||||
|
||||
@@ -16,4 +16,10 @@ module ExceptionList
|
||||
Errno::ECONNRESET, Errno::ENETUNREACH, Net::IMAP::ByeResponseError,
|
||||
SocketError
|
||||
].freeze
|
||||
|
||||
IMAP_TRANSIENT_EXCEPTIONS = (IMAP_EXCEPTIONS + [
|
||||
EOFError, OpenSSL::SSL::SSLError, Net::IMAP::NoResponseError, Net::IMAP::BadResponseError,
|
||||
Net::IMAP::InvalidResponseError, Net::IMAP::ResponseParseError,
|
||||
Net::IMAP::ResponseReadError, Net::IMAP::ResponseTooLargeError
|
||||
]).freeze
|
||||
end
|
||||
|
||||
@@ -52,4 +52,8 @@ module Redis::RedisKeys
|
||||
|
||||
## Account Email Rate Limiting
|
||||
ACCOUNT_OUTBOUND_EMAIL_COUNT_KEY = 'OUTBOUND_EMAIL_COUNT::%<account_id>d::%<date>s'.freeze
|
||||
|
||||
## Transient Error Backoff
|
||||
BACKOFF_RETRY_COUNT = 'BACKOFF:%<obj_type>s:%<obj_id>d:retry_count'.freeze
|
||||
BACKOFF_RETRY_AFTER = 'BACKOFF:%<obj_type>s:%<obj_id>d:retry_after'.freeze
|
||||
end
|
||||
|
||||
@@ -38,6 +38,14 @@ RSpec.describe Inboxes::FetchImapEmailsJob do
|
||||
end
|
||||
end
|
||||
|
||||
context 'when channel is in backoff' do
|
||||
it 'does not fetch emails' do
|
||||
allow(imap_email_channel).to receive(:in_backoff?).and_return(true)
|
||||
expect(Imap::FetchEmailService).not_to receive(:new)
|
||||
described_class.perform_now(imap_email_channel)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the channel is regular imap' do
|
||||
it 'calls the imap fetch service' do
|
||||
fetch_service = double
|
||||
@@ -56,6 +64,17 @@ RSpec.describe Inboxes::FetchImapEmailsJob do
|
||||
described_class.perform_now(imap_email_channel, 4)
|
||||
expect(fetch_service).to have_received(:perform)
|
||||
end
|
||||
|
||||
it 'clears backoff after successful fetch' do
|
||||
fetch_service = double
|
||||
allow(Imap::FetchEmailService).to receive(:new).and_return(fetch_service)
|
||||
allow(fetch_service).to receive(:perform).and_return([])
|
||||
allow(imap_email_channel).to receive(:clear_backoff!)
|
||||
|
||||
described_class.perform_now(imap_email_channel)
|
||||
|
||||
expect(imap_email_channel).to have_received(:clear_backoff!)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the channel is Microsoft' do
|
||||
@@ -69,6 +88,37 @@ RSpec.describe Inboxes::FetchImapEmailsJob do
|
||||
end
|
||||
end
|
||||
|
||||
context 'when authentication error is raised' do
|
||||
it 'calls authorization_error! on the channel' do
|
||||
allow(Imap::FetchEmailService).to receive(:new).and_raise(Imap::AuthenticationError)
|
||||
allow(imap_email_channel).to receive(:authorization_error!)
|
||||
|
||||
described_class.perform_now(imap_email_channel)
|
||||
|
||||
expect(imap_email_channel).to have_received(:authorization_error!)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when a transient IMAP error is raised' do
|
||||
it 'calls apply_backoff! on the channel' do
|
||||
allow(Imap::FetchEmailService).to receive(:new).and_raise(EOFError)
|
||||
allow(imap_email_channel).to receive(:apply_backoff!)
|
||||
|
||||
described_class.perform_now(imap_email_channel)
|
||||
|
||||
expect(imap_email_channel).to have_received(:apply_backoff!)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when lock acquisition fails' do
|
||||
it 'does not raise an error' do
|
||||
lock_manager = instance_double(Redis::LockManager, lock: false)
|
||||
allow(Redis::LockManager).to receive(:new).and_return(lock_manager)
|
||||
|
||||
expect { described_class.perform_now(imap_email_channel) }.not_to raise_error
|
||||
end
|
||||
end
|
||||
|
||||
context 'when IMAP OAuth errors out' do
|
||||
it 'marks the connection as requiring authorization' do
|
||||
error_response = double
|
||||
|
||||
@@ -2,12 +2,14 @@
|
||||
|
||||
require 'rails_helper'
|
||||
require Rails.root.join 'spec/models/concerns/reauthorizable_shared.rb'
|
||||
require Rails.root.join 'spec/models/concerns/backoffable_shared.rb'
|
||||
|
||||
RSpec.describe Channel::Email do
|
||||
let(:channel) { create(:channel_email) }
|
||||
|
||||
describe 'concerns' do
|
||||
it_behaves_like 'reauthorizable'
|
||||
it_behaves_like 'backoffable'
|
||||
|
||||
context 'when prompt_reauthorization!' do
|
||||
it 'calls channel notifier mail for email' do
|
||||
|
||||
43
spec/models/concerns/backoffable_shared.rb
Normal file
43
spec/models/concerns/backoffable_shared.rb
Normal file
@@ -0,0 +1,43 @@
|
||||
require 'rails_helper'
|
||||
|
||||
shared_examples_for 'backoffable' do
|
||||
let(:obj) { FactoryBot.create(described_class.to_s.underscore.tr('/', '_').to_sym) }
|
||||
|
||||
before do
|
||||
allow(GlobalConfigService).to receive(:load).with('BACKOFF_MAX_INTERVAL_MINUTES', 5).and_return(2)
|
||||
allow(GlobalConfigService).to receive(:load).with('BACKOFF_MAX_INTERVAL_COUNT', 10).and_return(3)
|
||||
# max_interval=2, max_retries=(2-1)+3=4; exhausts on 5th apply_backoff!
|
||||
end
|
||||
|
||||
it 'starts with no backoff' do
|
||||
expect(obj.in_backoff?).to be false
|
||||
expect(obj.backoff_retry_count).to eq 0
|
||||
end
|
||||
|
||||
it 'ramps backoff on each failure' do
|
||||
obj.apply_backoff!
|
||||
expect(obj.backoff_retry_count).to eq 1
|
||||
expect(obj.in_backoff?).to be true
|
||||
end
|
||||
|
||||
it 'caps wait time at max interval' do
|
||||
4.times { obj.apply_backoff! }
|
||||
expect(obj.backoff_retry_count).to eq 4
|
||||
expect(obj.in_backoff?).to be true
|
||||
end
|
||||
|
||||
it 'exhausts backoff and calls prompt_reauthorization! after max retries' do
|
||||
allow(obj).to receive(:prompt_reauthorization!)
|
||||
5.times { obj.apply_backoff! }
|
||||
expect(obj).to have_received(:prompt_reauthorization!)
|
||||
expect(obj.backoff_retry_count).to eq 0
|
||||
expect(obj.in_backoff?).to be false
|
||||
end
|
||||
|
||||
it 'clear_backoff! resets retry count and backoff window' do
|
||||
obj.apply_backoff!
|
||||
obj.clear_backoff!
|
||||
expect(obj.in_backoff?).to be false
|
||||
expect(obj.backoff_retry_count).to eq 0
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user