diff --git a/app/jobs/inboxes/fetch_imap_email_inboxes_job.rb b/app/jobs/inboxes/fetch_imap_email_inboxes_job.rb index ea2705955..ea7f7664f 100644 --- a/app/jobs/inboxes/fetch_imap_email_inboxes_job.rb +++ b/app/jobs/inboxes/fetch_imap_email_inboxes_job.rb @@ -15,6 +15,7 @@ class Inboxes::FetchImapEmailInboxesJob < ApplicationJob return false if inbox.account.suspended? return false unless inbox.channel.imap_enabled return false if inbox.channel.reauthorization_required? + return false if inbox.channel.in_backoff? return true unless ChatwootApp.chatwoot_cloud? return false if default_plan?(inbox.account) diff --git a/app/jobs/inboxes/fetch_imap_emails_job.rb b/app/jobs/inboxes/fetch_imap_emails_job.rb index ec5717f3b..74368c830 100644 --- a/app/jobs/inboxes/fetch_imap_emails_job.rb +++ b/app/jobs/inboxes/fetch_imap_emails_job.rb @@ -6,26 +6,29 @@ class Inboxes::FetchImapEmailsJob < MutexApplicationJob def perform(channel, interval = 1) return unless should_fetch_email?(channel) - key = format(::Redis::Alfred::EMAIL_MESSAGE_MUTEX, inbox_id: channel.inbox.id) - - with_lock(key, 5.minutes) do - process_email_for_channel(channel, interval) - end - rescue *ExceptionList::IMAP_EXCEPTIONS => e - Rails.logger.error "Authorization error for email channel - #{channel.inbox.id} : #{e.message}" - rescue EOFError, OpenSSL::SSL::SSLError, Net::IMAP::NoResponseError, Net::IMAP::BadResponseError, Net::IMAP::InvalidResponseError, - Net::IMAP::ResponseParseError, Net::IMAP::ResponseReadError, Net::IMAP::ResponseTooLargeError => e - Rails.logger.error "Error for email channel - #{channel.inbox.id} : #{e.message}" - rescue LockAcquisitionError - Rails.logger.error "Lock failed for #{channel.inbox.id}" + fetch_emails_with_backoff(channel, interval) rescue StandardError => e ChatwootExceptionTracker.new(e, account: channel.account).capture_exception end private + def fetch_emails_with_backoff(channel, interval) + key = format(::Redis::Alfred::EMAIL_MESSAGE_MUTEX, inbox_id: channel.inbox.id) + with_lock(key, 5.minutes) { process_email_for_channel(channel, interval) } + channel.clear_backoff! + rescue Imap::AuthenticationError => e + Rails.logger.error "#{channel.backoff_log_identifier} authentication error : #{e.message}" + channel.authorization_error! + rescue *ExceptionList::IMAP_TRANSIENT_EXCEPTIONS => e + Rails.logger.error "#{channel.backoff_log_identifier} transient error : #{e.message}" + channel.apply_backoff! + rescue LockAcquisitionError + Rails.logger.error "Lock failed for #{channel.inbox.id}" + end + def should_fetch_email?(channel) - channel.imap_enabled? && !channel.reauthorization_required? + channel.imap_enabled? && !channel.reauthorization_required? && !channel.in_backoff? end def process_email_for_channel(channel, interval) diff --git a/app/models/channel/email.rb b/app/models/channel/email.rb index b1124dd75..1091b17d8 100644 --- a/app/models/channel/email.rb +++ b/app/models/channel/email.rb @@ -72,6 +72,10 @@ class Channel::Email < ApplicationRecord imap_enabled && imap_address == 'imap.gmail.com' end + def backoff_log_identifier + "Error for email channel - #{inbox.id}" + end + private def ensure_forward_to_email diff --git a/app/models/concerns/backoffable.rb b/app/models/concerns/backoffable.rb new file mode 100644 index 000000000..60b6ac98c --- /dev/null +++ b/app/models/concerns/backoffable.rb @@ -0,0 +1,70 @@ +# Backoffable provides transient-error retry backoff for models that depend on external services. +# +# When a transient error occurs (network hiccup, SSL failure, etc.) call apply_backoff!. +# The wait time ramps from 1 minute up to BACKOFF_MAX_INTERVAL_MINUTES, then holds at that +# ceiling for BACKOFF_MAX_INTERVAL_COUNT more attempts before calling prompt_reauthorization!. +# +# Call clear_backoff! after a successful operation to reset the counter. + +module Backoffable + extend ActiveSupport::Concern + + def backoff_log_identifier + inbox_id = respond_to?(:inbox) && inbox&.id + inbox_id ? "#{self.class.name} - #{inbox_id}" : "#{self.class.name}##{id}" + end + + def backoff_retry_count + ::Redis::Alfred.get(backoff_retry_count_key).to_i + end + + def in_backoff? + val = ::Redis::Alfred.get(backoff_retry_after_key) + val.present? && Time.zone.at(val.to_f) > Time.current + end + + def apply_backoff! + new_count = backoff_retry_count + 1 + max_interval, max_retries = backoff_limits + + if new_count > max_retries + exhaust_backoff(new_count) + else + schedule_backoff_retry(new_count, max_interval, max_retries) + end + end + + def clear_backoff! + ::Redis::Alfred.delete(backoff_retry_count_key) + ::Redis::Alfred.delete(backoff_retry_after_key) + end + + private + + def backoff_limits + max_interval = GlobalConfigService.load('BACKOFF_MAX_INTERVAL_MINUTES', 5).to_i + max_count = GlobalConfigService.load('BACKOFF_MAX_INTERVAL_COUNT', 10).to_i + [max_interval, (max_interval - 1) + max_count] + end + + def exhaust_backoff(new_count) + Rails.logger.warn "#{backoff_log_identifier} backoff exhausted (#{new_count} failures), prompting reauthorization" + clear_backoff! + prompt_reauthorization! + end + + def schedule_backoff_retry(new_count, max_interval, max_retries) + wait_minutes = [new_count, max_interval].min + ::Redis::Alfred.set(backoff_retry_count_key, new_count.to_s, ex: 24.hours) + ::Redis::Alfred.set(backoff_retry_after_key, wait_minutes.minutes.from_now.to_f.to_s, ex: 24.hours) + Rails.logger.warn "#{backoff_log_identifier} backoff retry #{new_count}/#{max_retries}, next attempt in #{wait_minutes}m" + end + + def backoff_retry_count_key + format(::Redis::Alfred::BACKOFF_RETRY_COUNT, obj_type: self.class.table_name.singularize, obj_id: id) + end + + def backoff_retry_after_key + format(::Redis::Alfred::BACKOFF_RETRY_AFTER, obj_type: self.class.table_name.singularize, obj_id: id) + end +end diff --git a/app/models/concerns/reauthorizable.rb b/app/models/concerns/reauthorizable.rb index 7a09f6436..b25f5525e 100644 --- a/app/models/concerns/reauthorizable.rb +++ b/app/models/concerns/reauthorizable.rb @@ -13,6 +13,8 @@ module Reauthorizable extend ActiveSupport::Concern + include Backoffable + AUTHORIZATION_ERROR_THRESHOLD = 2 # model attribute @@ -65,6 +67,7 @@ module Reauthorizable def reauthorized! ::Redis::Alfred.delete(authorization_error_count_key) ::Redis::Alfred.delete(reauthorization_required_key) + clear_backoff! invalidate_inbox_cache unless instance_of?(::AutomationRule) end diff --git a/app/services/imap/authentication_error.rb b/app/services/imap/authentication_error.rb new file mode 100644 index 000000000..5698f5b3b --- /dev/null +++ b/app/services/imap/authentication_error.rb @@ -0,0 +1 @@ +class Imap::AuthenticationError < StandardError; end diff --git a/app/services/imap/base_fetch_email_service.rb b/app/services/imap/base_fetch_email_service.rb index 09332092c..49e06491b 100644 --- a/app/services/imap/base_fetch_email_service.rb +++ b/app/services/imap/base_fetch_email_service.rb @@ -107,7 +107,11 @@ class Imap::BaseFetchEmailService def build_imap_client imap = Net::IMAP.new(channel.imap_address, port: channel.imap_port, ssl: true) - imap.authenticate(authentication_type, channel.imap_login, imap_password) + begin + imap.authenticate(authentication_type, channel.imap_login, imap_password) + rescue Net::IMAP::NoResponseError => e + raise Imap::AuthenticationError, e.message + end imap.select('INBOX') imap end diff --git a/config/installation_config.yml b/config/installation_config.yml index 34cb736bf..0d22de6fc 100644 --- a/config/installation_config.yml +++ b/config/installation_config.yml @@ -96,6 +96,19 @@ locked: false # ------- End of Account Related Config ------- # +# ------- Transient Error Backoff Config ------- # +- name: BACKOFF_MAX_INTERVAL_MINUTES + display_title: 'Backoff Max Interval (minutes)' + description: 'Maximum wait time in minutes between retry attempts before the backoff plateaus' + value: 5 + locked: false +- name: BACKOFF_MAX_INTERVAL_COUNT + display_title: 'Backoff Max Retry Count' + description: 'Number of additional retries at the maximum interval before prompting reauthorization' + value: 10 + locked: false +# ------- End of Transient Error Backoff Config ------- # + # ------- Email Related Config ------- # - name: MAILER_INBOUND_EMAIL_DOMAIN display_title: 'Inbound Email Domain' diff --git a/lib/exception_list.rb b/lib/exception_list.rb index 2fee0a170..dd468f07b 100644 --- a/lib/exception_list.rb +++ b/lib/exception_list.rb @@ -16,4 +16,10 @@ module ExceptionList Errno::ECONNRESET, Errno::ENETUNREACH, Net::IMAP::ByeResponseError, SocketError ].freeze + + IMAP_TRANSIENT_EXCEPTIONS = (IMAP_EXCEPTIONS + [ + EOFError, OpenSSL::SSL::SSLError, Net::IMAP::NoResponseError, Net::IMAP::BadResponseError, + Net::IMAP::InvalidResponseError, Net::IMAP::ResponseParseError, + Net::IMAP::ResponseReadError, Net::IMAP::ResponseTooLargeError + ]).freeze end diff --git a/lib/redis/redis_keys.rb b/lib/redis/redis_keys.rb index 8c9361ab5..fa9eeb31e 100644 --- a/lib/redis/redis_keys.rb +++ b/lib/redis/redis_keys.rb @@ -52,4 +52,8 @@ module Redis::RedisKeys ## Account Email Rate Limiting ACCOUNT_OUTBOUND_EMAIL_COUNT_KEY = 'OUTBOUND_EMAIL_COUNT::%d::%s'.freeze + + ## Transient Error Backoff + BACKOFF_RETRY_COUNT = 'BACKOFF:%s:%d:retry_count'.freeze + BACKOFF_RETRY_AFTER = 'BACKOFF:%s:%d:retry_after'.freeze end diff --git a/spec/jobs/inboxes/fetch_imap_emails_job_spec.rb b/spec/jobs/inboxes/fetch_imap_emails_job_spec.rb index da4b95b15..1abd7450b 100644 --- a/spec/jobs/inboxes/fetch_imap_emails_job_spec.rb +++ b/spec/jobs/inboxes/fetch_imap_emails_job_spec.rb @@ -38,6 +38,14 @@ RSpec.describe Inboxes::FetchImapEmailsJob do end end + context 'when channel is in backoff' do + it 'does not fetch emails' do + allow(imap_email_channel).to receive(:in_backoff?).and_return(true) + expect(Imap::FetchEmailService).not_to receive(:new) + described_class.perform_now(imap_email_channel) + end + end + context 'when the channel is regular imap' do it 'calls the imap fetch service' do fetch_service = double @@ -56,6 +64,17 @@ RSpec.describe Inboxes::FetchImapEmailsJob do described_class.perform_now(imap_email_channel, 4) expect(fetch_service).to have_received(:perform) end + + it 'clears backoff after successful fetch' do + fetch_service = double + allow(Imap::FetchEmailService).to receive(:new).and_return(fetch_service) + allow(fetch_service).to receive(:perform).and_return([]) + allow(imap_email_channel).to receive(:clear_backoff!) + + described_class.perform_now(imap_email_channel) + + expect(imap_email_channel).to have_received(:clear_backoff!) + end end context 'when the channel is Microsoft' do @@ -69,6 +88,37 @@ RSpec.describe Inboxes::FetchImapEmailsJob do end end + context 'when authentication error is raised' do + it 'calls authorization_error! on the channel' do + allow(Imap::FetchEmailService).to receive(:new).and_raise(Imap::AuthenticationError) + allow(imap_email_channel).to receive(:authorization_error!) + + described_class.perform_now(imap_email_channel) + + expect(imap_email_channel).to have_received(:authorization_error!) + end + end + + context 'when a transient IMAP error is raised' do + it 'calls apply_backoff! on the channel' do + allow(Imap::FetchEmailService).to receive(:new).and_raise(EOFError) + allow(imap_email_channel).to receive(:apply_backoff!) + + described_class.perform_now(imap_email_channel) + + expect(imap_email_channel).to have_received(:apply_backoff!) + end + end + + context 'when lock acquisition fails' do + it 'does not raise an error' do + lock_manager = instance_double(Redis::LockManager, lock: false) + allow(Redis::LockManager).to receive(:new).and_return(lock_manager) + + expect { described_class.perform_now(imap_email_channel) }.not_to raise_error + end + end + context 'when IMAP OAuth errors out' do it 'marks the connection as requiring authorization' do error_response = double diff --git a/spec/models/channel/email_spec.rb b/spec/models/channel/email_spec.rb index 939e8e10a..bdb9f859b 100644 --- a/spec/models/channel/email_spec.rb +++ b/spec/models/channel/email_spec.rb @@ -2,12 +2,14 @@ require 'rails_helper' require Rails.root.join 'spec/models/concerns/reauthorizable_shared.rb' +require Rails.root.join 'spec/models/concerns/backoffable_shared.rb' RSpec.describe Channel::Email do let(:channel) { create(:channel_email) } describe 'concerns' do it_behaves_like 'reauthorizable' + it_behaves_like 'backoffable' context 'when prompt_reauthorization!' do it 'calls channel notifier mail for email' do diff --git a/spec/models/concerns/backoffable_shared.rb b/spec/models/concerns/backoffable_shared.rb new file mode 100644 index 000000000..385f93be7 --- /dev/null +++ b/spec/models/concerns/backoffable_shared.rb @@ -0,0 +1,43 @@ +require 'rails_helper' + +shared_examples_for 'backoffable' do + let(:obj) { FactoryBot.create(described_class.to_s.underscore.tr('/', '_').to_sym) } + + before do + allow(GlobalConfigService).to receive(:load).with('BACKOFF_MAX_INTERVAL_MINUTES', 5).and_return(2) + allow(GlobalConfigService).to receive(:load).with('BACKOFF_MAX_INTERVAL_COUNT', 10).and_return(3) + # max_interval=2, max_retries=(2-1)+3=4; exhausts on 5th apply_backoff! + end + + it 'starts with no backoff' do + expect(obj.in_backoff?).to be false + expect(obj.backoff_retry_count).to eq 0 + end + + it 'ramps backoff on each failure' do + obj.apply_backoff! + expect(obj.backoff_retry_count).to eq 1 + expect(obj.in_backoff?).to be true + end + + it 'caps wait time at max interval' do + 4.times { obj.apply_backoff! } + expect(obj.backoff_retry_count).to eq 4 + expect(obj.in_backoff?).to be true + end + + it 'exhausts backoff and calls prompt_reauthorization! after max retries' do + allow(obj).to receive(:prompt_reauthorization!) + 5.times { obj.apply_backoff! } + expect(obj).to have_received(:prompt_reauthorization!) + expect(obj.backoff_retry_count).to eq 0 + expect(obj.in_backoff?).to be false + end + + it 'clear_backoff! resets retry count and backoff window' do + obj.apply_backoff! + obj.clear_backoff! + expect(obj.in_backoff?).to be false + expect(obj.backoff_retry_count).to eq 0 + end +end