From 0175714d65bbc49680a94d428646504cd50ab9a0 Mon Sep 17 00:00:00 2001 From: Vishnu Narayanan Date: Fri, 28 Mar 2025 12:18:39 +0530 Subject: [PATCH] feat: add job to remove stale contacts and contact_inboxes (#11186) - Add a job to remove stale contacts and contact_inboxes across all accounts Stale anonymous contact is defined as - have no identification (email, phone_number, and identifier are NULL) - have no conversations - are older than 30 days --------- Co-authored-by: Pranav Co-authored-by: Shivam Mishra --- .../internal/process_stale_contacts_job.rb | 18 ++++++++ .../internal/remove_stale_contacts_job.rb | 13 ++++++ app/models/contact.rb | 12 ++++++ .../internal/remove_stale_contacts_service.rb | 19 +++++++++ config/schedule.yml | 7 ++++ .../process_stale_contacts_job_spec.rb | 34 +++++++++++++++ .../remove_stale_contacts_job_spec.rb | 20 +++++++++ .../remove_stale_contacts_service_spec.rb | 41 +++++++++++++++++++ 8 files changed, 164 insertions(+) create mode 100644 app/jobs/internal/process_stale_contacts_job.rb create mode 100644 app/jobs/internal/remove_stale_contacts_job.rb create mode 100644 app/services/internal/remove_stale_contacts_service.rb create mode 100644 spec/jobs/internal/process_stale_contacts_job_spec.rb create mode 100644 spec/jobs/internal/remove_stale_contacts_job_spec.rb create mode 100644 spec/services/internal/remove_stale_contacts_service_spec.rb diff --git a/app/jobs/internal/process_stale_contacts_job.rb b/app/jobs/internal/process_stale_contacts_job.rb new file mode 100644 index 000000000..eaf53be75 --- /dev/null +++ b/app/jobs/internal/process_stale_contacts_job.rb @@ -0,0 +1,18 @@ +# housekeeping +# remove stale contacts for all accounts +# - have no identification (email, phone_number, and identifier are NULL) +# - have no conversations +# - are older than 30 days + +class Internal::ProcessStaleContactsJob < ApplicationJob + queue_as :scheduled_jobs + + def perform + Account.find_in_batches(batch_size: 100) do |accounts| + accounts.each do |account| + Rails.logger.info "Enqueuing RemoveStaleContactsJob for account #{account.id}" + Internal::RemoveStaleContactsJob.perform_later(account) + end + end + end +end diff --git a/app/jobs/internal/remove_stale_contacts_job.rb b/app/jobs/internal/remove_stale_contacts_job.rb new file mode 100644 index 000000000..3c33fd245 --- /dev/null +++ b/app/jobs/internal/remove_stale_contacts_job.rb @@ -0,0 +1,13 @@ +# housekeeping +# remove contacts that: +# - have no identification (email, phone_number, and identifier are NULL) +# - have no conversations +# - are older than 30 days + +class Internal::RemoveStaleContactsJob < ApplicationJob + queue_as :low + + def perform(account, batch_size = 1000) + Internal::RemoveStaleContactsService.new(account: account).perform(batch_size) + end +end diff --git a/app/models/contact.rb b/app/models/contact.rb index d9555f5fa..83920d9fc 100644 --- a/app/models/contact.rb +++ b/app/models/contact.rb @@ -128,6 +128,18 @@ class Contact < ApplicationRecord ) } + # Find contacts that: + # 1. Have no identification (email, phone_number, and identifier are NULL or empty string) + # 2. Have no conversations + # 3. Are older than the specified time period + scope :stale_without_conversations, lambda { |time_period| + where('contacts.email IS NULL OR contacts.email = ?', '') + .where('contacts.phone_number IS NULL OR contacts.phone_number = ?', '') + .where('contacts.identifier IS NULL OR contacts.identifier = ?', '') + .where('contacts.created_at < ?', time_period) + .where.missing(:conversations) + } + def get_source_id(inbox_id) contact_inboxes.find_by!(inbox_id: inbox_id).source_id end diff --git a/app/services/internal/remove_stale_contacts_service.rb b/app/services/internal/remove_stale_contacts_service.rb new file mode 100644 index 000000000..74e189068 --- /dev/null +++ b/app/services/internal/remove_stale_contacts_service.rb @@ -0,0 +1,19 @@ +class Internal::RemoveStaleContactsService + pattr_initialize [:account!] + + def perform(batch_size = 1000) + contacts_to_remove = @account.contacts.stale_without_conversations(30.days.ago) + total_deleted = 0 + + Rails.logger.info "[Internal::RemoveStaleContactsService] Starting removal of stale contacts for account #{@account.id}" + + contacts_to_remove.find_in_batches(batch_size: batch_size) do |batch| + contact_ids = batch.map(&:id) + + ContactInbox.where(contact_id: contact_ids).delete_all + Contact.where(id: contact_ids).delete_all + total_deleted += batch.size + Rails.logger.info "[Internal::RemoveStaleContactsService] Deleted #{batch.size} contacts (#{total_deleted} total) for account #{@account.id}" + end + end +end diff --git a/config/schedule.yml b/config/schedule.yml index d8d23172b..f86015de6 100644 --- a/config/schedule.yml +++ b/config/schedule.yml @@ -32,3 +32,10 @@ remove_stale_redis_keys_job.rb: cron: '30 22 * * *' class: 'Internal::RemoveStaleRedisKeysJob' queue: scheduled_jobs + +# executed daily at 2230 UTC +# which is our lowest traffic time +# process_stale_contacts_job: +# cron: '30 22 * * *' +# class: 'Internal::ProcessStaleContactsJob' +# queue: scheduled_jobs diff --git a/spec/jobs/internal/process_stale_contacts_job_spec.rb b/spec/jobs/internal/process_stale_contacts_job_spec.rb new file mode 100644 index 000000000..30648ce9a --- /dev/null +++ b/spec/jobs/internal/process_stale_contacts_job_spec.rb @@ -0,0 +1,34 @@ +require 'rails_helper' + +RSpec.describe Internal::ProcessStaleContactsJob do + subject(:job) { described_class.perform_later } + + it 'enqueues the job' do + expect { job }.to have_enqueued_job(described_class) + .on_queue('scheduled_jobs') + end + + it 'enqueues RemoveStaleContactsJob for each account' do + account1 = create(:account) + account2 = create(:account) + account3 = create(:account) + + expect { described_class.perform_now }.to have_enqueued_job(Internal::RemoveStaleContactsJob) + .with(account1) + .on_queue('low') + expect { described_class.perform_now }.to have_enqueued_job(Internal::RemoveStaleContactsJob) + .with(account2) + .on_queue('low') + expect { described_class.perform_now }.to have_enqueued_job(Internal::RemoveStaleContactsJob) + .with(account3) + .on_queue('low') + end + + it 'processes accounts in batches' do + account = create(:account) + allow(Account).to receive(:find_in_batches).with(batch_size: 100).and_yield([account]) + + expect(Internal::RemoveStaleContactsJob).to receive(:perform_later).with(account) + described_class.perform_now + end +end diff --git a/spec/jobs/internal/remove_stale_contacts_job_spec.rb b/spec/jobs/internal/remove_stale_contacts_job_spec.rb new file mode 100644 index 000000000..ea2636114 --- /dev/null +++ b/spec/jobs/internal/remove_stale_contacts_job_spec.rb @@ -0,0 +1,20 @@ +require 'rails_helper' + +RSpec.describe Internal::RemoveStaleContactsJob do + subject(:job) { described_class.perform_later(account) } + + let(:account) { create(:account) } + + it 'enqueues the job' do + expect { job }.to have_enqueued_job(described_class) + .with(account) + .on_queue('low') + end + + it 'calls the RemoveStaleContactsService' do + service = instance_double(Internal::RemoveStaleContactsService) + expect(Internal::RemoveStaleContactsService).to receive(:new).with(account: account).and_return(service) + expect(service).to receive(:perform) + described_class.perform_now(account) + end +end diff --git a/spec/services/internal/remove_stale_contacts_service_spec.rb b/spec/services/internal/remove_stale_contacts_service_spec.rb new file mode 100644 index 000000000..d00c0e3ca --- /dev/null +++ b/spec/services/internal/remove_stale_contacts_service_spec.rb @@ -0,0 +1,41 @@ +require 'rails_helper' + +RSpec.describe Internal::RemoveStaleContactsService do + describe '#perform' do + let(:account) { create(:account) } + + it 'does not delete contacts with conversations' do + # Contact with NULL values and conversation + contact1 = create(:contact, account: account, email: nil, phone_number: nil, identifier: nil, created_at: 31.days.ago) + create(:conversation, contact: contact1) + + # Contact with empty strings and conversation + contact2 = create(:contact, account: account, email: '', phone_number: '', identifier: '', created_at: 31.days.ago) + create(:conversation, contact: contact2) + + service = described_class.new(account: account) + expect { service.perform }.not_to change(Contact, :count) + end + + it 'does not delete contacts with identification' do + create(:contact, :with_email, account: account, phone_number: '', identifier: nil, created_at: 31.days.ago) + create(:contact, :with_phone_number, account: account, email: nil, identifier: '', created_at: 31.days.ago) + create(:contact, account: account, identifier: 'test123', created_at: 31.days.ago) + + create(:contact, :with_email, account: account, phone_number: '', identifier: nil, created_at: 31.days.ago) + create(:contact, :with_phone_number, account: account, email: nil, identifier: nil, created_at: 31.days.ago) + create(:contact, account: account, email: '', phone_number: nil, identifier: 'test1234', created_at: 31.days.ago) + + service = described_class.new(account: account) + expect { service.perform }.not_to change(Contact, :count) + end + + it 'deletes stale contacts' do + create(:contact, account: account, created_at: 31.days.ago) + create(:contact, account: account, created_at: 1.day.ago) + + service = described_class.new(account: account) + expect { service.perform }.to change(Contact, :count).by(-1) + end + end +end