diff --git a/Gemfile b/Gemfile index 18442e3b0..abbd3332f 100644 --- a/Gemfile +++ b/Gemfile @@ -21,6 +21,7 @@ gem 'telephone_number' gem 'time_diff' gem 'tzinfo-data' gem 'valid_email2' +gem 'email-provider-info' # compress javascript config.assets.js_compressor gem 'uglifier' ##-- used for single column multiple binary flags in notification settings/feature flagging --## diff --git a/Gemfile.lock b/Gemfile.lock index 2f4da34e3..99e75b33c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -270,6 +270,7 @@ GEM concurrent-ruby (~> 1.0) http (>= 3.0) ruby2_keywords + email-provider-info (0.0.1) email_reply_trimmer (0.1.13) erubi (1.13.0) et-orbi (1.2.11) @@ -1016,6 +1017,7 @@ DEPENDENCIES dotenv-rails (>= 3.0.0) down elastic-apm + email-provider-info email_reply_trimmer facebook-messenger factory_bot_rails (>= 6.4.3) diff --git a/db/migrate/20251021082242_add_unique_index_to_companies_domain.rb b/db/migrate/20251021082242_add_unique_index_to_companies_domain.rb new file mode 100644 index 000000000..2fb387242 --- /dev/null +++ b/db/migrate/20251021082242_add_unique_index_to_companies_domain.rb @@ -0,0 +1,16 @@ +class AddUniqueIndexToCompaniesDomain < ActiveRecord::Migration[7.1] + def up + remove_index :companies, name: 'index_companies_on_domain_and_account_id', if_exists: true + + add_index :companies, [:account_id, :domain], + unique: true, + name: 'index_companies_on_account_and_domain', + where: 'domain IS NOT NULL' + end + + def down + remove_index :companies, name: 'index_companies_on_account_and_domain', if_exists: true + add_index :companies, [:domain, :account_id], + name: 'index_companies_on_domain_and_account_id' + end +end diff --git a/db/schema.rb b/db/schema.rb index 022a0101e..45165d5c5 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -577,8 +577,8 @@ ActiveRecord::Schema[7.1].define(version: 2025_10_22_152158) do t.bigint "account_id", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.index ["account_id", "domain"], name: "index_companies_on_account_and_domain", unique: true, where: "(domain IS NOT NULL)" t.index ["account_id"], name: "index_companies_on_account_id" - t.index ["domain", "account_id"], name: "index_companies_on_domain_and_account_id" t.index ["name", "account_id"], name: "index_companies_on_name_and_account_id" end diff --git a/enterprise/app/jobs/migration/company_account_batch_job.rb b/enterprise/app/jobs/migration/company_account_batch_job.rb new file mode 100644 index 000000000..2e6c20f19 --- /dev/null +++ b/enterprise/app/jobs/migration/company_account_batch_job.rb @@ -0,0 +1,54 @@ +class Migration::CompanyAccountBatchJob < ApplicationJob + queue_as :low + + def perform(account) + account.contacts + .where.not(email: nil) + .find_in_batches(batch_size: 1000) do |contact_batch| + process_contact_batch(contact_batch, account) + end + end + + private + + def process_contact_batch(contacts, account) + contacts.each do |contact| + next unless should_process?(contact) + + company = find_or_create_company(contact, account) + # rubocop:disable Rails/SkipsModelValidations + contact.update_column(:company_id, company.id) if company + # rubocop:enable Rails/SkipsModelValidations + end + end + + def should_process?(contact) + return false if contact.company_id.present? + return false if contact.email.blank? + + Companies::BusinessEmailDetectorService.new(contact.email).perform + end + + def find_or_create_company(contact, account) + domain = extract_domain(contact.email) + company_name = derive_company_name(contact, domain) + + Company.find_or_create_by!(account: account, domain: domain) do |company| + company.name = company_name + end + rescue ActiveRecord::RecordNotUnique + # Race condition: Another job created it between our check and create + # just find the one that was created + + Company.find_by(account: account, domain: domain) + end + + def extract_domain(email) + email.split('@').last&.downcase + end + + def derive_company_name(contact, domain) + contact.additional_attributes&.dig('company_name').presence || + domain.split('.').first.tr('-_', ' ').titleize + end +end diff --git a/enterprise/app/jobs/migration/company_backfill_job.rb b/enterprise/app/jobs/migration/company_backfill_job.rb new file mode 100644 index 000000000..db9a3370f --- /dev/null +++ b/enterprise/app/jobs/migration/company_backfill_job.rb @@ -0,0 +1,17 @@ +class Migration::CompanyBackfillJob < ApplicationJob + queue_as :low + + def perform + Rails.logger.info 'Starting company backfill migration...' + account_count = 0 + Account.find_in_batches(batch_size: 100) do |accounts| + accounts.each do |account| + Rails.logger.info "Enqueuing company backfill for account #{account.id}" + Migration::CompanyAccountBatchJob.perform_later(account) + account_count += 1 + end + end + + Rails.logger.info "Company backfill migration complete. Enqueued jobs for #{account_count} accounts." + end +end diff --git a/enterprise/app/models/company.rb b/enterprise/app/models/company.rb index 764cb2a9c..fde6cb122 100644 --- a/enterprise/app/models/company.rb +++ b/enterprise/app/models/company.rb @@ -12,9 +12,9 @@ # # Indexes # -# index_companies_on_account_id (account_id) -# index_companies_on_domain_and_account_id (domain,account_id) -# index_companies_on_name_and_account_id (name,account_id) +# index_companies_on_account_and_domain (account_id,domain) UNIQUE WHERE (domain IS NOT NULL) +# index_companies_on_account_id (account_id) +# index_companies_on_name_and_account_id (name,account_id) # class Company < ApplicationRecord include Avatarable @@ -24,6 +24,7 @@ class Company < ApplicationRecord with: /\A[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)+\z/, message: I18n.t('errors.companies.domain.invalid') } + validates :domain, uniqueness: { scope: :account_id }, if: -> { domain.present? } validates :description, length: { maximum: Limits::COMPANY_DESCRIPTION_LENGTH_LIMIT } belongs_to :account diff --git a/enterprise/app/services/companies/business_email_detector_service.rb b/enterprise/app/services/companies/business_email_detector_service.rb new file mode 100644 index 000000000..422ef41c8 --- /dev/null +++ b/enterprise/app/services/companies/business_email_detector_service.rb @@ -0,0 +1,19 @@ +class Companies::BusinessEmailDetectorService + attr_reader :email + + def initialize(email) + @email = email + end + + def perform + return false if email.blank? + + address = ValidEmail2::Address.new(email) + return false unless address.valid? + return false if address.disposable_domain? + + provider = EmailProviderInfo.call(email) + + provider.nil? + end +end diff --git a/lib/tasks/companies.rake b/lib/tasks/companies.rake new file mode 100644 index 000000000..11fb5dc10 --- /dev/null +++ b/lib/tasks/companies.rake @@ -0,0 +1,12 @@ +namespace :companies do + desc 'Backfill companies from existing contact email domains' + task backfill: :environment do + puts 'Starting company backfill migration...' + puts 'This will process all accounts and create companies from contact email domains.' + puts 'The job will run in the background via Sidekiq' + puts '' + Migration::CompanyBackfillJob.perform_later + puts 'Company backfill job has been enqueued.' + puts 'Monitor progress in logs or Sidekiq dashboard.' + end +end diff --git a/spec/enterprise/jobs/migration/company_account_batch_job_spec.rb b/spec/enterprise/jobs/migration/company_account_batch_job_spec.rb new file mode 100644 index 000000000..9a3e88c9d --- /dev/null +++ b/spec/enterprise/jobs/migration/company_account_batch_job_spec.rb @@ -0,0 +1,120 @@ +require 'rails_helper' + +RSpec.describe Migration::CompanyAccountBatchJob, type: :job do + let(:account) { create(:account) } + + describe '#perform' do + before do + # Stub EmailProvideInfo to control behavior in tests + allow(EmailProviderInfo).to receive(:call) do |email| + domain = email.split('@').last&.downcase + case domain + when 'gmail.com', 'yahoo.com', 'hotmail.com', 'uol.com.br' + 'free_provider' # generic free provider name + end + end + end + + context 'when contact has business email' do + let!(:contact) { create(:contact, account: account, email: 'user@acme.com') } + + it 'creates a company and associates the contact' do + expect do + described_class.perform_now(account) + end.to change(Company, :count).by(1) + contact.reload + expect(contact.company).to be_present + expect(contact.company.domain).to eq('acme.com') + expect(contact.company.name).to eq('Acme') + end + end + + context 'when contact has free email' do + let!(:contact) { create(:contact, account: account, email: 'user@gmail.com') } + + it 'does not create a company' do + expect do + described_class.perform_now(account) + end.not_to change(Company, :count) + contact.reload + expect(contact.company_id).to be_nil + end + end + + context 'when contact has company_name in additional_attributes' do + let!(:contact) do + create(:contact, account: account, email: 'user@acme.com', additional_attributes: { 'company_name' => 'Acme Corporation' }) + end + + it 'uses the saved company name' do + described_class.perform_now(account) + contact.reload + expect(contact.company.name).to eq('Acme Corporation') + end + end + + context 'when contact already has a company' do + let!(:existing_company) { create(:company, account: account, domain: 'existing.com') } + let!(:contact) do + create(:contact, account: account, email: 'user@acme.com', company: existing_company) + end + + it 'does not change the existing company' do + described_class.perform_now(account) + contact.reload + expect(contact.company_id).to eq(existing_company.id) + end + end + + context 'when multiple contacts have the same domain' do + let!(:contact1) { create(:contact, account: account, email: 'user1@acme.com') } + let!(:contact2) { create(:contact, account: account, email: 'user2@acme.com') } + + it 'creates only one company for the domain' do + expect do + described_class.perform_now(account) + end.to change(Company, :count).by(1) + contact1.reload + contact2.reload + expect(contact1.company_id).to eq(contact2.company_id) + expect(contact1.company.domain).to eq('acme.com') + end + end + + context 'when contact has no email' do + let!(:contact) { create(:contact, account: account, email: nil) } + + it 'skips the contact' do + expect do + described_class.perform_now(account) + end.not_to change(Company, :count) + contact.reload + expect(contact.company_id).to be_nil + end + end + + context 'when processing large batch' do + before do + contacts_data = Array.new(2000) do |i| + { + account_id: account.id, + email: "user#{i}@company#{i % 100}.com", + name: "User #{i}", + created_at: Time.current, + updated_at: Time.current + } + end + # rubocop:disable Rails/SkipsModelValidations + Contact.insert_all(contacts_data) + # rubocop:enable Rails/SkipsModelValidations + end + + it 'processes all contacts in batches' do + expect do + described_class.perform_now(account) + end.to change(Company, :count).by(100) + expect(account.contacts.where.not(company_id: nil).count).to eq(2000) + end + end + end +end diff --git a/spec/enterprise/jobs/migration/company_backfill_job_spec.rb b/spec/enterprise/jobs/migration/company_backfill_job_spec.rb new file mode 100644 index 000000000..e60b84487 --- /dev/null +++ b/spec/enterprise/jobs/migration/company_backfill_job_spec.rb @@ -0,0 +1,31 @@ +require 'rails_helper' + +RSpec.describe Migration::CompanyBackfillJob, type: :job do + describe '#perform' do + it 'enqueues the job' do + expect { described_class.perform_later } + .to have_enqueued_job(described_class) + .on_queue('low') + end + + context 'when accounts exist' do + let!(:account1) { create(:account) } + let!(:account2) { create(:account) } + + it 'enqueues CompanyAccountBatchJob for each account' do + expect do + described_class.perform_now + end.to have_enqueued_job(Migration::CompanyAccountBatchJob) + .with(account1) + .and have_enqueued_job(Migration::CompanyAccountBatchJob) + .with(account2) + end + end + + context 'when no accounts exist' do + it 'completes without error' do + expect { described_class.perform_now }.not_to raise_error + end + end + end +end diff --git a/spec/enterprise/services/companies/business_email_detector_service_spec.rb b/spec/enterprise/services/companies/business_email_detector_service_spec.rb new file mode 100644 index 000000000..ceabfa905 --- /dev/null +++ b/spec/enterprise/services/companies/business_email_detector_service_spec.rb @@ -0,0 +1,99 @@ +require 'rails_helper' + +RSpec.describe Companies::BusinessEmailDetectorService, type: :service do + let(:service) { described_class.new(email) } + + describe '#perform' do + context 'when email is from a business domain' do + let(:email) { 'user@acme.com' } + let(:valid_email_address) { instance_double(ValidEmail2::Address, valid?: true, disposable_domain?: false) } + + before do + allow(ValidEmail2::Address).to receive(:new).with(email).and_return(valid_email_address) + allow(EmailProviderInfo).to receive(:call).with(email).and_return(nil) + end + + it 'returns true' do + expect(service.perform).to be(true) + end + end + + context 'when email is from gmail' do + let(:email) { 'user@gmail.com' } + let(:valid_email_address) { instance_double(ValidEmail2::Address, valid?: true, disposable_domain?: false) } + + before do + allow(ValidEmail2::Address).to receive(:new).with(email).and_return(valid_email_address) + allow(EmailProviderInfo).to receive(:call).with(email).and_return('gmail') + end + + it 'returns false' do + expect(service.perform).to be(false) + end + end + + context 'when email is from Brazilian free provider' do + let(:email) { 'user@uol.com.br' } + let(:valid_email_address) { instance_double(ValidEmail2::Address, valid?: true, disposable_domain?: false) } + + before do + allow(ValidEmail2::Address).to receive(:new).with(email).and_return(valid_email_address) + allow(EmailProviderInfo).to receive(:call).with(email).and_return('uol') + end + + it 'returns false' do + expect(service.perform).to be(false) + end + end + + context 'when email is disposable' do + let(:email) { 'user@mailinator.com' } + let(:disposable_email_address) { instance_double(ValidEmail2::Address, valid?: true, disposable_domain?: true) } + + it 'returns false' do + allow(ValidEmail2::Address).to receive(:new).with(email).and_return(disposable_email_address) + expect(service.perform).to be(false) + end + end + + context 'when email is invalid format' do + let(:email) { 'invalid-email' } + let(:invalid_email_address) { instance_double(ValidEmail2::Address, valid?: false) } + + it 'returns false' do + allow(ValidEmail2::Address).to receive(:new).with(email).and_return(invalid_email_address) + expect(service.perform).to be(false) + end + end + + context 'when email is nil' do + let(:email) { nil } + + it 'remains false' do + expect(service.perform).to be(false) + end + end + + context 'when email is empty string' do + let(:email) { '' } + + it 'returns false' do + expect(service.perform).to be(false) + end + end + + context 'when email domain is uppercase' do + let(:email) { 'user@GMAIL.COM' } + let(:valid_email_address) { instance_double(ValidEmail2::Address, valid?: true, disposable_domain?: false) } + + before do + allow(ValidEmail2::Address).to receive(:new).with(email).and_return(valid_email_address) + allow(EmailProviderInfo).to receive(:call).with(email).and_return('gmail') + end + + it 'returns false (case insensitive)' do + expect(service.perform).to be(false) + end + end + end +end