From d9b840f1615b44c5b84cba4b5585973f78e5b12f Mon Sep 17 00:00:00 2001 From: Vinay Keerthi <11478411+stonecharioteer@users.noreply.github.com> Date: Tue, 4 Nov 2025 07:07:51 +0530 Subject: [PATCH] fix: Optimize Message search_data to prevent OpenSearch field explosion (#12786) ## Description Refactored the `Message#search_data` method to prevent exceeding OpenSearch's 1000 field limit during reindex operations. **Problem:** The previous implementation serialized entire ActiveRecord objects (Inbox, Sender, Conversation) with all their JSONB fields, causing dynamic field explosion in OpenSearch. This resulted in `Searchkick::ImportError` with "Limit of total fields [1000] has been exceeded". **Solution:** Whitelisted only necessary fields for search and filtering, and flattened JSONB `custom_attributes` into key-value pair arrays to prevent unbounded field creation. Linked to: CW-5861 ## Type of change - [x] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality not to work as expected) - [x] This change requires a documentation update ## How Has This Been Tested? - Verified rubocop passes with no offenses - Code review of search field usage from `enterprise/app/services/enterprise/search_service.rb` - Analyzed actual search queries to determine required indexed fields **Still needed:** - Full reindex test on staging/production environment - Verify search functionality still works after reindex - Confirm field count is under 1000 limit ## Changes Made ### Before - Indexed 1000+ fields (entire AR objects with JSONB) - `inbox` = full Inbox object (23+ fields + JSONB) - `sender` = full Contact/User/AgentBot object (10+ fields + JSONB) - `conversation` = full push_event_data - Dynamic JSONB keys creating unlimited fields ### After - ~35-40 controlled fields - Whitelisted search fields: `content`, `attachment_transcribed_text`, `email_subject` - Filter fields: `account_id`, `inbox_id`, `conversation_id`, `sender_id`, `sender_type`, etc. - Flattened `custom_attributes`: `[{key, value, value_type}]` format - Helper methods: `search_conversation_data`, `search_inbox_data`, `search_sender_data`, `search_additional_data` ## Checklist: - [x] My code follows the style guidelines of this project - [x] I have performed a self-review of my code - [x] I have commented on my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged and published in downstream modules ## Post-merge Steps After merging, the following steps are required: 1. **Reindex all messages:** ```bash bundle exec rails runner "Message.reindex" ``` 2. **Verify field count:** ```bash bundle exec rails runner " client = Searchkick.client index_name = Message.searchkick_index.name mapping = client.indices.get_mapping(index: index_name) fields = mapping.dig(index_name, 'mappings', 'properties') puts 'Total fields: ' + fields.keys.count.to_s " ``` 3. **Test search functionality** to ensure queries still work as expected --------- Co-authored-by: Vishnu Narayanan Co-authored-by: Pranav --- app/models/message.rb | 13 +--- .../messages/search_data_presenter.rb | 58 ++++++++++++++ .../messages/search_data_presenter_spec.rb | 78 +++++++++++++++++++ 3 files changed, 140 insertions(+), 9 deletions(-) create mode 100644 app/presenters/messages/search_data_presenter.rb create mode 100644 spec/presenters/messages/search_data_presenter_spec.rb diff --git a/app/models/message.rb b/app/models/message.rb index 5f98493d0..2964d9286 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -153,15 +153,6 @@ class Message < ApplicationRecord merge_sender_attributes(data) end - def search_data - data = attributes.symbolize_keys - data[:conversation] = conversation.present? ? conversation_push_event_data : nil - data[:attachments] = attachments.map(&:push_event_data) if attachments.present? - data[:sender] = sender.push_event_data if sender - data[:inbox] = inbox - data - end - def conversation_push_event_data { assignee_id: conversation.assignee_id, @@ -259,6 +250,10 @@ class Message < ApplicationRecord true end + def search_data + Messages::SearchDataPresenter.new(self).search_data + end + private def prevent_message_flooding diff --git a/app/presenters/messages/search_data_presenter.rb b/app/presenters/messages/search_data_presenter.rb new file mode 100644 index 000000000..dba0b9499 --- /dev/null +++ b/app/presenters/messages/search_data_presenter.rb @@ -0,0 +1,58 @@ +class Messages::SearchDataPresenter < SimpleDelegator + def search_data + { + **searchable_content, + **message_attributes, + **search_additional_data, + conversation: conversation_data + } + end + + private + + def searchable_content + { + content: content, + attachments: attachment_data, + content_attributes: content_attributes_data + } + end + + def message_attributes + { + account_id: account_id, + inbox_id: inbox_id, + conversation_id: conversation_id, + message_type: message_type, + private: private, + created_at: created_at, + source_id: source_id, + sender_id: sender_id, + sender_type: sender_type + } + end + + def attachment_data + attachments.filter_map do |a| + { transcribed_text: a.meta&.dig('transcribed_text') } + end.presence + end + + def content_attributes_data + email_subject = content_attributes.dig(:email, :subject) + return {} if email_subject.blank? + + { email: { subject: email_subject } } + end + + def conversation_data + { id: conversation.display_id } + end + + def search_additional_data + { + campaign_id: additional_attributes&.dig('campaign_id'), + automation_rule_id: content_attributes&.dig('automation_rule_id') + } + end +end diff --git a/spec/presenters/messages/search_data_presenter_spec.rb b/spec/presenters/messages/search_data_presenter_spec.rb new file mode 100644 index 000000000..a5062086a --- /dev/null +++ b/spec/presenters/messages/search_data_presenter_spec.rb @@ -0,0 +1,78 @@ +require 'rails_helper' + +RSpec.describe Messages::SearchDataPresenter do + let(:presenter) { described_class.new(message) } + let(:account) { create(:account) } + let(:inbox) { create(:inbox, account: account) } + let(:contact) { create(:contact, account: account) } + let(:conversation) { create(:conversation, account: account, inbox: inbox, contact: contact) } + let(:message) { create(:message, account: account, inbox: inbox, conversation: conversation, sender: contact) } + + describe '#search_data' do + let(:expected_data) do + { + content: message.content, + account_id: message.account_id, + inbox_id: message.inbox_id, + conversation_id: message.conversation_id, + message_type: message.message_type, + private: message.private, + created_at: message.created_at, + source_id: message.source_id, + sender_id: message.sender_id, + sender_type: message.sender_type, + conversation: { + id: conversation.display_id + } + } + end + + it 'returns search index payload with core fields' do + expect(presenter.search_data).to include(expected_data) + end + + context 'with attachments' do + before do + attachment = message.attachments.new(account_id: message.account_id, file_type: :image) + attachment.file.attach(io: Rails.root.join('spec/assets/avatar.png').open, filename: 'avatar.png', content_type: 'image/png') + attachment.meta = { 'transcribed_text' => 'Hello world' } + end + + it 'includes attachment transcriptions' do + attachments_data = presenter.search_data[:attachments] + expect(attachments_data).to be_an(Array) + expect(attachments_data.first).to include(transcribed_text: 'Hello world') + end + end + + context 'with email content attributes' do + before do + message.update( + content_attributes: { email: { subject: 'Test Subject' } } + ) + end + + it 'includes email subject' do + content_attrs = presenter.search_data[:content_attributes] + expect(content_attrs[:email][:subject]).to eq('Test Subject') + end + end + + context 'with campaign and automation data' do + before do + message.update( + additional_attributes: { 'campaign_id' => '123' }, + content_attributes: { 'automation_rule_id' => '456' } + ) + end + + it 'includes campaign_id' do + expect(presenter.search_data[:campaign_id]).to eq('123') + end + + it 'includes automation_rule_id' do + expect(presenter.search_data[:automation_rule_id]).to eq('456') + end + end + end +end