From 826d9ec5a72da14fd6f66909d2cf47b0606b2a6b Mon Sep 17 00:00:00 2001 From: Sojan Jose Date: Sun, 1 Oct 2023 19:31:38 -0700 Subject: [PATCH] chore: Refactor Response Bot Data Schema (#8011) This PR refactors the schema we introduced in #7518 based on the feedback from production tests. Here is the change log - Decouple Inbox association to a new table inbox_response_sources -> this lets us share the same response source between multiple inboxes - Add a status field to responses. This ensures that, by default, responses are created in pending status. You can do quality assurance before making them active. In future, this status can be leveraged by the bot to auto-generate response questions from conversations which require a handoff - Add response_source association to responses and remove hard dependency from response_documents. This lets users write free-form question answers based on conversations, which doesn't necessarily need a response source. --- .../v1/models/_response_source.json.jbuilder | 1 - .../monkey_patches/schema_dumper.rb | 1 + .../accounts/response_sources_controller.rb | 2 +- .../app/dashboards/response_dashboard.rb | 21 ++++++++++++-- .../dashboards/response_document_dashboard.rb | 21 ++++++++------ .../dashboards/response_source_dashboard.rb | 6 ++-- enterprise/app/jobs/response_builder_job.rb | 10 +++++-- enterprise/app/models/enterprise/audit_log.rb | 28 +++++++++++++++++++ .../app/models/enterprise/concerns/inbox.rb | 5 ++-- enterprise/app/models/enterprise/inbox.rb | 2 +- .../app/models/inbox_response_source.rb | 21 ++++++++++++++ enterprise/app/models/response.rb | 12 +++++++- enterprise/app/models/response_document.rb | 2 +- enterprise/app/models/response_source.rb | 8 +++--- .../services/features/response_bot_service.rb | 19 +++++++++++-- lib/tasks/auto_annotate_models.rake | 5 +++- .../response_sources_controller_spec.rb | 6 ++-- .../v1/accounts/inboxes_controller_spec.rb | 4 ++- 18 files changed, 138 insertions(+), 36 deletions(-) create mode 100644 enterprise/app/models/inbox_response_source.rb diff --git a/app/views/api/v1/models/_response_source.json.jbuilder b/app/views/api/v1/models/_response_source.json.jbuilder index 570f3687e..9753fa860 100644 --- a/app/views/api/v1/models/_response_source.json.jbuilder +++ b/app/views/api/v1/models/_response_source.json.jbuilder @@ -2,7 +2,6 @@ json.id resource.id json.name resource.name json.source_link resource.source_link json.source_type resource.source_type -json.inbox_id resource.inbox_id json.account_id resource.account_id json.created_at resource.created_at.to_i json.updated_at resource.updated_at.to_i diff --git a/config/initializers/monkey_patches/schema_dumper.rb b/config/initializers/monkey_patches/schema_dumper.rb index fd1865d0e..a790f34d0 100644 --- a/config/initializers/monkey_patches/schema_dumper.rb +++ b/config/initializers/monkey_patches/schema_dumper.rb @@ -33,3 +33,4 @@ ActiveRecord::ConnectionAdapters::PostgreSQL::SchemaDumper.ignore_extentions << ActiveRecord::SchemaDumper.ignore_tables << 'responses' ActiveRecord::SchemaDumper.ignore_tables << 'response_sources' ActiveRecord::SchemaDumper.ignore_tables << 'response_documents' +ActiveRecord::SchemaDumper.ignore_tables << 'inbox_response_sources' diff --git a/enterprise/app/controllers/api/v1/accounts/response_sources_controller.rb b/enterprise/app/controllers/api/v1/accounts/response_sources_controller.rb index 5c84933cf..8527036bb 100644 --- a/enterprise/app/controllers/api/v1/accounts/response_sources_controller.rb +++ b/enterprise/app/controllers/api/v1/accounts/response_sources_controller.rb @@ -28,7 +28,7 @@ class Api::V1::Accounts::ResponseSourcesController < Api::V1::Accounts::BaseCont end def response_source_params - params.require(:response_source).permit(:name, :source_link, :inbox_id, + params.require(:response_source).permit(:name, :source_link, response_documents_attributes: [:document_link]) end end diff --git a/enterprise/app/dashboards/response_dashboard.rb b/enterprise/app/dashboards/response_dashboard.rb index ca6fe9405..40ba1c428 100644 --- a/enterprise/app/dashboards/response_dashboard.rb +++ b/enterprise/app/dashboards/response_dashboard.rb @@ -10,8 +10,13 @@ class ResponseDashboard < Administrate::BaseDashboard ATTRIBUTE_TYPES = { id: Field::Number.with_options(searchable: true), account: Field::BelongsToSearch.with_options(class_name: 'Account', searchable_field: [:name, :id], order: 'id DESC'), + response_source: Field::BelongsToSearch.with_options(class_name: 'ResponseSource', searchable_field: [:name, :id, :source_link], + order: 'id DESC'), answer: Field::Text.with_options(searchable: true), question: Field::String.with_options(searchable: true), + status: Field::Select.with_options(searchable: false, collection: lambda { |field| + field.resource.class.send(field.attribute.to_s.pluralize).keys + }), response_document: Field::BelongsToSearch.with_options(class_name: 'ResponseDocument', searchable_field: [:document_link, :content, :id], order: 'id DESC'), created_at: Field::DateTime, @@ -27,7 +32,9 @@ class ResponseDashboard < Administrate::BaseDashboard id question answer + status response_document + response_source account ].freeze @@ -35,9 +42,11 @@ class ResponseDashboard < Administrate::BaseDashboard # an array of attributes that will be displayed on the model's show page. SHOW_PAGE_ATTRIBUTES = %i[ id + status question answer response_document + response_source account created_at updated_at @@ -47,10 +56,11 @@ class ResponseDashboard < Administrate::BaseDashboard # an array of attributes that will be displayed # on the model's form (`new` and `edit`) pages. FORM_ATTRIBUTES = %i[ + response_source + response_document question answer - response_document - account + status ].freeze # COLLECTION_FILTERS @@ -63,7 +73,12 @@ class ResponseDashboard < Administrate::BaseDashboard # COLLECTION_FILTERS = { # open: ->(resources) { resources.where(open: true) } # }.freeze - COLLECTION_FILTERS = {}.freeze + COLLECTION_FILTERS = { + account: ->(resources, attr) { resources.where(account_id: attr) }, + response_source: ->(resources, attr) { resources.where(response_source_id: attr) }, + response_document: ->(resources, attr) { resources.where(response_document_id: attr) }, + status: ->(resources, attr) { resources.where(status: attr) } + }.freeze # Overwrite this method to customize how responses are displayed # across all pages of the admin dashboard. diff --git a/enterprise/app/dashboards/response_document_dashboard.rb b/enterprise/app/dashboards/response_document_dashboard.rb index d29b977b9..fe20c842b 100644 --- a/enterprise/app/dashboards/response_document_dashboard.rb +++ b/enterprise/app/dashboards/response_document_dashboard.rb @@ -38,11 +38,11 @@ class ResponseDocumentDashboard < Administrate::BaseDashboard SHOW_PAGE_ATTRIBUTES = %i[ id account - content - document_id - document_link - document_type response_source + document_link + document_id + document_type + content created_at updated_at responses @@ -53,11 +53,11 @@ class ResponseDocumentDashboard < Administrate::BaseDashboard # on the model's form (`new` and `edit`) pages. FORM_ATTRIBUTES = %i[ account - content - document_id - document_link - document_type response_source + document_link + document_id + document_type + content ].freeze # COLLECTION_FILTERS @@ -70,7 +70,10 @@ class ResponseDocumentDashboard < Administrate::BaseDashboard # COLLECTION_FILTERS = { # open: ->(resources) { resources.where(open: true) } # }.freeze - COLLECTION_FILTERS = {}.freeze + COLLECTION_FILTERS = { + account: ->(resources, attr) { resources.where(account_id: attr) }, + response_source: ->(resources, attr) { resources.where(response_source_id: attr) } + }.freeze # Overwrite this method to customize how response documents are displayed # across all pages of the admin dashboard. diff --git a/enterprise/app/dashboards/response_source_dashboard.rb b/enterprise/app/dashboards/response_source_dashboard.rb index 2d851c1db..f17548643 100644 --- a/enterprise/app/dashboards/response_source_dashboard.rb +++ b/enterprise/app/dashboards/response_source_dashboard.rb @@ -8,7 +8,7 @@ class ResponseSourceDashboard < Administrate::BaseDashboard # which determines how the attribute is displayed # on pages throughout the dashboard. ATTRIBUTE_TYPES = { - id: Field::Number, + id: Field::Number.with_options(searchable: true), account: Field::BelongsToSearch.with_options(class_name: 'Account', searchable_field: [:name, :id], order: 'id DESC'), name: Field::String.with_options(searchable: true), response_documents: Field::HasMany, @@ -73,7 +73,9 @@ class ResponseSourceDashboard < Administrate::BaseDashboard # COLLECTION_FILTERS = { # open: ->(resources) { resources.where(open: true) } # }.freeze - COLLECTION_FILTERS = {}.freeze + COLLECTION_FILTERS = { + account: ->(resources, attr) { resources.where(account_id: attr) } + }.freeze # Overwrite this method to customize how response sources are displayed # across all pages of the admin dashboard. diff --git a/enterprise/app/jobs/response_builder_job.rb b/enterprise/app/jobs/response_builder_job.rb index a459b887d..f30f97d4b 100644 --- a/enterprise/app/jobs/response_builder_job.rb +++ b/enterprise/app/jobs/response_builder_job.rb @@ -63,14 +63,20 @@ class ResponseBuilderJob < ApplicationJob def create_responses(response, response_document) response_body = JSON.parse(response.body) - faqs = JSON.parse(response_body['choices'][0]['message']['content'].strip) + content = response_body.dig('choices', 0, 'message', 'content') + + return if content.nil? + + faqs = JSON.parse(content.strip) faqs.each do |faq| response_document.responses.create!( question: faq['question'], answer: faq['answer'], - account_id: response_document.account_id + response_source: response_document.response_source ) end + rescue JSON::ParserError => e + Rails.logger.error "Error in parsing GPT processed response document : #{e.message}" end end diff --git a/enterprise/app/models/enterprise/audit_log.rb b/enterprise/app/models/enterprise/audit_log.rb index c84aefcb1..cbb64425f 100644 --- a/enterprise/app/models/enterprise/audit_log.rb +++ b/enterprise/app/models/enterprise/audit_log.rb @@ -1,3 +1,31 @@ +# == Schema Information +# +# Table name: audits +# +# id :bigint not null, primary key +# action :string +# associated_type :string +# auditable_type :string +# audited_changes :jsonb +# comment :string +# remote_address :string +# request_uuid :string +# user_type :string +# username :string +# version :integer default(0) +# created_at :datetime +# associated_id :bigint +# auditable_id :bigint +# user_id :bigint +# +# Indexes +# +# associated_index (associated_type,associated_id) +# auditable_index (auditable_type,auditable_id,version) +# index_audits_on_created_at (created_at) +# index_audits_on_request_uuid (request_uuid) +# user_index (user_id,user_type) +# class Enterprise::AuditLog < Audited::Audit after_save :log_additional_information diff --git a/enterprise/app/models/enterprise/concerns/inbox.rb b/enterprise/app/models/enterprise/concerns/inbox.rb index 8c2082dbd..a71e243b5 100644 --- a/enterprise/app/models/enterprise/concerns/inbox.rb +++ b/enterprise/app/models/enterprise/concerns/inbox.rb @@ -3,9 +3,10 @@ module Enterprise::Concerns::Inbox included do def self.add_response_related_associations - has_many :response_sources, dependent: :destroy_async + has_many :inbox_response_sources, dependent: :destroy_async + has_many :response_sources, through: :inbox_response_sources has_many :response_documents, through: :response_sources - has_many :responses, through: :response_documents + has_many :responses, through: :response_sources end add_response_related_associations if Features::ResponseBotService.new.vector_extension_enabled? diff --git a/enterprise/app/models/enterprise/inbox.rb b/enterprise/app/models/enterprise/inbox.rb index 70c37456b..b1d39f0be 100644 --- a/enterprise/app/models/enterprise/inbox.rb +++ b/enterprise/app/models/enterprise/inbox.rb @@ -7,7 +7,7 @@ module Enterprise::Inbox def get_responses(query) embedding = Openai::EmbeddingsService.new.get_embedding(query) - responses.nearest_neighbors(:embedding, embedding, distance: 'cosine').first(5) + responses.active.nearest_neighbors(:embedding, embedding, distance: 'cosine').first(5) end def active_bot? diff --git a/enterprise/app/models/inbox_response_source.rb b/enterprise/app/models/inbox_response_source.rb new file mode 100644 index 000000000..6a929a8b8 --- /dev/null +++ b/enterprise/app/models/inbox_response_source.rb @@ -0,0 +1,21 @@ +# == Schema Information +# +# Table name: inbox_response_sources +# +# id :bigint not null, primary key +# created_at :datetime not null +# updated_at :datetime not null +# inbox_id :bigint not null +# response_source_id :bigint not null +# +# Indexes +# +# index_inbox_response_sources_on_inbox_id (inbox_id) +# index_inbox_response_sources_on_inbox_id_and_response_source_id (inbox_id,response_source_id) UNIQUE +# index_inbox_response_sources_on_response_source_id (response_source_id) +# index_inbox_response_sources_on_response_source_id_and_inbox_id (response_source_id,inbox_id) UNIQUE +# +class InboxResponseSource < ApplicationRecord + belongs_to :inbox + belongs_to :response_source +end diff --git a/enterprise/app/models/response.rb b/enterprise/app/models/response.rb index 1fc82e94c..d226d650e 100644 --- a/enterprise/app/models/response.rb +++ b/enterprise/app/models/response.rb @@ -6,10 +6,12 @@ # answer :text not null # embedding :vector(1536) # question :string not null +# status :integer default(0) # created_at :datetime not null # updated_at :datetime not null # account_id :bigint not null # response_document_id :bigint +# response_source_id :bigint not null # # Indexes # @@ -17,11 +19,15 @@ # index_responses_on_response_document_id (response_document_id) # class Response < ApplicationRecord - belongs_to :response_document + belongs_to :response_document, optional: true belongs_to :account + belongs_to :response_source has_neighbors :embedding, normalize: true before_save :update_response_embedding + before_validation :ensure_account + + enum status: { pending: 0, active: 1 } def self.search(query) embedding = Openai::EmbeddingsService.new.get_embedding(query) @@ -30,6 +36,10 @@ class Response < ApplicationRecord private + def ensure_account + self.account = response_source.account + end + def update_response_embedding self.embedding = Openai::EmbeddingsService.new.get_embedding("#{question}: #{answer}") end diff --git a/enterprise/app/models/response_document.rb b/enterprise/app/models/response_document.rb index 6e547a64c..91540e6ab 100644 --- a/enterprise/app/models/response_document.rb +++ b/enterprise/app/models/response_document.rb @@ -18,7 +18,7 @@ # index_response_documents_on_response_source_id (response_source_id) # class ResponseDocument < ApplicationRecord - has_many :responses, dependent: :destroy + has_many :responses, dependent: :destroy_async belongs_to :account belongs_to :response_source diff --git a/enterprise/app/models/response_source.rb b/enterprise/app/models/response_source.rb index 99db2ac3d..cc5890a19 100644 --- a/enterprise/app/models/response_source.rb +++ b/enterprise/app/models/response_source.rb @@ -10,7 +10,6 @@ # created_at :datetime not null # updated_at :datetime not null # account_id :bigint not null -# inbox_id :bigint not null # source_model_id :bigint # # Indexes @@ -19,10 +18,11 @@ # class ResponseSource < ApplicationRecord enum source_type: { external: 0, kbase: 1, inbox: 2 } + has_many :inbox_response_sources, dependent: :destroy_async + has_many :inboxes, through: :inbox_response_sources belongs_to :account - belongs_to :inbox - has_many :response_documents, dependent: :destroy - has_many :responses, through: :response_documents + has_many :response_documents, dependent: :destroy_async + has_many :responses, dependent: :destroy_async accepts_nested_attributes_for :response_documents end diff --git a/enterprise/app/services/features/response_bot_service.rb b/enterprise/app/services/features/response_bot_service.rb index 857ddfedd..da45cf48b 100644 --- a/enterprise/app/services/features/response_bot_service.rb +++ b/enterprise/app/services/features/response_bot_service.rb @@ -23,19 +23,31 @@ class Features::ResponseBotService def create_tables return unless vector_extension_enabled? - %i[response_sources response_documents responses].each do |table| + %i[response_sources response_documents responses inbox_response_sources].each do |table| send("create_#{table}_table") end end def drop_tables - %i[responses response_documents response_sources].each do |table| + %i[responses response_documents response_sources inbox_response_sources].each do |table| MIGRATION_VERSION.drop_table table if MIGRATION_VERSION.table_exists?(table) end end private + def create_inbox_response_sources_table + return if MIGRATION_VERSION.table_exists?(:inbox_response_sources) + + MIGRATION_VERSION.create_table :inbox_response_sources do |t| + t.references :inbox, null: false + t.references :response_source, null: false + t.index [:inbox_id, :response_source_id], name: 'index_inbox_response_sources_on_inbox_id_and_response_source_id', unique: true + t.index [:response_source_id, :inbox_id], name: 'index_inbox_response_sources_on_response_source_id_and_inbox_id', unique: true + t.timestamps + end + end + def create_response_sources_table return if MIGRATION_VERSION.table_exists?(:response_sources) @@ -45,7 +57,6 @@ class Features::ResponseBotService t.string :source_link t.references :source_model, polymorphic: true t.bigint :account_id, null: false - t.bigint :inbox_id, null: false t.timestamps end end @@ -69,9 +80,11 @@ class Features::ResponseBotService return if MIGRATION_VERSION.table_exists?(:responses) MIGRATION_VERSION.create_table :responses do |t| + t.bigint :response_source_id, null: false t.bigint :response_document_id t.string :question, null: false t.text :answer, null: false + t.integer :status, default: 0 t.bigint :account_id, null: false t.vector :embedding, limit: 1536 t.timestamps diff --git a/lib/tasks/auto_annotate_models.rake b/lib/tasks/auto_annotate_models.rake index 98c8d344e..8a89739b6 100644 --- a/lib/tasks/auto_annotate_models.rake +++ b/lib/tasks/auto_annotate_models.rake @@ -20,7 +20,10 @@ if Rails.env.development? 'show_complete_foreign_keys' => 'false', 'show_indexes' => 'true', 'simple_indexes' => 'false', - 'model_dir' => 'app/models', + 'model_dir' => [ + 'app/models', + 'enterprise/app/models', + ], 'root_dir' => '', 'include_version' => 'false', 'require' => '', diff --git a/spec/enterprise/controllers/api/v1/accounts/response_sources_controller_spec.rb b/spec/enterprise/controllers/api/v1/accounts/response_sources_controller_spec.rb index 74767da56..44e0dced1 100644 --- a/spec/enterprise/controllers/api/v1/accounts/response_sources_controller_spec.rb +++ b/spec/enterprise/controllers/api/v1/accounts/response_sources_controller_spec.rb @@ -3,7 +3,6 @@ require 'rails_helper' RSpec.describe 'Response Sources API', type: :request do let!(:account) { create(:account) } let!(:admin) { create(:user, account: account, role: :administrator) } - let!(:inbox) { create(:inbox, account: account) } before do skip('Skipping since vector is not enabled in this environment') unless Features::ResponseBotService.new.vector_extension_enabled? @@ -44,7 +43,6 @@ RSpec.describe 'Response Sources API', type: :request do response_source: { name: 'Test', source_link: 'http://test.test', - inbox_id: inbox.id, response_documents_attributes: [ { document_link: 'http://test1.test' }, { document_link: 'http://test2.test' } @@ -75,7 +73,7 @@ RSpec.describe 'Response Sources API', type: :request do end describe 'POST /api/v1/accounts/{account.id}/response_sources/{response_source.id}/add_document' do - let!(:response_source) { create(:response_source, account: account, inbox: inbox) } + let!(:response_source) { create(:response_source, account: account) } let(:valid_params) do { document_link: 'http://test.test' } end @@ -103,7 +101,7 @@ RSpec.describe 'Response Sources API', type: :request do end describe 'POST /api/v1/accounts/{account.id}/response_sources/{response_source.id}/remove_document' do - let!(:response_source) { create(:response_source, account: account, inbox: inbox) } + let!(:response_source) { create(:response_source, account: account) } let!(:response_document) { response_source.response_documents.create!(document_link: 'http://test.test') } let(:valid_params) do { document_id: response_document.id } diff --git a/spec/enterprise/controllers/enterprise/api/v1/accounts/inboxes_controller_spec.rb b/spec/enterprise/controllers/enterprise/api/v1/accounts/inboxes_controller_spec.rb index fbde6917f..7783dc384 100644 --- a/spec/enterprise/controllers/enterprise/api/v1/accounts/inboxes_controller_spec.rb +++ b/spec/enterprise/controllers/enterprise/api/v1/accounts/inboxes_controller_spec.rb @@ -71,7 +71,9 @@ RSpec.describe 'Enterprise Inboxes API', type: :request do end it 'returns all response_sources belonging to the inbox to administrators' do - response_source = create(:response_source, account: account, inbox: inbox) + response_source = create(:response_source, account: account) + inbox.response_sources << response_source + inbox.save! get "/api/v1/accounts/#{account.id}/inboxes/#{inbox.id}/response_sources", headers: administrator.create_new_auth_token, as: :json