feat: Use embeddings in help center search (#9227)
This commit is contained in:
@@ -7,7 +7,7 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
|
|||||||
|
|
||||||
def index
|
def index
|
||||||
@articles = @portal.articles
|
@articles = @portal.articles
|
||||||
@articles = @articles.search(list_params) if list_params.present?
|
search_articles
|
||||||
order_by_sort_param
|
order_by_sort_param
|
||||||
@articles.page(list_params[:page]) if list_params[:page].present?
|
@articles.page(list_params[:page]) if list_params[:page].present?
|
||||||
end
|
end
|
||||||
@@ -16,6 +16,10 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
|
|||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
def search_articles
|
||||||
|
@articles = @articles.search(list_params) if list_params.present?
|
||||||
|
end
|
||||||
|
|
||||||
def order_by_sort_param
|
def order_by_sort_param
|
||||||
@articles = if list_params[:sort].present? && list_params[:sort] == 'views'
|
@articles = if list_params[:sort].present? && list_params[:sort] == 'views'
|
||||||
@articles.order_by_views
|
@articles.order_by_views
|
||||||
@@ -51,3 +55,5 @@ class Public::Api::V1::Portals::ArticlesController < Public::Api::V1::Portals::B
|
|||||||
ChatwootMarkdownRenderer.new(content).render_article
|
ChatwootMarkdownRenderer.new(content).render_article
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Public::Api::V1::Portals::ArticlesController.prepend_mod_with('Public::Api::V1::Portals::ArticlesController')
|
||||||
|
|||||||
@@ -170,3 +170,4 @@ class Article < ApplicationRecord
|
|||||||
self.slug ||= "#{Time.now.utc.to_i}-#{title.underscore.parameterize(separator: '-')}" if title.present?
|
self.slug ||= "#{Time.now.utc.to_i}-#{title.underscore.parameterize(separator: '-')}" if title.present?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
Article.include_mod_with('Concerns::Article')
|
||||||
|
|||||||
@@ -80,3 +80,6 @@
|
|||||||
- name: sla
|
- name: sla
|
||||||
enabled: false
|
enabled: false
|
||||||
premium: true
|
premium: true
|
||||||
|
- name: help_center_embedding_search
|
||||||
|
enabled: false
|
||||||
|
premium: true
|
||||||
|
|||||||
@@ -34,3 +34,4 @@ ActiveRecord::SchemaDumper.ignore_tables << 'responses'
|
|||||||
ActiveRecord::SchemaDumper.ignore_tables << 'response_sources'
|
ActiveRecord::SchemaDumper.ignore_tables << 'response_sources'
|
||||||
ActiveRecord::SchemaDumper.ignore_tables << 'response_documents'
|
ActiveRecord::SchemaDumper.ignore_tables << 'response_documents'
|
||||||
ActiveRecord::SchemaDumper.ignore_tables << 'inbox_response_sources'
|
ActiveRecord::SchemaDumper.ignore_tables << 'inbox_response_sources'
|
||||||
|
ActiveRecord::SchemaDumper.ignore_tables << 'article_embeddings'
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
# Enterprise override of the public help-center articles controller's search step.
module Enterprise::Public::Api::V1::Portals::ArticlesController
  private

  # Narrows @articles with embedding-based search when BOTH hold:
  #   * the request carries a non-blank `query` param, and
  #   * the portal's account has `help_center_embedding_search` enabled.
  #
  # Every other request is delegated to `super`. In particular, a listing
  # request that only carries other list params (no `query`) no longer reaches
  # `vector_search`, which would otherwise ask for an embedding of a nil query.
  def search_articles
    use_embedding_search = list_params[:query].present? &&
                           @portal.account.feature_enabled?('help_center_embedding_search')
    return super unless use_embedding_search

    @articles = @articles.vector_search(list_params)
  end
end
|
||||||
23
enterprise/app/models/article_embedding.rb
Normal file
23
enterprise/app/models/article_embedding.rb
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# == Schema Information
|
||||||
|
#
|
||||||
|
# Table name: article_embeddings
|
||||||
|
#
|
||||||
|
# id :bigint not null, primary key
|
||||||
|
# embedding :vector(1536)
|
||||||
|
# term :text not null
|
||||||
|
# created_at :datetime not null
|
||||||
|
# updated_at :datetime not null
|
||||||
|
# article_id :bigint not null
|
||||||
|
#
|
||||||
|
# One search term belonging to an article, stored alongside the embedding
# vector computed for that term.
class ArticleEmbedding < ApplicationRecord
  belongs_to :article
  has_neighbors :embedding, normalize: true

  before_save :update_response_embedding

  private

  # Fetches the embedding for `term` from the OpenAI embeddings service and
  # assigns it to `embedding`. Runs before every save of the record.
  def update_response_embedding
    embeddings_service = Openai::EmbeddingsService.new
    self.embedding = embeddings_service.get_embedding(term, 'text-embedding-3-small')
  end
end
|
||||||
71
enterprise/app/models/enterprise/concerns/article.rb
Normal file
71
enterprise/app/models/enterprise/concerns/article.rb
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
# Enterprise additions to Article: keeps per-article search-term embeddings
# up to date and provides an embedding-based `vector_search` class method.
module Enterprise::Concerns::Article
  extend ActiveSupport::Concern

  included do
    # Rebuild the embeddings only when one of the text fields changed in the save that just happened.
    after_save :add_article_embedding, if: -> { %i[title description content].any? { |field| saved_change_to_attribute?(field) } }

    def self.add_article_embedding_association
      has_many :article_embeddings, dependent: :destroy_async
    end

    # The association is declared only when the feature service reports the feature as available.
    add_article_embedding_association if Features::HelpcenterEmbeddingSearchService.new.feature_enabled?

    # Returns the articles whose stored term embeddings are closest to the
    # embedding of params['query'], restricted to articles that match the
    # category slug / locale / author / status filters in `params`.
    def self.vector_search(params)
      query_embedding = Openai::EmbeddingsService.new.get_embedding(params['query'], 'text-embedding-3-small')

      candidate_ids = joins(:category)
                      .search_by_category_slug(params[:category_slug])
                      .search_by_category_locale(params[:locale])
                      .search_by_author(params[:author_id])
                      .search_by_status(params[:status])
                      .pluck(:id)

      # Nearest neighbours by cosine distance among the candidates' embeddings.
      #
      # Experiment (not enabled): drop results beyond a distance threshold, e.g.
      #   distance_threshold = 0.2
      #   .filter { |ae| ae.neighbor_distance <= distance_threshold }
      nearest_article_ids = ArticleEmbedding
                            .where(article_id: candidate_ids)
                            .nearest_neighbors(:embedding, query_embedding, distance: 'cosine')
                            .limit(5)
                            .pluck(:article_id)

      # Load the articles referenced by the nearest embeddings.
      where(id: nearest_article_ids)
    end
  end

  # Replaces this article's embeddings with ones built from freshly generated
  # search terms. Does nothing unless the account has the feature enabled.
  def add_article_embedding
    return unless account.feature_enabled?('help_center_embedding_search')

    search_terms = generate_article_search_terms
    article_embeddings.destroy_all
    search_terms.each do |search_term|
      article_embeddings.create!(term: search_term)
    end
  end

  # System prompt asking the model for search terms as a JSON object.
  def article_to_search_terms_prompt
    <<~SYSTEM_PROMPT_MESSAGE
      For the provided article content, generate potential search query keywords and snippets that can be used to generate the embeddings.
      Ensure the search terms are as diverse as possible but capture the essence of the article and are super related to the articles.
      Don't return any terms if there aren't any terms of relevance.
      Always return results in valid JSON of the following format
      {
      "search_terms": []
      }
    SYSTEM_PROMPT_MESSAGE
  end

  # Sends the article's title, description and content to the OpenAI chat
  # completions endpoint and returns the `search_terms` value parsed from the
  # JSON content of the first choice.
  def generate_article_search_terms
    system_message = { role: 'system', content: article_to_search_terms_prompt }
    user_message = { role: 'user', content: "title: #{title} \n description: #{description} \n content: #{content}" }
    messages = [system_message, user_message]

    headers = { 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', nil)}" }
    body = { model: 'gpt-4-turbo', messages: messages, response_format: { type: 'json_object' } }.to_json

    Rails.logger.info "Requesting Chat GPT with body: #{body}"
    response = HTTParty.post('https://api.openai.com/v1/chat/completions', headers: headers, body: body)
    Rails.logger.info "Chat GPT response: #{response.body}"

    completion_content = response.parsed_response['choices'][0]['message']['content']
    JSON.parse(completion_content)['search_terms']
  end
end
|
||||||
7
enterprise/app/services/features/base_service.rb
Normal file
7
enterprise/app/services/features/base_service.rb
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# Common base class for installation-level feature services.
class Features::BaseService
  # Migration class through which schema statements (create/drop table, indexes) are issued.
  MIGRATION_VERSION = ActiveRecord::Migration[7.0]

  # @return [Boolean] whether the `vector` extension is enabled on the current database connection
  def vector_extension_enabled?
    connection = ActiveRecord::Base.connection
    connection.extension_enabled?('vector')
  end
end
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
# Creates and drops the storage needed by help-center embedding search.
#
# Ensure the vector extension is enabled via the response bot service first:
# this service never enables the extension itself and skips table creation
# when the extension is missing.
class Features::HelpcenterEmbeddingSearchService < Features::BaseService
  TABLES = %i[article_embeddings].freeze

  # Creates the tables backing the feature (no-op without the vector extension).
  def enable_in_installation
    create_tables
  end

  # Drops the tables backing the feature.
  def disable_in_installation
    drop_tables
  end

  # @return [Boolean] true when the vector extension is enabled and the
  #   article_embeddings table exists
  def feature_enabled?
    vector_extension_enabled? && MIGRATION_VERSION.table_exists?(:article_embeddings)
  end

  # Runs the private `create_<table>_table` builder for every feature table.
  def create_tables
    return unless vector_extension_enabled?

    TABLES.each do |table|
      send("create_#{table}_table")
    end
  end

  # Drops every feature table that currently exists.
  def drop_tables
    TABLES.each do |table|
      MIGRATION_VERSION.drop_table table if MIGRATION_VERSION.table_exists?(table)
    end
  end

  private

  # Creates article_embeddings (article_id, term, 1536-dimension embedding,
  # timestamps) plus an ivfflat index on the embedding column. Does nothing
  # when the table already exists.
  def create_article_embeddings_table
    return if MIGRATION_VERSION.table_exists?(:article_embeddings)

    MIGRATION_VERSION.create_table :article_embeddings do |t|
      t.bigint :article_id, null: false
      t.text :term, null: false
      t.vector :embedding, limit: 1536
      t.timestamps
    end

    # The index must target the table created above. The previous table name
    # (`:article_embeddingsk`) was a typo, so add_index raised right after the
    # table had been created and left it without an index.
    # NOTE(review): the index uses vector_l2_ops; if lookups are done by cosine
    # distance, confirm whether vector_cosine_ops is needed for the index to be used.
    MIGRATION_VERSION.add_index :article_embeddings, :embedding, using: :ivfflat, opclass: :vector_l2_ops
  end
end
|
||||||
@@ -1,6 +1,4 @@
|
|||||||
class Features::ResponseBotService
|
class Features::ResponseBotService < Features::BaseService
|
||||||
MIGRATION_VERSION = ActiveRecord::Migration[7.0]
|
|
||||||
|
|
||||||
def enable_in_installation
|
def enable_in_installation
|
||||||
enable_vector_extension
|
enable_vector_extension
|
||||||
create_tables
|
create_tables
|
||||||
@@ -21,10 +19,6 @@ class Features::ResponseBotService
|
|||||||
MIGRATION_VERSION.disable_extension 'vector'
|
MIGRATION_VERSION.disable_extension 'vector'
|
||||||
end
|
end
|
||||||
|
|
||||||
def vector_extension_enabled?
|
|
||||||
ActiveRecord::Base.connection.extension_enabled?('vector')
|
|
||||||
end
|
|
||||||
|
|
||||||
def create_tables
|
def create_tables
|
||||||
return unless vector_extension_enabled?
|
return unless vector_extension_enabled?
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
class Openai::EmbeddingsService
|
class Openai::EmbeddingsService
|
||||||
def get_embedding(content)
|
def get_embedding(content, model = 'text-embedding-ada-002')
|
||||||
fetch_embeddings(content)
|
fetch_embeddings(content, model)
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def fetch_embeddings(input)
|
def fetch_embeddings(input, model)
|
||||||
url = 'https://api.openai.com/v1/embeddings'
|
url = 'https://api.openai.com/v1/embeddings'
|
||||||
headers = {
|
headers = {
|
||||||
'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', '')}",
|
'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', '')}",
|
||||||
@@ -13,7 +13,7 @@ class Openai::EmbeddingsService
|
|||||||
}
|
}
|
||||||
data = {
|
data = {
|
||||||
input: input,
|
input: input,
|
||||||
model: 'text-embedding-ada-002'
|
model: model
|
||||||
}
|
}
|
||||||
|
|
||||||
response = Net::HTTP.post(URI(url), data.to_json, headers)
|
response = Net::HTTP.post(URI(url), data.to_json, headers)
|
||||||
|
|||||||
@@ -0,0 +1,19 @@
|
|||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
# Request spec: with the embedding-search feature enabled for the portal's
# account, a help-center article search must go through Article.vector_search.
RSpec.describe 'Public Articles API', type: :request do
  let!(:portal) { create(:portal, slug: 'test-portal', config: { allowed_locales: %w[en es] }, custom_domain: 'www.example.com') }

  describe 'GET /public/api/v1/portals/:slug/articles' do
    before { portal.account.enable_features!(:help_center_embedding_search) }

    context 'with help_center_embedding_search feature' do
      it 'get all articles with searched text query using vector search if enabled' do
        # Stub the search so no embedding request is made; only the call itself is verified.
        allow(Article).to receive(:vector_search)

        articles_path = "/hc/#{portal.slug}/en/articles.json"
        get articles_path, params: { query: 'funny' }

        expect(Article).to have_received(:vector_search)
      end
    end
  end
end
|
||||||
Reference in New Issue
Block a user