feat: Use embeddings in help center search (#9227)
This commit is contained in:
23
enterprise/app/models/article_embedding.rb
Normal file
23
enterprise/app/models/article_embedding.rb
Normal file
@@ -0,0 +1,23 @@
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: article_embeddings
|
||||
#
|
||||
# id :bigint not null, primary key
|
||||
# embedding :vector(1536)
|
||||
# term :text not null
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
# article_id :bigint not null
|
||||
#
|
||||
# Stores a single search term for an article together with the vector
# embedding computed for that term.
class ArticleEmbedding < ApplicationRecord
  belongs_to :article
  has_neighbors :embedding, normalize: true

  # Only ask the embeddings service for a new vector when one is actually
  # needed; saves that leave `term` untouched keep the stored embedding and
  # skip the remote call.
  before_save :update_response_embedding, if: :embedding_refresh_needed?

  private

  # Replaces `embedding` with the vector Openai::EmbeddingsService returns for `term`.
  def update_response_embedding
    self.embedding = Openai::EmbeddingsService.new.get_embedding(term, 'text-embedding-3-small')
  end

  # True when no embedding is stored yet or `term` is about to be persisted
  # with a new value.
  def embedding_refresh_needed?
    embedding.nil? || will_save_change_to_term?
  end
end
|
||||
71
enterprise/app/models/enterprise/concerns/article.rb
Normal file
71
enterprise/app/models/enterprise/concerns/article.rb
Normal file
@@ -0,0 +1,71 @@
|
||||
# Embedding-based search support for articles: keeps a set of search-term
# embeddings per article up to date and exposes a nearest-neighbour search
# over them.
module Enterprise::Concerns::Article
  extend ActiveSupport::Concern

  included do
    # Regenerate the embeddings only when one of the fields that feed the
    # search-term prompt was changed by the save.
    after_save :add_article_embedding, if: -> { saved_change_to_title? || saved_change_to_description? || saved_change_to_content? }

    def self.add_article_embedding_association
      has_many :article_embeddings, dependent: :destroy_async
    end

    # The association is only declared when the feature service reports the
    # feature as enabled at load time.
    add_article_embedding_association if Features::HelpcenterEmbeddingSearchService.new.feature_enabled?

    # Finds the articles whose stored search-term embeddings are closest
    # (cosine distance) to the embedding of the query.
    #
    # @param params reads 'query', :category_slug, :locale, :author_id and :status
    # @return relation of at most five articles, closest match first
    def self.vector_search(params)
      embedding = Openai::EmbeddingsService.new.get_embedding(params['query'], 'text-embedding-3-small')
      records = joins(:category)
                .search_by_category_slug(params[:category_slug])
                .search_by_category_locale(params[:locale])
                .search_by_author(params[:author_id])
                .search_by_status(params[:status])
      filtered_article_ids = records.pluck(:id)

      # Fetch nearest neighbors and their distances, then filter directly
      #
      # experimenting with filtering results based on result threshold
      # distance_threshold = 0.2
      # if using add the filter block to the below query
      # .filter { |ae| ae.neighbor_distance <= distance_threshold }

      # An article can own several embeddings (one per term), so the same id
      # may come back more than once; `uniq` keeps the first, i.e. closest, hit.
      article_ids = ArticleEmbedding.where(article_id: filtered_article_ids)
                                    .nearest_neighbors(:embedding, embedding, distance: 'cosine')
                                    .limit(5)
                                    .pluck(:article_id)
                                    .uniq

      # Fetch the articles by the IDs obtained from the nearest neighbors
      # search. A bare `where(id: ...)` would return them in database order,
      # so keep the relevance ranking explicitly.
      where(id: article_ids).in_order_of(:id, article_ids)
    end
  end

  # Rebuilds the article's search-term embeddings from freshly generated terms.
  # Does nothing when the account has the feature disabled, when the
  # `article_embeddings` association was not declared on the model, or when
  # term generation failed (so existing embeddings are not wiped).
  def add_article_embedding
    return unless account.feature_enabled?('help_center_embedding_search')
    # The association is declared conditionally in the `included` block; the
    # account-level flag above can be on while the association is absent.
    return unless self.class.reflect_on_association(:article_embeddings)

    terms = generate_article_search_terms
    # nil means generation failed; an empty array is a valid "no relevant terms" answer.
    return if terms.nil?

    article_embeddings.destroy_all
    terms.each { |term| article_embeddings.create!(term: term) }
  end

  # System prompt instructing the model to answer with a JSON object that
  # contains a `search_terms` array.
  def article_to_search_terms_prompt
    <<~SYSTEM_PROMPT_MESSAGE
      For the provided article content, generate potential search query keywords and snippets that can be used to generate the embeddings.
      Ensure the search terms are as diverse as possible but capture the essence of the article and are super related to the articles.
      Don't return any terms if there aren't any terms of relevance.
      Always return results in valid JSON of the following format
      {
      "search_terms": []
      }
    SYSTEM_PROMPT_MESSAGE
  end

  # Asks the OpenAI chat completions endpoint for search terms describing
  # this article's title, description and content.
  #
  # @return [Array, nil] the generated terms, or nil when the request was not
  #   successful or the response could not be interpreted
  def generate_article_search_terms
    messages = [
      { role: 'system', content: article_to_search_terms_prompt },
      { role: 'user', content: "title: #{title} \n description: #{description} \n content: #{content}" }
    ]
    headers = { 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', nil)}" }
    body = { model: 'gpt-4-turbo', messages: messages, response_format: { type: 'json_object' } }.to_json
    Rails.logger.info "Requesting Chat GPT with body: #{body}"
    response = HTTParty.post('https://api.openai.com/v1/chat/completions', headers: headers, body: body)
    Rails.logger.info "Chat GPT response: #{response.body}"
    extract_search_terms(response)
  end

  private

  # Pulls the `search_terms` array out of a chat completion response.
  # Logs and returns nil instead of raising when the request was not
  # successful or the payload does not have the expected shape, so a bad
  # response does not raise out of the after_save callback.
  def extract_search_terms(response)
    payload = response.parsed_response
    content = payload.dig('choices', 0, 'message', 'content') if response.success? && payload.is_a?(Hash)
    if content.blank?
      Rails.logger.error "Chat GPT search term generation failed with status #{response.code}"
      return
    end

    parsed = JSON.parse(content)
    terms = parsed['search_terms'] if parsed.is_a?(Hash)
    # Drop nil/blank entries before they are handed to the embedding records.
    Array(terms).reject(&:blank?)
  rescue JSON::ParserError, TypeError => e
    Rails.logger.error "Chat GPT returned an unexpected search terms payload: #{e.message}"
    nil
  end
end
|
||||
Reference in New Issue
Block a user