feat: Response Bot using GPT and Webpage Sources (#7518)

This commit introduces the ability to associate response sources to an inbox, allowing external webpages to be parsed by Chatwoot. The parsed data is converted into embeddings for use with GPT models when managing customer queries.

The implementation relies on the `pgvector` extension for PostgreSQL. Database migrations related to this feature are handled separately by `Features::ResponseBotService`. A future update will integrate these migrations into the default Rails migrations, once compatibility with Postgres extensions across all self-hosted installation options is confirmed.

Additionally, a new GitHub action has been added to the CI pipeline to ensure the execution of specs related to this feature.
This commit is contained in:
Sojan Jose
2023-07-21 18:11:51 +03:00
committed by GitHub
parent 30f3928904
commit 480f34803b
41 changed files with 976 additions and 10 deletions

View File

@@ -0,0 +1,10 @@
# Adds response-bot handling on top of the template hooks of whatever class
# this module is mixed into. `trigger_templates` calls `super`, so the host
# must already define it; `conversation`, `message` and `inbox` are expected
# to be provided by the host as well.
module Enterprise::MessageTemplates::HookExecutionService
  # Runs the host's template triggers, then enqueues ResponseBotJob for the
  # conversation when the response bot should handle it.
  def trigger_templates
    super
    return unless should_process_response_bot?

    ResponseBotJob.perform_later(conversation)
  end

  # True only when the conversation is pending, the message is incoming and
  # the inbox has the response bot enabled. Checks short-circuit in that order.
  def should_process_response_bot?
    conversation.pending? &&
      message.incoming? &&
      inbox.response_bot_enabled?
  end
end

View File

@@ -0,0 +1,121 @@
# Produces the bot reply for a conversation: builds a context string from the
# inbox's stored responses, asks ChatGpt for an answer, and then either posts
# the answer (optionally followed by an article-cards message) or hands the
# conversation over to a human agent.
class Enterprise::MessageTemplates::ResponseBotService
  pattr_initialize [:conversation!]

  # Generates and stores the bot reply for the latest message.
  #
  # Any StandardError is reported through ChatwootExceptionTracker and
  # swallowed, in which case the method returns true.
  def perform
    message = conversation.messages.last
    # Generating the response only reads data, so it runs before the
    # transaction is opened instead of holding it open for the whole call.
    response = get_response(message.content)

    ActiveRecord::Base.transaction do
      process_response(message, response)
    end
  rescue StandardError => e
    ChatwootExceptionTracker.new(e, account: conversation.account).capture_exception
    true
  end

  private

  delegate :contact, :account, :inbox, to: :conversation

  # Asks ChatGpt for a reply to `content`, passing the conversation history.
  def get_response(content)
    previous_messages = []
    get_previous_messages(previous_messages)
    ChatGpt.new(response_sections(content)).generate_response('', previous_messages)
  end

  # Appends every public, text-type incoming/outgoing message of the
  # conversation to `previous_messages` as `{ content:, role: }` hashes.
  def get_previous_messages(previous_messages)
    conversation.messages.where(message_type: [:outgoing, :incoming], private: false).find_each do |message|
      next unless message.content_type == 'text'

      previous_messages << { content: message.content, role: determine_role(message) }
    end
  end

  # Maps a message to the role string sent along with the history.
  # NOTE(review): outgoing messages are tagged 'system'; OpenAI's chat format
  # normally uses 'assistant' for prior replies. Confirm what ChatGpt expects.
  def determine_role(message)
    message.message_type == 'incoming' ? 'user' : 'system'
  end

  # Concatenates the inbox responses matching `content` into one context
  # string; returns '' when there are none.
  def response_sections(content)
    inbox.get_responses(content).map do |response|
      "{context_id: #{response.id}, context: #{response.question} ? #{response.answer}}"
    end.join
  end

  # Dispatches on the generated text: the literal 'conversation_handoff'
  # triggers a handoff, anything else is posted as a reply.
  def process_response(message, response)
    if response == 'conversation_handoff'
      process_action(message, 'handoff')
    else
      create_messages(response, conversation)
    end
  end

  # Executes a bot action. Only 'handoff' is handled: it posts a notice and
  # opens the conversation. Unknown actions do nothing.
  def process_action(_message, action)
    case action
    when 'handoff'
      create_outgoing_message('passing to an agent', conversation)
      conversation.update(status: :open)
    end
  end

  # Posts the reply text and, when the reply referenced context ids, a
  # second message carrying the matching article cards.
  def create_messages(response, conversation)
    response, article_ids = process_response_content(response)
    create_outgoing_message(response, conversation)
    create_outgoing_message_with_cards(article_ids, conversation) if article_ids.present?
  end

  # Splits a '{context_ids: [42, 43]}' marker out of the reply.
  #
  # Returns `[reply_without_marker, ids]`; `ids` is an empty array when the
  # reply carries no marker.
  def process_response_content(response)
    regex = /{context_ids: \[(\d+(?:, *\d+)*)\]}/
    id_string = response[regex, 1] # e.g. '42, 43'
    article_ids = id_string ? id_string.split(',').map(&:to_i) : []

    [response.sub(regex, ''), article_ids]
  end

  # Creates a plain outgoing message with `response` as its content.
  def create_outgoing_message(response, conversation)
    conversation.messages.create!(
      message_type: :outgoing,
      account_id: conversation.account_id,
      inbox_id: conversation.inbox_id,
      content: response
    )
  end

  # Creates an 'article' message listing the referenced responses; does
  # nothing when none of the ids resolve to a record.
  def create_outgoing_message_with_cards(article_ids, conversation)
    content_attributes = get_article_hash(article_ids.uniq)
    return if content_attributes.blank?

    conversation.messages.create!(
      message_type: :outgoing,
      account_id: conversation.account_id,
      inbox_id: conversation.inbox_id,
      content: 'suggested articles',
      content_type: 'article',
      content_attributes: content_attributes
    )
  end

  # Builds `{ items: [...] }` card data for the given response ids, or `{}`
  # when none of them can be resolved.
  def get_article_hash(article_ids)
    items = article_ids.filter_map do |article_id|
      # The ids are parsed out of generated text, so they may not exist.
      # `find_by` returns nil instead of raising (which would roll back the
      # reply created just before), and the account scope keeps records of
      # other accounts out of the cards.
      response = Response.find_by(id: article_id, account_id: conversation.account_id)
      next if response.nil?

      {
        title: response.question,
        description: response.answer[0, 120],
        # response_document_id is nullable, so the document may be absent.
        link: response.response_document&.document_link
      }
    end

    items.present? ? { items: items } : {}
  end
end

View File

@@ -0,0 +1,83 @@
# Sets up and tears down the database objects used by the response bot: the
# Postgres `vector` extension and the response_sources, response_documents
# and responses tables. All schema changes go through MIGRATION_VERSION.
class Features::ResponseBotService
  MIGRATION_VERSION = ActiveRecord::Migration[7.0]

  # Enables the vector extension and then creates the tables.
  def enable_in_installation
    enable_vector_extension
    create_tables
  end

  # Tries to enable the `vector` extension; on ActiveRecord::StatementInvalid
  # it prints a notice instead of raising.
  def enable_vector_extension
    MIGRATION_VERSION.enable_extension 'vector'
  rescue ActiveRecord::StatementInvalid
    print 'Vector extension not available'
  end

  def disable_vector_extension
    MIGRATION_VERSION.disable_extension 'vector'
  end

  def vector_extension_enabled?
    ActiveRecord::Base.connection.extension_enabled?('vector')
  end

  # Creates the three tables in order (sources, documents, responses).
  # Does nothing unless the vector extension is enabled.
  def create_tables
    return unless vector_extension_enabled?

    table_names = %i[response_sources response_documents responses]
    table_names.each { |name| send(:"create_#{name}_table") }
  end

  # Drops whichever of the three tables exist, in the reverse of creation order.
  def drop_tables
    %i[responses response_documents response_sources].each do |name|
      next unless MIGRATION_VERSION.table_exists?(name)

      MIGRATION_VERSION.drop_table name
    end
  end

  private

  # Creates response_sources unless it already exists.
  def create_response_sources_table
    return if MIGRATION_VERSION.table_exists?(:response_sources)

    MIGRATION_VERSION.create_table :response_sources do |table|
      table.integer :source_type, null: false, default: 0
      table.string :name, null: false
      table.string :source_link
      table.references :source_model, polymorphic: true
      table.bigint :account_id, null: false
      table.bigint :inbox_id, null: false
      table.timestamps
    end
  end

  # Creates response_documents (indexed on response_source_id) unless it
  # already exists.
  def create_response_documents_table
    return if MIGRATION_VERSION.table_exists?(:response_documents)

    MIGRATION_VERSION.create_table :response_documents do |table|
      table.bigint :response_source_id, null: false
      table.string :document_link
      table.references :document, polymorphic: true
      table.text :content
      table.bigint :account_id, null: false
      table.timestamps
    end

    MIGRATION_VERSION.add_index :response_documents, :response_source_id
  end

  # Creates responses unless it already exists, with an index on
  # response_document_id and an ivfflat (vector_l2_ops) index on the
  # 1536-wide embedding column.
  def create_responses_table
    return if MIGRATION_VERSION.table_exists?(:responses)

    MIGRATION_VERSION.create_table :responses do |table|
      table.bigint :response_document_id
      table.string :question, null: false
      table.text :answer, null: false
      table.bigint :account_id, null: false
      table.vector :embedding, limit: 1536
      table.timestamps
    end

    MIGRATION_VERSION.add_index :responses, :response_document_id
    MIGRATION_VERSION.add_index :responses, :embedding, using: :ivfflat, opclass: :vector_l2_ops
  end
end

View File

@@ -0,0 +1,22 @@
# Small client for the OpenAI embeddings endpoint.
class Openai::EmbeddingsService
  # Requests an embedding for `content`.
  #
  # @param content [String] text sent as the request's `input`
  # @return [Object, nil] the `embedding` value of the first entry in the
  #   response's `data` list, or nil when that list is empty
  # @raise [KeyError] when OPENAI_API_KEY is not set
  # @raise [RuntimeError] when the request is not successful or the response
  #   has no `data` list
  def get_embedding(content)
    fetch_embeddings(content)
  end

  private

  # Posts `input` to the embeddings endpoint and extracts the first embedding.
  def fetch_embeddings(input)
    url = 'https://api.openai.com/v1/embeddings'
    headers = {
      'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY')}",
      'Content-Type' => 'application/json'
    }
    data = {
      input: input,
      model: 'text-embedding-ada-002'
    }

    response = Net::HTTP.post(URI(url), data.to_json, headers)
    # Fail with the HTTP status and body instead of a NoMethodError on nil
    # (or a JSON parse error on a non-JSON error page) further down.
    unless response.is_a?(Net::HTTPSuccess)
      raise "OpenAI embeddings request failed with HTTP #{response.code}: #{response.body.to_s[0, 500]}"
    end

    entries = JSON.parse(response.body)['data']
    raise 'OpenAI embeddings response did not include a data list' unless entries.is_a?(Array)

    entries.dig(0, 'embedding')
  end
end

View File

@@ -0,0 +1,38 @@
# Fetches a page (or sitemap) and exposes its links, title and body text.
class PageCrawlerService
  attr_reader :external_link

  # Downloads `external_link` with HTTParty and parses the body with
  # Nokogiri's HTML parser.
  #
  # @param external_link [String] URL of the page or sitemap to crawl
  def initialize(external_link)
    @external_link = external_link
    @doc = Nokogiri::HTML(HTTParty.get(external_link).body)
  end

  # Links found in the document.
  #
  # @return [Set<String>] `<loc>` texts when the link ends in '.xml',
  #   otherwise the absolute form of every parseable `<a href>`
  def page_links
    sitemap? ? extract_links_from_sitemap : extract_links_from_html
  end

  # @return [String, nil] stripped text of the `<title>` element, or nil when
  #   the document has none
  def page_title
    @doc.at_xpath('//title')&.text&.strip
  end

  # Converts the `<body>` element to Markdown via ReverseMarkdown.
  def body_text_content
    ReverseMarkdown.convert @doc.at_xpath('//body'), unknown_tags: :bypass, github_flavored: true
  end

  private

  # A link is treated as a sitemap purely by its '.xml' suffix.
  def sitemap?
    @external_link.end_with?('.xml')
  end

  def extract_links_from_sitemap
    @doc.xpath('//loc').to_set(&:text)
  end

  # Resolves every href against the crawled URL, dropping the ones that
  # cannot be parsed.
  def extract_links_from_html
    @doc.xpath('//a/@href').filter_map { |link| absolute_url(link.value) }.to_set
  end

  # Returns `href` resolved against the crawled URL, or nil when `href` is not
  # a valid URI (e.g. contains raw spaces). Such links are skipped on purpose
  # so that one malformed anchor does not abort the crawl of the whole page.
  def absolute_url(href)
    URI.join(@external_link, href).to_s
  rescue URI::InvalidURIError
    nil
  end
end