feat: Response Bot using GPT and Webpage Sources (#7518)

This commit introduces the ability to associate response sources with an inbox, allowing external webpages to be parsed by Chatwoot. The parsed data is converted into embeddings for use with GPT models when managing customer queries.

The implementation relies on the `pgvector` extension for PostgreSQL. Database migrations related to this feature are handled separately by `Features::ResponseBotService`. A future update will integrate these migrations into the default Rails migrations, once compatibility with Postgres extensions across all self-hosted installation options is confirmed.

Additionally, a new GitHub action has been added to the CI pipeline to ensure the execution of specs related to this feature.
This commit is contained in:
Sojan Jose
2023-07-21 18:11:51 +03:00
committed by GitHub
parent 30f3928904
commit 480f34803b
41 changed files with 976 additions and 10 deletions

View File

@@ -0,0 +1,34 @@
# Account-scoped API endpoints for creating response sources, previewing the
# links found on a page, and attaching/detaching documents on a source.
class Api::V1::Accounts::ResponseSourcesController < Api::V1::Accounts::BaseController
  before_action :current_account
  before_action :check_authorization
  before_action :find_response_source, only: %i[add_document remove_document]

  # Fetches the page at params[:link] and responds with the links found on it.
  # NOTE(review): the link is user supplied and fetched server-side; consider
  # restricting the hosts that may be requested.
  def parse
    render json: { links: PageCrawlerService.new(params[:link]).page_links }
  end

  # Builds a response source under the current account from the permitted
  # params and saves it, raising if it is invalid.
  def create
    @response_source = Current.account.response_sources.build(response_source_params)
    @response_source.save!
  end

  # Attaches a new document (identified by params[:document_link]) to the source.
  def add_document
    documents = @response_source.response_documents
    documents.create!(document_link: params[:document_link])
  end

  # Removes the document identified by params[:document_id] from the source.
  def remove_document
    document = @response_source.response_documents.find(params[:document_id])
    document.destroy!
  end

  private

  # Looks the source up within the current account only.
  def find_response_source
    @response_source = Current.account.response_sources.find(params[:id])
  end

  # Strong parameters for creating a source together with its nested documents.
  def response_source_params
    permitted = [:name, :source_link, :inbox_id, { response_documents_attributes: [:document_link] }]
    params.require(:response_source).permit(*permitted)
  end
end

View File

@@ -1,4 +1,8 @@
module Enterprise::Api::V1::Accounts::InboxesController
# Loads the response sources associated with the current inbox into
# @response_sources.
# NOTE(review): @inbox is presumably assigned by a filter outside this hunk — confirm.
def response_sources
@response_sources = @inbox.response_sources
end
# Extends the attribute list returned by the parent implementation with the
# entries from ee_inbox_attributes (defined outside this hunk).
def inbox_attributes
super + ee_inbox_attributes
end

View File

@@ -0,0 +1,7 @@
# Background job that runs the response bot service for a conversation.
class ResponseBotJob < ApplicationJob
  queue_as :medium

  # @param conversation the conversation the bot should respond to
  def perform(conversation)
    bot_service = ::Enterprise::MessageTemplates::ResponseBotService.new(conversation: conversation)
    bot_service.perform
  end
end

View File

@@ -0,0 +1,76 @@
# Turns the content of a response document into FAQ-style responses by sending
# it to the OpenAI chat completions API and storing each returned
# question/answer pair as a response on the document.
class ResponseBuilderJob < ApplicationJob
  queue_as :default

  # @param response_document the document whose responses should be rebuilt
  # @raise [RuntimeError] when the OpenAI request does not succeed
  # @raise [JSON::ParserError] when the API or the model returns invalid JSON
  def perform(response_document)
    response = post_request(prepare_data(response_document))
    faqs = extract_faqs(response)

    # Only drop the existing responses once new ones were generated, and swap
    # them atomically so a failure midway does not leave the document empty.
    ActiveRecord::Base.transaction do
      reset_previous_responses(response_document)
      create_responses(faqs, response_document)
    end
  end

  private

  def reset_previous_responses(response_document)
    response_document.responses.destroy_all
  end

  # Request payload: the system prompt plus the document content as the user message.
  def prepare_data(response_document)
    {
      model: 'gpt-3.5-turbo',
      messages: [
        {
          role: 'system',
          content: system_message_content
        },
        {
          role: 'user',
          content: response_document.content
        }
      ]
    }
  end

  def system_message_content
    <<~SYSTEM_MESSAGE_CONTENT
      You are a content writer looking to convert user content into short FAQs which can be added to your website's helper centre.
      Format the webpage content provided in the message to FAQ format like the following example.#{' '}
      Ensure that you only generate faqs from the information provider in the message.#{' '}
      Ensure that output is always valid json.#{' '}
      If no match is available, return an empty JSON.
      ```
      [ { "question": "What is the pricing?",
      "answer" : " There are different pricing tiers available."
      }]
      ```
    SYSTEM_MESSAGE_CONTENT
  end

  def post_request(data)
    headers = prepare_headers
    HTTParty.post(
      'https://api.openai.com/v1/chat/completions',
      headers: headers,
      body: data.to_json
    )
  end

  def prepare_headers
    {
      'Content-Type' => 'application/json',
      'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY')}"
    }
  end

  # Extracts the list of FAQ hashes from the chat completion response.
  # Returns an empty array when the model produced no usable entries.
  def extract_faqs(response)
    raise "OpenAI chat completion failed with status #{response.code}" unless response.success?

    content = JSON.parse(response.body).dig('choices', 0, 'message', 'content').to_s.strip
    # The prompt shows its example inside ``` fences, so tolerate a fenced reply.
    content = content.sub(/\A```(?:json)?\s*/, '').sub(/\s*```\z/, '')
    return [] if content.empty?

    parsed = JSON.parse(content)
    # The prompt allows "an empty JSON", which may come back as {} instead of [].
    candidates = parsed.is_a?(Array) ? parsed : [parsed]
    candidates.select { |faq| faq.is_a?(Hash) && faq['question'].present? && faq['answer'].present? }
  end

  # Persists one response per FAQ entry under the document's account.
  def create_responses(faqs, response_document)
    faqs.each do |faq|
      response_document.responses.create!(
        question: faq['question'],
        answer: faq['answer'],
        account_id: response_document.account_id
      )
    end
  end
end

View File

@@ -0,0 +1,10 @@
# app/jobs/response_document_content_job.rb
# Fetches the page behind a response document's link and stores its body text
# (as produced by PageCrawlerService#body_text_content) on the document.
class ResponseDocumentContentJob < ApplicationJob
  queue_as :default

  # @param response_document the document whose content should be populated
  def perform(response_document)
    crawler = PageCrawlerService.new(response_document.document_link)
    page_text = crawler.body_text_content
    # Cap the stored content; the inclusive range keeps at most 15_001 characters.
    response_document.update!(content: page_text[0..15_000])
  end
end
end

View File

@@ -0,0 +1,15 @@
# Enterprise associations mixed into the account model.
module Enterprise::Concerns::Account
  extend ActiveSupport::Concern

  included do
    has_many :sla_policies, dependent: :destroy_async

    # Declares the response bot associations on the including class. Kept as a
    # class method so it can be invoked separately from the load-time check below.
    def self.add_response_related_associations
      %i[response_sources response_documents responses].each do |association_name|
        has_many association_name, dependent: :destroy_async
      end
    end

    # The associations are only declared when the `vector` extension is enabled
    # at the time this concern is included.
    add_response_related_associations if Features::ResponseBotService.new.vector_extension_enabled?
  end
end
end

View File

@@ -0,0 +1,13 @@
# Enterprise associations mixed into the inbox model.
module Enterprise::Concerns::Inbox
  extend ActiveSupport::Concern

  included do
    # Declares the response bot associations on the including class.
    #
    # Only response_sources carries an inbox_id column; response_documents and
    # responses are reached through the sources. Declaring them as direct
    # has_many associations would query an inbox_id column those tables do not
    # have. Their cleanup happens through the dependent: :destroy chain on
    # ResponseSource and ResponseDocument.
    def self.add_response_related_associations
      has_many :response_sources, dependent: :destroy_async
      has_many :response_documents, through: :response_sources
      has_many :responses, through: :response_sources
    end

    # The associations are only declared when the `vector` extension is enabled
    # at the time this concern is included.
    add_response_related_associations if Features::ResponseBotService.new.vector_extension_enabled?
  end
end
end

View File

@@ -1,7 +0,0 @@
# Concern that adds the sla_policies association to the including model.
module Enterprise::EnterpriseAccountConcern
extend ActiveSupport::Concern
included do
# SLA policies are removed asynchronously when the owning record is destroyed.
has_many :sla_policies, dependent: :destroy_async
end
end
end

View File

@@ -5,6 +5,19 @@ module Enterprise::Inbox
super - overloaded_agent_ids
end
# Returns up to five stored responses whose embeddings are nearest (by cosine
# distance) to the embedding of the given query text.
def get_responses(query)
  query_embedding = Openai::EmbeddingsService.new.get_embedding(query)
  ranked = responses.nearest_neighbors(:embedding, query_embedding, distance: 'cosine')
  ranked.first(5)
end
# True when the parent implementation reports an active bot, or when the
# response bot is enabled for this inbox.
def active_bot?
super || response_bot_enabled?
end
# Truthy when the account has the 'response_bot' feature enabled and this inbox
# has at least one response source.
# NOTE(review): response_sources appears to be declared only when the vector
# extension is enabled at load time — confirm it is always defined here.
def response_bot_enabled?
account.feature_enabled?('response_bot') && response_sources.any?
end
private
def get_agent_ids_over_assignment_limit(limit)

View File

@@ -0,0 +1,36 @@
# == Schema Information
#
# Table name: responses
#
# id :bigint not null, primary key
# answer :text not null
# embedding :vector(1536)
# question :string not null
# created_at :datetime not null
# updated_at :datetime not null
# account_id :bigint not null
# response_document_id :bigint
#
# Indexes
#
# index_responses_on_embedding (embedding) USING ivfflat
# index_responses_on_response_document_id (response_document_id)
#
# A question/answer pair generated from a response document, stored together
# with an embedding used for nearest-neighbour lookups.
class Response < ApplicationRecord
  belongs_to :response_document
  belongs_to :account
  has_neighbors :embedding, normalize: true

  before_save :update_response_embedding

  # Returns up to five responses nearest (cosine distance) to the query's embedding.
  def self.search(query)
    query_vector = Openai::EmbeddingsService.new.get_embedding(query)
    ranked = nearest_neighbors(:embedding, query_vector, distance: 'cosine')
    ranked.first(5)
  end

  private

  # Recomputes the embedding from "question: answer" before every save.
  def update_response_embedding
    embedding_input = "#{question}: #{answer}"
    self.embedding = Openai::EmbeddingsService.new.get_embedding(embedding_input)
  end
end

View File

@@ -0,0 +1,46 @@
# == Schema Information
#
# Table name: response_documents
#
# id :bigint not null, primary key
# content :text
# document_link :string
# document_type :string
# created_at :datetime not null
# updated_at :datetime not null
# account_id :bigint not null
# document_id :bigint
# response_source_id :bigint not null
#
# Indexes
#
# index_response_documents_on_document (document_type,document_id)
# index_response_documents_on_response_source_id (response_source_id)
#
# A document (currently a web page link) belonging to a response source. Its
# content is fetched in the background after creation, and responses are
# rebuilt whenever the stored content changes.
class ResponseDocument < ApplicationRecord
  has_many :responses, dependent: :destroy
  belongs_to :account
  belongs_to :response_source

  before_validation :set_account
  after_create :ensure_content
  after_update :handle_content_change

  private

  # Copies the account from the owning source. Safe navigation keeps a missing
  # source from raising NoMethodError here, so it is left to the belongs_to
  # validations to report.
  def set_account
    self.account = response_source&.account
  end

  # Schedules a content fetch when the document was created without content.
  def ensure_content
    ResponseDocumentContentJob.perform_later(self) if content.nil?
  end

  # Rebuilds the responses once non-blank content has been saved.
  def handle_content_change
    ResponseBuilderJob.perform_later(self) if saved_change_to_content? && content.present?
  end
end

View File

@@ -0,0 +1,28 @@
# == Schema Information
#
# Table name: response_sources
#
# id :bigint not null, primary key
# name :string not null
# source_link :string
# source_model_type :string
# source_type :integer default("external"), not null
# created_at :datetime not null
# updated_at :datetime not null
# account_id :bigint not null
# inbox_id :bigint not null
# source_model_id :bigint
#
# Indexes
#
# index_response_sources_on_source_model (source_model_type,source_model_id)
#
# A source of knowledge attached to an inbox; it owns the documents that
# responses are generated from.
class ResponseSource < ApplicationRecord
# Stored as an integer column; `external` (0) is the column default.
enum source_type: { external: 0, kbase: 1, inbox: 2 }
belongs_to :account
belongs_to :inbox
# Destroying a source destroys its documents.
has_many :response_documents, dependent: :destroy
has_many :responses, through: :response_documents
# Allows documents to be created together with the source via response_documents_attributes.
accepts_nested_attributes_for :response_documents
end
end

View File

@@ -0,0 +1,10 @@
# Enterprise extension of the message template hook: after the parent
# templates ran, hands the conversation to the response bot when applicable.
module Enterprise::MessageTemplates::HookExecutionService
  def trigger_templates
    super
    if should_process_response_bot?
      ResponseBotJob.perform_later(conversation)
    end
  end

  # The bot only handles incoming messages on pending conversations whose inbox
  # has the response bot enabled.
  def should_process_response_bot?
    conversation.pending? &&
      message.incoming? &&
      inbox.response_bot_enabled?
  end
end
end

View File

@@ -0,0 +1,121 @@
# Generates a bot reply for the latest message of a conversation, using the
# inbox's stored responses as context for ChatGpt, and either posts the reply
# (optionally followed by suggested articles) or hands the conversation off.
class Enterprise::MessageTemplates::ResponseBotService
  pattr_initialize [:conversation!]

  # Runs the bot for the conversation. Errors are reported to the exception
  # tracker instead of being raised; in that case the method returns true.
  def perform
    ActiveRecord::Base.transaction do
      # Read the latest message once so the content and the processed message
      # always refer to the same record.
      last_message = conversation.messages.last
      response = get_response(last_message.content)
      process_response(last_message, response)
    end
  rescue StandardError => e
    ChatwootExceptionTracker.new(e, account: conversation.account).capture_exception
    true
  end

  private

  delegate :contact, :account, :inbox, to: :conversation

  # Asks ChatGpt for a reply, passing the matching responses as context and the
  # conversation history as previous messages.
  def get_response(content)
    previous_messages = []
    get_previous_messages(previous_messages)
    ChatGpt.new(response_sections(content)).generate_response('', previous_messages)
  end

  # Appends every public, text incoming/outgoing message of the conversation to
  # the given array as { content:, role: } hashes.
  def get_previous_messages(previous_messages)
    conversation.messages.where(message_type: [:outgoing, :incoming]).where(private: false).find_each do |message|
      next if message.content_type != 'text'

      role = determine_role(message)
      previous_messages << { content: message.content, role: role }
    end
  end

  def determine_role(message)
    message.message_type == 'incoming' ? 'user' : 'system'
  end

  # Concatenates the nearest stored responses into the context string handed to ChatGpt.
  def response_sections(content)
    inbox.get_responses(content).map do |response|
      "{context_id: #{response.id}, context: #{response.question} ? #{response.answer}}"
    end.join
  end

  # A reply that is exactly 'conversation_handoff' triggers a handoff; anything
  # else is posted to the conversation.
  def process_response(message, response)
    if response == 'conversation_handoff'
      process_action(message, 'handoff')
    else
      create_messages(response, conversation)
    end
  end

  def process_action(_message, action)
    case action
    when 'handoff'
      conversation.messages.create!(message_type: :outgoing, account_id: conversation.account_id, inbox_id: conversation.inbox_id,
                                    content: 'passing to an agent')
      conversation.update(status: :open)
    end
  end

  def create_messages(response, conversation)
    response, article_ids = process_response_content(response)
    create_outgoing_message(response, conversation)
    create_outgoing_message_with_cards(article_ids, conversation) if article_ids.present?
  end

  # Splits a '{context_ids: [1, 2]}' marker off the reply.
  # Returns [reply_without_marker, ids]; ids is nil when no marker is present.
  def process_response_content(response)
    regex = /{context_ids: \[(\d+(?:, *\d+)*)\]}/
    id_string = response[regex, 1]
    article_ids = id_string.split(',').map(&:to_i) if id_string
    [response.sub(regex, ''), article_ids]
  end

  def create_outgoing_message(response, conversation)
    conversation.messages.create!(
      {
        message_type: :outgoing,
        account_id: conversation.account_id,
        inbox_id: conversation.inbox_id,
        content: response
      }
    )
  end

  def create_outgoing_message_with_cards(article_ids, conversation)
    content_attributes = get_article_hash(article_ids.uniq)
    return if content_attributes.blank?

    conversation.messages.create!(
      {
        message_type: :outgoing,
        account_id: conversation.account_id,
        inbox_id: conversation.inbox_id,
        content: 'suggested articles',
        content_type: 'article',
        content_attributes: content_attributes
      }
    )
  end

  # Builds the article card payload for the given response ids, skipping ids
  # that no longer resolve to a response. Returns {} when nothing resolves.
  def get_article_hash(article_ids)
    items = article_ids.filter_map do |article_id|
      # find_by returns nil for unknown ids (find would raise and, through the
      # surrounding transaction, discard the reply that was just created).
      response = Response.find_by(id: article_id)
      next if response.nil?

      {
        title: response.question,
        description: response.answer[0, 120],
        # response_document_id is nullable, so the document may be missing.
        link: response.response_document&.document_link
      }
    end
    items.present? ? { items: items } : {}
  end
end

View File

@@ -0,0 +1,83 @@
# Sets up and tears down the database pieces used by the response bot: the
# `vector` Postgres extension and the response_sources, response_documents and
# responses tables. Schema statements are issued through
# ActiveRecord::Migration[7.0] directly instead of migration files.
class Features::ResponseBotService
MIGRATION_VERSION = ActiveRecord::Migration[7.0]
# Enables the extension and then creates the tables; create_tables does
# nothing when the extension could not be enabled.
def enable_in_installation
enable_vector_extension
create_tables
end
# Tries to enable the `vector` extension; a failing statement is reported on
# stdout instead of being raised.
def enable_vector_extension
MIGRATION_VERSION.enable_extension 'vector'
rescue ActiveRecord::StatementInvalid
print 'Vector extension not available'
end
def disable_vector_extension
MIGRATION_VERSION.disable_extension 'vector'
end
# Asks the current database connection whether `vector` is enabled.
# NOTE(review): this is also called while model concerns are being included,
# so it presumably needs a reachable database at load time — confirm.
def vector_extension_enabled?
ActiveRecord::Base.connection.extension_enabled?('vector')
end
# Creates the three tables (each only if missing) when the extension is enabled.
def create_tables
return unless vector_extension_enabled?
%i[response_sources response_documents responses].each do |table|
# Dispatches to the private create_<table>_table helpers below.
send("create_#{table}_table")
end
end
# Drops the tables that exist, in the reverse of their creation order.
def drop_tables
%i[responses response_documents response_sources].each do |table|
MIGRATION_VERSION.drop_table table if MIGRATION_VERSION.table_exists?(table)
end
end
private
def create_response_sources_table
return if MIGRATION_VERSION.table_exists?(:response_sources)
MIGRATION_VERSION.create_table :response_sources do |t|
t.integer :source_type, null: false, default: 0
t.string :name, null: false
t.string :source_link
t.references :source_model, polymorphic: true
t.bigint :account_id, null: false
t.bigint :inbox_id, null: false
t.timestamps
end
end
def create_response_documents_table
return if MIGRATION_VERSION.table_exists?(:response_documents)
MIGRATION_VERSION.create_table :response_documents do |t|
t.bigint :response_source_id, null: false
t.string :document_link
t.references :document, polymorphic: true
t.text :content
t.bigint :account_id, null: false
t.timestamps
end
MIGRATION_VERSION.add_index :response_documents, :response_source_id
end
def create_responses_table
return if MIGRATION_VERSION.table_exists?(:responses)
MIGRATION_VERSION.create_table :responses do |t|
t.bigint :response_document_id
t.string :question, null: false
t.text :answer, null: false
t.bigint :account_id, null: false
# 1536-dimensional embedding column.
t.vector :embedding, limit: 1536
t.timestamps
end
MIGRATION_VERSION.add_index :responses, :response_document_id
# NOTE(review): the index uses vector_l2_ops while the lookups elsewhere in
# this change request cosine distance — confirm the index serves those queries.
MIGRATION_VERSION.add_index :responses, :embedding, using: :ivfflat, opclass: :vector_l2_ops
end
end

View File

@@ -0,0 +1,22 @@
# Thin client for the OpenAI embeddings endpoint.
class Openai::EmbeddingsService
  # Returns the embedding the API produced for the given content.
  def get_embedding(content)
    fetch_embeddings(content)
  end

  private

  # Posts the input to the embeddings endpoint and picks the 'embedding' value
  # out of the response's 'data' list.
  def fetch_embeddings(input)
    endpoint = URI('https://api.openai.com/v1/embeddings')
    payload = { input: input, model: 'text-embedding-ada-002' }.to_json
    api_response = Net::HTTP.post(endpoint, payload, request_headers)
    JSON.parse(api_response.body)['data'].pick('embedding')
  end

  # Requires OPENAI_API_KEY to be set; ENV.fetch raises KeyError otherwise.
  def request_headers
    {
      'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY')}",
      'Content-Type' => 'application/json'
    }
  end
end

View File

@@ -0,0 +1,38 @@
# Fetches a web page (or sitemap) and exposes its links, title and body text.
class PageCrawlerService
  attr_reader :external_link

  # Fetches and parses the document at external_link immediately.
  # NOTE(review): callers pass user-supplied URLs; consider restricting which
  # hosts may be fetched.
  #
  # @param external_link [String] absolute URL of the page or sitemap
  def initialize(external_link)
    @external_link = external_link
    @doc = Nokogiri::HTML(HTTParty.get(external_link).body)
  end

  # @return [Set<String>] links listed in the sitemap, or absolute http(s)
  #   links found in the page's anchors
  def page_links
    sitemap? ? extract_links_from_sitemap : extract_links_from_html
  end

  # @return [String, nil] the stripped <title> text, or nil when there is none
  def page_title
    @doc.at_xpath('//title')&.text&.strip
  end

  # Converts the page's <body> to Markdown via ReverseMarkdown.
  def body_text_content
    ReverseMarkdown.convert @doc.at_xpath('//body'), unknown_tags: :bypass, github_flavored: true
  end

  private

  # A link is treated as a sitemap when it ends in '.xml'.
  def sitemap?
    @external_link.end_with?('.xml')
  end

  def extract_links_from_sitemap
    @doc.xpath('//loc').to_set(&:text)
  end

  # Resolves every anchor href against the page URL, dropping hrefs that cannot
  # be crawled instead of letting one bad link abort the whole extraction.
  def extract_links_from_html
    @doc.xpath('//a/@href').each_with_object(Set.new) do |link, links|
      absolute_url = absolute_http_url(link.value)
      links << absolute_url if absolute_url
    end
  end

  # Returns the absolute URL for href, or nil when the href is malformed or does
  # not point to an http(s) resource (mailto:, javascript:, tel:, ...).
  def absolute_http_url(href)
    uri = URI.join(@external_link, href.to_s.strip)
    # URI::HTTPS inherits from URI::HTTP, so this accepts both schemes.
    uri.to_s if uri.is_a?(URI::HTTP)
  rescue URI::Error
    nil
  end
end