feat: Response Bot using GPT and Webpage Sources (#7518)

This commit introduces the ability to associate response sources to an inbox, allowing external webpages to be parsed by Chatwoot. The parsed data is converted into embeddings for use with GPT models when managing customer queries. The implementation relies on the `pgvector` extension for PostgreSQL. Database migrations related to this feature are handled separately by `Features::ResponseBotService`. A future update will integrate these migrations into the default rails migrations, once compatibility with Postgres extensions across all self-hosted installation options is confirmed. Additionally, a new GitHub action has been added to the CI pipeline to ensure the execution of specs related to this feature.
2023-07-21 18:11:51 +03:00
parent 30f3928904
commit 480f34803b
41 changed files with 976 additions and 10 deletions
--- a/enterprise/app/controllers/api/v1/accounts/response_sources_controller.rb
+++ b/enterprise/app/controllers/api/v1/accounts/response_sources_controller.rb
@@ -0,0 +1,34 @@
+class Api::V1::Accounts::ResponseSourcesController < Api::V1::Accounts::BaseController
+  before_action :current_account
+  before_action :check_authorization
+  before_action :find_response_source, only: [:add_document, :remove_document]
+
+  def parse
+    links = PageCrawlerService.new(params[:link]).page_links
+    render json: { links: links }
+  end
+
+  def create
+    @response_source = Current.account.response_sources.new(response_source_params)
+    @response_source.save!
+  end
+
+  def add_document
+    @response_source.response_documents.create!(document_link: params[:document_link])
+  end
+
+  def remove_document
+    @response_source.response_documents.find(params[:document_id]).destroy!
+  end
+
+  private
+
+  def find_response_source
+    @response_source = Current.account.response_sources.find(params[:id])
+  end
+
+  def response_source_params
+    params.require(:response_source).permit(:name, :source_link, :inbox_id,
+                                            response_documents_attributes: [:document_link])
+  end
+end
--- a/enterprise/app/controllers/enterprise/api/v1/accounts/inboxes_controller.rb
+++ b/enterprise/app/controllers/enterprise/api/v1/accounts/inboxes_controller.rb
@@ -1,4 +1,8 @@
 module Enterprise::Api::V1::Accounts::InboxesController
+  # Exposes the response sources of @inbox through @response_sources.
+  # NOTE(review): @inbox is presumably assigned by the host controller before this action runs — confirm.
+  def response_sources
+    @response_sources = @inbox.response_sources
+  end
+
  def inbox_attributes
    super + ee_inbox_attributes
  end
--- a/enterprise/app/jobs/response_bot_job.rb
+++ b/enterprise/app/jobs/response_bot_job.rb
@@ -0,0 +1,7 @@
+class ResponseBotJob < ApplicationJob
+  queue_as :medium
+
+  def perform(conversation)
+    ::Enterprise::MessageTemplates::ResponseBotService.new(conversation: conversation).perform
+  end
+end
--- a/enterprise/app/jobs/response_builder_job.rb
+++ b/enterprise/app/jobs/response_builder_job.rb
@@ -0,0 +1,76 @@
+class ResponseBuilderJob < ApplicationJob
+  queue_as :default
+
+  def perform(response_document)
+    reset_previous_responses(response_document)
+    data = prepare_data(response_document)
+    response = post_request(data)
+    create_responses(response, response_document)
+  end
+
+  private
+
+  def reset_previous_responses(response_document)
+    response_document.responses.destroy_all
+  end
+
+  def prepare_data(response_document)
+    {
+      model: 'gpt-3.5-turbo',
+      messages: [
+        {
+          role: 'system',
+          content: system_message_content
+        },
+        {
+          role: 'user',
+          content: response_document.content
+        }
+      ]
+    }
+  end
+
+  def system_message_content
+    <<~SYSTEM_MESSAGE_CONTENT
+       You are a content writer looking to convert user content into short FAQs which can be added to your website's helper centre.
+       Format the webpage content provided in the message to FAQ format like the following example.#{' '}
+       Ensure that you only generate faqs from the information provider in the message.#{' '}
+       Ensure that output is always valid json.#{'  '}
+       If no match is available, return an empty JSON.
+       ```
+      [ { "question": "What is the pricing?",
+         "answer" : " There are different pricing tiers available."
+       }]
+       ```
+    SYSTEM_MESSAGE_CONTENT
+  end
+
+  def post_request(data)
+    headers = prepare_headers
+    HTTParty.post(
+      'https://api.openai.com/v1/chat/completions',
+      headers: headers,
+      body: data.to_json
+    )
+  end
+
+  def prepare_headers
+    {
+      'Content-Type' => 'application/json',
+      'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY')}"
+    }
+  end
+
+  def create_responses(response, response_document)
+    response_body = JSON.parse(response.body)
+    faqs = JSON.parse(response_body['choices'][0]['message']['content'].strip)
+
+    faqs.each do |faq|
+      response_document.responses.create!(
+        question: faq['question'],
+        answer: faq['answer'],
+        account_id: response_document.account_id
+      )
+    end
+  end
+end
--- a/enterprise/app/jobs/response_document_content_job.rb
+++ b/enterprise/app/jobs/response_document_content_job.rb
@@ -0,0 +1,10 @@
+# enterprise/app/jobs/response_document_content_job.rb
+# Fetches the page behind a response document's link and stores its content on the document.
+class ResponseDocumentContentJob < ApplicationJob
+  queue_as :default
+
+  def perform(response_document)
+    # PageCrawlerService downloads the link and converts the page <body> with ReverseMarkdown.
+    content = PageCrawlerService.new(response_document.document_link).body_text_content
+    # Store only the leading slice of the content; the range 0..15_000 is inclusive.
+    response_document.update!(content: content[0..15_000])
+  end
+end
--- a/enterprise/app/models/enterprise/concerns/account.rb
+++ b/enterprise/app/models/enterprise/concerns/account.rb
@@ -0,0 +1,15 @@
+# Enterprise-only associations for the class that includes this concern
+# (presumably the Account model — verify against the including class).
+module Enterprise::Concerns::Account
+  extend ActiveSupport::Concern
+
+  included do
+    has_many :sla_policies, dependent: :destroy_async
+
+    # Declares the response-bot associations on the including class.
+    def self.add_response_related_associations
+      has_many :response_sources, dependent: :destroy_async
+      has_many :response_documents, dependent: :destroy_async
+      has_many :responses, dependent: :destroy_async
+    end
+
+    # Evaluated once, when the concern is included: the associations are only declared
+    # if the `vector` extension is reported as enabled at that moment.
+    # NOTE(review): the check reads ActiveRecord::Base.connection, so it needs a reachable
+    # database while the class is being loaded — confirm this holds for every boot path.
+    add_response_related_associations if Features::ResponseBotService.new.vector_extension_enabled?
+  end
+end
--- a/enterprise/app/models/enterprise/concerns/inbox.rb
+++ b/enterprise/app/models/enterprise/concerns/inbox.rb
@@ -0,0 +1,13 @@
+module Enterprise::Concerns::Inbox
+  extend ActiveSupport::Concern
+
+  included do
+    def self.add_response_related_associations
+      has_many :response_sources, dependent: :destroy_async
+      has_many :response_documents, dependent: :destroy_async
+      has_many :responses, dependent: :destroy_async
+    end
+
+    add_response_related_associations if Features::ResponseBotService.new.vector_extension_enabled?
+  end
+end
--- a/enterprise/app/models/enterprise/enterprise_account_concern.rb
+++ b/enterprise/app/models/enterprise/enterprise_account_concern.rb
@@ -1,7 +0,0 @@
-module Enterprise::EnterpriseAccountConcern
-  extend ActiveSupport::Concern
-
-  included do
-    has_many :sla_policies, dependent: :destroy_async
-  end
-end
--- a/enterprise/app/models/enterprise/inbox.rb
+++ b/enterprise/app/models/enterprise/inbox.rb
@@ -5,6 +5,19 @@ module Enterprise::Inbox
    super - overloaded_agent_ids
  end

+  def get_responses(query)
+    embedding = Openai::EmbeddingsService.new.get_embedding(query)
+    responses.nearest_neighbors(:embedding, embedding, distance: 'cosine').first(5)
+  end
+
+  # Truthy when the inherited check passes or when the response bot is enabled for this inbox.
+  def active_bot?
+    super || response_bot_enabled?
+  end
+
+  # Truthy when the account has the 'response_bot' feature enabled and this inbox has
+  # at least one response source; the sources are only checked if the feature is on.
+  def response_bot_enabled?
+    account.feature_enabled?('response_bot') && response_sources.any?
+  end
+
  private

  def get_agent_ids_over_assignment_limit(limit)
--- a/enterprise/app/models/response.rb
+++ b/enterprise/app/models/response.rb
@@ -0,0 +1,36 @@
+# == Schema Information
+#
+# Table name: responses
+#
+#  id                   :bigint           not null, primary key
+#  answer               :text             not null
+#  embedding            :vector(1536)
+#  question             :string           not null
+#  created_at           :datetime         not null
+#  updated_at           :datetime         not null
+#  account_id           :bigint           not null
+#  response_document_id :bigint
+#
+# Indexes
+#
+#  index_responses_on_embedding             (embedding) USING ivfflat
+#  index_responses_on_response_document_id  (response_document_id)
+#
+class Response < ApplicationRecord
+  belongs_to :response_document
+  belongs_to :account
+  has_neighbors :embedding, normalize: true
+
+  before_save :update_response_embedding
+
+  def self.search(query)
+    embedding = Openai::EmbeddingsService.new.get_embedding(query)
+    nearest_neighbors(:embedding, embedding, distance: 'cosine').first(5)
+  end
+
+  private
+
+  def update_response_embedding
+    self.embedding = Openai::EmbeddingsService.new.get_embedding("#{question}: #{answer}")
+  end
+end
--- a/enterprise/app/models/response_document.rb
+++ b/enterprise/app/models/response_document.rb
@@ -0,0 +1,46 @@
+# == Schema Information
+#
+# Table name: response_documents
+#
+#  id                 :bigint           not null, primary key
+#  content            :text
+#  document_link      :string
+#  document_type      :string
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#  account_id         :bigint           not null
+#  document_id        :bigint
+#  response_source_id :bigint           not null
+#
+# Indexes
+#
+#  index_response_documents_on_document            (document_type,document_id)
+#  index_response_documents_on_response_source_id  (response_source_id)
+#
+class ResponseDocument < ApplicationRecord
+  has_many :responses, dependent: :destroy
+  belongs_to :account
+  belongs_to :response_source
+
+  before_validation :set_account
+  after_create :ensure_content
+  after_update :handle_content_change
+
+  private
+
+  def set_account
+    self.account = response_source.account
+  end
+
+  def ensure_content
+    return unless content.nil?
+
+    ResponseDocumentContentJob.perform_later(self)
+  end
+
+  def handle_content_change
+    return unless saved_change_to_content? && content.present?
+
+    ResponseBuilderJob.perform_later(self)
+  end
+end
--- a/enterprise/app/models/response_source.rb
+++ b/enterprise/app/models/response_source.rb
@@ -0,0 +1,28 @@
+# == Schema Information
+#
+# Table name: response_sources
+#
+#  id                :bigint           not null, primary key
+#  name              :string           not null
+#  source_link       :string
+#  source_model_type :string
+#  source_type       :integer          default("external"), not null
+#  created_at        :datetime         not null
+#  updated_at        :datetime         not null
+#  account_id        :bigint           not null
+#  inbox_id          :bigint           not null
+#  source_model_id   :bigint
+#
+# Indexes
+#
+#  index_response_sources_on_source_model  (source_model_type,source_model_id)
+#
+# A source of response-bot content attached to an inbox; it owns the documents
+# (and, through them, the responses) derived from it.
+class ResponseSource < ApplicationRecord
+  # Stored as an integer column; `external` (0) is the column default.
+  enum source_type: { external: 0, kbase: 1, inbox: 2 }
+  belongs_to :account
+  belongs_to :inbox
+  # Destroying a source destroys its documents.
+  has_many :response_documents, dependent: :destroy
+  has_many :responses, through: :response_documents
+
+  # Allows documents to be created together with the source via response_documents_attributes.
+  accepts_nested_attributes_for :response_documents
+end
--- a/enterprise/app/services/enterprise/message_templates/hook_execution_service.rb
+++ b/enterprise/app/services/enterprise/message_templates/hook_execution_service.rb
@@ -0,0 +1,10 @@
+# Enterprise extension of the message-template hook: after the inherited templates
+# have been triggered it may additionally enqueue the response bot.
+module Enterprise::MessageTemplates::HookExecutionService
+  # Runs the inherited behaviour first, then enqueues ResponseBotJob when applicable.
+  def trigger_templates
+    super
+    ResponseBotJob.perform_later(conversation) if should_process_response_bot?
+  end
+
+  # The bot only handles incoming messages on pending conversations of inboxes that
+  # have the response bot enabled; the checks short-circuit in that order.
+  def should_process_response_bot?
+    conversation.pending? && message.incoming? && inbox.response_bot_enabled?
+  end
+end
--- a/enterprise/app/services/enterprise/message_templates/response_bot_service.rb
+++ b/enterprise/app/services/enterprise/message_templates/response_bot_service.rb
@@ -0,0 +1,121 @@
+class Enterprise::MessageTemplates::ResponseBotService
+  pattr_initialize [:conversation!]
+
+  def perform
+    ActiveRecord::Base.transaction do
+      response = get_response(conversation.messages.last.content)
+      process_response(conversation.messages.last, response)
+    end
+  rescue StandardError => e
+    ChatwootExceptionTracker.new(e, account: conversation.account).capture_exception
+    true
+  end
+
+  private
+
+  delegate :contact, :account, :inbox, to: :conversation
+
+  def get_response(content)
+    previous_messages = []
+    get_previous_messages(previous_messages)
+    ChatGpt.new(response_sections(content)).generate_response('', previous_messages)
+  end
+
+  def get_previous_messages(previous_messages)
+    conversation.messages.where(message_type: [:outgoing, :incoming]).where(private: false).find_each do |message|
+      next if message.content_type != 'text'
+
+      role = determine_role(message)
+      previous_messages << { content: message.content, role: role }
+    end
+  end
+
+  def determine_role(message)
+    message.message_type == 'incoming' ? 'user' : 'system'
+  end
+
+  def response_sections(content)
+    sections = ''
+
+    inbox.get_responses(content).each do |response|
+      sections += "{context_id: #{response.id}, context: #{response.question} ? #{response.answer}}"
+    end
+    sections
+  end
+
+  def process_response(message, response)
+    if response == 'conversation_handoff'
+      process_action(message, 'handoff')
+    else
+      create_messages(response, conversation)
+    end
+  end
+
+  def process_action(_message, action)
+    case action
+    when 'handoff'
+      conversation.messages.create!('message_type': :outgoing, 'account_id': conversation.account_id, 'inbox_id': conversation.inbox_id,
+                                    'content': 'passing to an agent')
+      conversation.update(status: :open)
+    end
+  end
+
+  def create_messages(response, conversation)
+    response, article_ids = process_response_content(response)
+    create_outgoing_message(response, conversation)
+    create_outgoing_message_with_cards(article_ids, conversation) if article_ids.present?
+  end
+
+  def process_response_content(response)
+    # Regular expression to match '{context_ids: [ids]}'
+    regex = /{context_ids: \[(\d+(?:, *\d+)*)\]}/
+
+    # Extract ids from string
+    id_string = response[regex, 1] # This will give you '42, 43'
+    article_ids = id_string.split(',').map(&:to_i) if id_string # This will give you [42, 43]
+
+    # Remove '{context_ids: [ids]}' from string
+    response = response.sub(regex, '')
+
+    [response, article_ids]
+  end
+
+  def create_outgoing_message(response, conversation)
+    conversation.messages.create!(
+      {
+        message_type: :outgoing,
+        account_id: conversation.account_id,
+        inbox_id: conversation.inbox_id,
+        content: response
+      }
+    )
+  end
+
+  def create_outgoing_message_with_cards(article_ids, conversation)
+    content_attributes = get_article_hash(article_ids.uniq)
+    return if content_attributes.blank?
+
+    conversation.messages.create!(
+      {
+        message_type: :outgoing,
+        account_id: conversation.account_id,
+        inbox_id: conversation.inbox_id,
+        content: 'suggested articles',
+        content_type: 'article',
+        content_attributes: content_attributes
+      }
+    )
+  end
+
+  def get_article_hash(article_ids)
+    items = []
+    article_ids.each do |article_id|
+      response = Response.find(article_id)
+      next if response.nil?
+
+      items << { title: response.question, description: response.answer[0, 120], link: response.response_document.document_link }
+    end
+
+    items.present? ? { items: items } : {}
+  end
+end
--- a/enterprise/app/services/features/response_bot_service.rb
+++ b/enterprise/app/services/features/response_bot_service.rb
@@ -0,0 +1,83 @@
+class Features::ResponseBotService
+  MIGRATION_VERSION = ActiveRecord::Migration[7.0]
+
+  def enable_in_installation
+    enable_vector_extension
+    create_tables
+  end
+
+  def enable_vector_extension
+    MIGRATION_VERSION.enable_extension 'vector'
+  rescue ActiveRecord::StatementInvalid
+    print 'Vector extension not available'
+  end
+
+  def disable_vector_extension
+    MIGRATION_VERSION.disable_extension 'vector'
+  end
+
+  def vector_extension_enabled?
+    ActiveRecord::Base.connection.extension_enabled?('vector')
+  end
+
+  def create_tables
+    return unless vector_extension_enabled?
+
+    %i[response_sources response_documents responses].each do |table|
+      send("create_#{table}_table")
+    end
+  end
+
+  def drop_tables
+    %i[responses response_documents response_sources].each do |table|
+      MIGRATION_VERSION.drop_table table if MIGRATION_VERSION.table_exists?(table)
+    end
+  end
+
+  private
+
+  def create_response_sources_table
+    return if MIGRATION_VERSION.table_exists?(:response_sources)
+
+    MIGRATION_VERSION.create_table :response_sources do |t|
+      t.integer :source_type, null: false, default: 0
+      t.string :name, null: false
+      t.string :source_link
+      t.references :source_model, polymorphic: true
+      t.bigint :account_id, null: false
+      t.bigint :inbox_id, null: false
+      t.timestamps
+    end
+  end
+
+  def create_response_documents_table
+    return if MIGRATION_VERSION.table_exists?(:response_documents)
+
+    MIGRATION_VERSION.create_table :response_documents do |t|
+      t.bigint :response_source_id, null: false
+      t.string :document_link
+      t.references :document, polymorphic: true
+      t.text :content
+      t.bigint :account_id, null: false
+      t.timestamps
+    end
+
+    MIGRATION_VERSION.add_index :response_documents, :response_source_id
+  end
+
+  def create_responses_table
+    return if MIGRATION_VERSION.table_exists?(:responses)
+
+    MIGRATION_VERSION.create_table :responses do |t|
+      t.bigint :response_document_id
+      t.string :question, null: false
+      t.text :answer, null: false
+      t.bigint :account_id, null: false
+      t.vector :embedding, limit: 1536
+      t.timestamps
+    end
+
+    MIGRATION_VERSION.add_index :responses, :response_document_id
+    MIGRATION_VERSION.add_index :responses, :embedding, using: :ivfflat, opclass: :vector_l2_ops
+  end
+end
--- a/enterprise/app/services/openai/embeddings_service.rb
+++ b/enterprise/app/services/openai/embeddings_service.rb
@@ -0,0 +1,22 @@
+class Openai::EmbeddingsService
+  def get_embedding(content)
+    fetch_embeddings(content)
+  end
+
+  private
+
+  def fetch_embeddings(input)
+    url = 'https://api.openai.com/v1/embeddings'
+    headers = {
+      'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY')}",
+      'Content-Type' => 'application/json'
+    }
+    data = {
+      input: input,
+      model: 'text-embedding-ada-002'
+    }
+
+    response = Net::HTTP.post(URI(url), data.to_json, headers)
+    JSON.parse(response.body)['data'].pick('embedding')
+  end
+end
--- a/enterprise/app/services/page_crawler_service.rb
+++ b/enterprise/app/services/page_crawler_service.rb
@@ -0,0 +1,38 @@
+class PageCrawlerService
+  attr_reader :external_link
+
+  def initialize(external_link)
+    @external_link = external_link
+    @doc = Nokogiri::HTML(HTTParty.get(external_link).body)
+  end
+
+  def page_links
+    sitemap? ? extract_links_from_sitemap : extract_links_from_html
+  end
+
+  def page_title
+    title_element = @doc.at_xpath('//title')
+    title_element&.text&.strip
+  end
+
+  def body_text_content
+    ReverseMarkdown.convert @doc.at_xpath('//body'), unknown_tags: :bypass, github_flavored: true
+  end
+
+  private
+
+  def sitemap?
+    @external_link.end_with?('.xml')
+  end
+
+  def extract_links_from_sitemap
+    @doc.xpath('//loc').to_set(&:text)
+  end
+
+  def extract_links_from_html
+    @doc.xpath('//a/@href').to_set do |link|
+      absolute_url = URI.join(@external_link, link.value).to_s
+      absolute_url
+    end
+  end
+end