feat: Response Bot using GPT and Webpage Sources (#7518)

This commit introduces the ability to associate response sources with an inbox, allowing external webpages to be parsed by Chatwoot. The parsed data is converted into embeddings for use with GPT models when managing customer queries. The implementation relies on the `pgvector` extension for PostgreSQL. Database migrations related to this feature are handled separately by `Features::ResponseBotService`. A future update will integrate these migrations into the default Rails migrations, once compatibility with Postgres extensions across all self-hosted installation options is confirmed. Additionally, a new GitHub action has been added to the CI pipeline to ensure the execution of specs related to this feature.
2023-07-21 18:11:51 +03:00
parent 30f3928904
commit 480f34803b
41 changed files with 976 additions and 10 deletions
--- a/enterprise/app/services/enterprise/message_templates/hook_execution_service.rb
+++ b/enterprise/app/services/enterprise/message_templates/hook_execution_service.rb
@@ -0,0 +1,10 @@
+module Enterprise::MessageTemplates::HookExecutionService
+  def trigger_templates
+    super
+    ResponseBotJob.perform_later(conversation) if should_process_response_bot?
+  end
+
+  def should_process_response_bot?
+    conversation.pending? && message.incoming? && inbox.response_bot_enabled?
+  end
+end
--- a/enterprise/app/services/enterprise/message_templates/response_bot_service.rb
+++ b/enterprise/app/services/enterprise/message_templates/response_bot_service.rb
@@ -0,0 +1,121 @@
+class Enterprise::MessageTemplates::ResponseBotService
+  pattr_initialize [:conversation!]
+
+  def perform
+    ActiveRecord::Base.transaction do
+      response = get_response(conversation.messages.last.content)
+      process_response(conversation.messages.last, response)
+    end
+  rescue StandardError => e
+    ChatwootExceptionTracker.new(e, account: conversation.account).capture_exception
+    true
+  end
+
+  private
+
+  delegate :contact, :account, :inbox, to: :conversation
+
+  def get_response(content)
+    previous_messages = []
+    get_previous_messages(previous_messages)
+    ChatGpt.new(response_sections(content)).generate_response('', previous_messages)
+  end
+
+  def get_previous_messages(previous_messages)
+    conversation.messages.where(message_type: [:outgoing, :incoming]).where(private: false).find_each do |message|
+      next if message.content_type != 'text'
+
+      role = determine_role(message)
+      previous_messages << { content: message.content, role: role }
+    end
+  end
+
+  def determine_role(message)
+    message.message_type == 'incoming' ? 'user' : 'system'
+  end
+
+  def response_sections(content)
+    sections = ''
+
+    inbox.get_responses(content).each do |response|
+      sections += "{context_id: #{response.id}, context: #{response.question} ? #{response.answer}}"
+    end
+    sections
+  end
+
+  def process_response(message, response)
+    if response == 'conversation_handoff'
+      process_action(message, 'handoff')
+    else
+      create_messages(response, conversation)
+    end
+  end
+
+  def process_action(_message, action)
+    case action
+    when 'handoff'
+      conversation.messages.create!('message_type': :outgoing, 'account_id': conversation.account_id, 'inbox_id': conversation.inbox_id,
+                                    'content': 'passing to an agent')
+      conversation.update(status: :open)
+    end
+  end
+
+  def create_messages(response, conversation)
+    response, article_ids = process_response_content(response)
+    create_outgoing_message(response, conversation)
+    create_outgoing_message_with_cards(article_ids, conversation) if article_ids.present?
+  end
+
+  def process_response_content(response)
+    # Regular expression to match '{context_ids: [ids]}'
+    regex = /{context_ids: \[(\d+(?:, *\d+)*)\]}/
+
+    # Extract ids from string
+    id_string = response[regex, 1] # This will give you '42, 43'
+    article_ids = id_string.split(',').map(&:to_i) if id_string # This will give you [42, 43]
+
+    # Remove '{context_ids: [ids]}' from string
+    response = response.sub(regex, '')
+
+    [response, article_ids]
+  end
+
+  def create_outgoing_message(response, conversation)
+    conversation.messages.create!(
+      {
+        message_type: :outgoing,
+        account_id: conversation.account_id,
+        inbox_id: conversation.inbox_id,
+        content: response
+      }
+    )
+  end
+
+  def create_outgoing_message_with_cards(article_ids, conversation)
+    content_attributes = get_article_hash(article_ids.uniq)
+    return if content_attributes.blank?
+
+    conversation.messages.create!(
+      {
+        message_type: :outgoing,
+        account_id: conversation.account_id,
+        inbox_id: conversation.inbox_id,
+        content: 'suggested articles',
+        content_type: 'article',
+        content_attributes: content_attributes
+      }
+    )
+  end
+
+  def get_article_hash(article_ids)
+    items = []
+    article_ids.each do |article_id|
+      response = Response.find(article_id)
+      next if response.nil?
+
+      items << { title: response.question, description: response.answer[0, 120], link: response.response_document.document_link }
+    end
+
+    items.present? ? { items: items } : {}
+  end
+end
--- a/enterprise/app/services/features/response_bot_service.rb
+++ b/enterprise/app/services/features/response_bot_service.rb
@@ -0,0 +1,83 @@
+class Features::ResponseBotService
+  MIGRATION_VERSION = ActiveRecord::Migration[7.0]
+
+  def enable_in_installation
+    enable_vector_extension
+    create_tables
+  end
+
+  def enable_vector_extension
+    MIGRATION_VERSION.enable_extension 'vector'
+  rescue ActiveRecord::StatementInvalid
+    print 'Vector extension not available'
+  end
+
+  def disable_vector_extension
+    MIGRATION_VERSION.disable_extension 'vector'
+  end
+
+  def vector_extension_enabled?
+    ActiveRecord::Base.connection.extension_enabled?('vector')
+  end
+
+  def create_tables
+    return unless vector_extension_enabled?
+
+    %i[response_sources response_documents responses].each do |table|
+      send("create_#{table}_table")
+    end
+  end
+
+  def drop_tables
+    %i[responses response_documents response_sources].each do |table|
+      MIGRATION_VERSION.drop_table table if MIGRATION_VERSION.table_exists?(table)
+    end
+  end
+
+  private
+
+  def create_response_sources_table
+    return if MIGRATION_VERSION.table_exists?(:response_sources)
+
+    MIGRATION_VERSION.create_table :response_sources do |t|
+      t.integer :source_type, null: false, default: 0
+      t.string :name, null: false
+      t.string :source_link
+      t.references :source_model, polymorphic: true
+      t.bigint :account_id, null: false
+      t.bigint :inbox_id, null: false
+      t.timestamps
+    end
+  end
+
+  def create_response_documents_table
+    return if MIGRATION_VERSION.table_exists?(:response_documents)
+
+    MIGRATION_VERSION.create_table :response_documents do |t|
+      t.bigint :response_source_id, null: false
+      t.string :document_link
+      t.references :document, polymorphic: true
+      t.text :content
+      t.bigint :account_id, null: false
+      t.timestamps
+    end
+
+    MIGRATION_VERSION.add_index :response_documents, :response_source_id
+  end
+
+  def create_responses_table
+    return if MIGRATION_VERSION.table_exists?(:responses)
+
+    MIGRATION_VERSION.create_table :responses do |t|
+      t.bigint :response_document_id
+      t.string :question, null: false
+      t.text :answer, null: false
+      t.bigint :account_id, null: false
+      t.vector :embedding, limit: 1536
+      t.timestamps
+    end
+
+    MIGRATION_VERSION.add_index :responses, :response_document_id
+    MIGRATION_VERSION.add_index :responses, :embedding, using: :ivfflat, opclass: :vector_l2_ops
+  end
+end
--- a/enterprise/app/services/openai/embeddings_service.rb
+++ b/enterprise/app/services/openai/embeddings_service.rb
@@ -0,0 +1,22 @@
+class Openai::EmbeddingsService
+  def get_embedding(content)
+    fetch_embeddings(content)
+  end
+
+  private
+
+  def fetch_embeddings(input)
+    url = 'https://api.openai.com/v1/embeddings'
+    headers = {
+      'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY')}",
+      'Content-Type' => 'application/json'
+    }
+    data = {
+      input: input,
+      model: 'text-embedding-ada-002'
+    }
+
+    response = Net::HTTP.post(URI(url), data.to_json, headers)
+    JSON.parse(response.body)['data'].pick('embedding')
+  end
+end
--- a/enterprise/app/services/page_crawler_service.rb
+++ b/enterprise/app/services/page_crawler_service.rb
@@ -0,0 +1,38 @@
+class PageCrawlerService
+  attr_reader :external_link
+
+  def initialize(external_link)
+    @external_link = external_link
+    @doc = Nokogiri::HTML(HTTParty.get(external_link).body)
+  end
+
+  def page_links
+    sitemap? ? extract_links_from_sitemap : extract_links_from_html
+  end
+
+  def page_title
+    title_element = @doc.at_xpath('//title')
+    title_element&.text&.strip
+  end
+
+  def body_text_content
+    ReverseMarkdown.convert @doc.at_xpath('//body'), unknown_tags: :bypass, github_flavored: true
+  end
+
+  private
+
+  def sitemap?
+    @external_link.end_with?('.xml')
+  end
+
+  def extract_links_from_sitemap
+    @doc.xpath('//loc').to_set(&:text)
+  end
+
+  def extract_links_from_html
+    @doc.xpath('//a/@href').to_set do |link|
+      absolute_url = URI.join(@external_link, link.value).to_s
+      absolute_url
+    end
+  end
+end