feat(ee): Add Captain features (#10665)

Migration Guide: https://chwt.app/v4/migration This PR imports all the work related to Captain into the EE codebase. Captain represents the AI-based features in Chatwoot and includes the following key components: - Assistant: An assistant has a persona, the product it would be trained on. At the moment, the data on which it is trained comes from websites. Future integrations: Notion documents, PDFs, etc. This PR enables connecting an assistant to an inbox. The assistant would run the conversation every time before transferring it to an agent. - Copilot for Agents: When an agent is supporting a customer, we will be able to offer additional help to look up some data or fetch information from integrations, etc., via copilot. - Conversation FAQ generator: When a conversation is resolved, the Captain integration would identify questions that were not in the knowledge base. - CRM memory: Learns from the conversations and identifies important information about the contact. --------- Co-authored-by: Vishnu Narayanan <vishnu@chatwoot.com> Co-authored-by: Sojan <sojan@pepalo.com> Co-authored-by: iamsivin <iamsivin@gmail.com> Co-authored-by: Sivin Varghese <64252451+iamsivin@users.noreply.github.com>
2025-01-14 16:15:47 -08:00
parent 7b31b5ad6e
commit d070743383
184 changed files with 6666 additions and 2242 deletions
--- a/spec/enterprise/services/captain/tools/simple_page_crawl_service_spec.rb
+++ b/spec/enterprise/services/captain/tools/simple_page_crawl_service_spec.rb
@@ -0,0 +1,128 @@
+require 'rails_helper'
+
+RSpec.describe Captain::Tools::SimplePageCrawlService do
+  let(:base_url) { 'https://example.com' }
+  let(:service) { described_class.new(base_url) }
+
+  before do
+    WebMock.disable_net_connect!
+  end
+
+  after do
+    WebMock.allow_net_connect!
+  end
+
+  describe '#page_title' do
+    context 'when title exists' do
+      before do
+        stub_request(:get, base_url)
+          .to_return(body: '<html><head><title>Example Page</title></head></html>')
+      end
+
+      it 'returns the page title' do
+        expect(service.page_title).to eq('Example Page')
+      end
+    end
+
+    context 'when title does not exist' do
+      before do
+        stub_request(:get, base_url)
+          .to_return(body: '<html><head></head></html>')
+      end
+
+      it 'returns nil' do
+        expect(service.page_title).to be_nil
+      end
+    end
+  end
+
+  describe '#page_links' do
+    context 'with HTML page' do
+      let(:html_content) do
+        <<~HTML
+          <html>
+            <body>
+              <a href="/relative">Relative Link</a>
+              <a href="https://external.com">External Link</a>
+              <a href="#anchor">Anchor Link</a>
+            </body>
+          </html>
+        HTML
+      end
+
+      before do
+        stub_request(:get, base_url).to_return(body: html_content)
+      end
+
+      it 'extracts and absolutizes all links' do
+        links = service.page_links
+        expect(links).to include(
+          'https://example.com/relative',
+          'https://external.com',
+          'https://example.com#anchor'
+        )
+      end
+    end
+
+    context 'with sitemap XML' do
+      let(:sitemap_url) { 'https://example.com/sitemap.xml' }
+      let(:sitemap_service) { described_class.new(sitemap_url) }
+      let(:sitemap_content) do
+        <<~XML
+          <?xml version="1.0" encoding="UTF-8"?>
+          <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+            <url>
+              <loc>https://example.com/page1</loc>
+            </url>
+            <url>
+              <loc>https://example.com/page2</loc>
+            </url>
+          </urlset>
+        XML
+      end
+
+      before do
+        stub_request(:get, sitemap_url).to_return(body: sitemap_content)
+      end
+
+      it 'extracts links from sitemap' do
+        links = sitemap_service.page_links
+        expect(links).to contain_exactly(
+          'https://example.com/page1',
+          'https://example.com/page2'
+        )
+      end
+    end
+  end
+
+  describe '#body_text_content' do
+    let(:html_content) do
+      <<~HTML
+        <html>
+          <body>
+            <h1>Main Title</h1>
+            <p>Some <strong>formatted</strong> content.</p>
+            <ul>
+              <li>List item 1</li>
+              <li>List item 2</li>
+            </ul>
+          </body>
+        </html>
+      HTML
+    end
+
+    before do
+      stub_request(:get, base_url).to_return(body: html_content)
+      allow(ReverseMarkdown).to receive(:convert).and_return("# Main Title\n\nConverted markdown")
+    end
+
+    it 'converts body content to markdown' do
+      expect(service.body_text_content).to eq("# Main Title\n\nConverted markdown")
+      expect(ReverseMarkdown).to have_received(:convert).with(
+        kind_of(Nokogiri::XML::Element),
+        unknown_tags: :bypass,
+        github_flavored: true
+      )
+    end
+  end
+end