feat(ee): Add a service to fetch website content and prepare a persona of Captain Assistant (#12732)

This PR is the first of many to simplify the process of building an
assistant. The new flow will only require the user’s website. We’ll
automatically crawl it, identify the business name and what the business
does, and then generate a suggested assistant persona, complete with a
proposed name and description.

For a given website, the service returns output like the following.
Example: tooljet.com
<img width="795" height="217" alt="Screenshot 2025-10-25 at 2 55 04 PM"
src="https://github.com/user-attachments/assets/9cb3594a-9c9c-4970-a0a1-4c9c8869c193"
/>

Example: replit.com
<img width="797" height="176" alt="Screenshot 2025-10-25 at 2 56 42 PM"
src="https://github.com/user-attachments/assets/6a1b4266-aab6-455f-a5e3-696d3a8243c9"
/>
This commit is contained in:
Pranav
2025-10-25 15:50:50 -07:00
committed by GitHub
parent b9864fe1f6
commit 5891fd6f49
5 changed files with 310 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
require 'rails_helper'
# Examples for Captain::Onboarding::WebsiteAnalyzerService#analyze.
#
# The page crawler and the OpenAI client are swapped for verifying doubles, so
# each context controls exactly what the service sees from both collaborators.
RSpec.describe Captain::Onboarding::WebsiteAnalyzerService do
  let(:website_url) { 'https://example.com' }
  let(:service) { described_class.new(website_url) }
  let(:mock_crawler) { instance_double(Captain::Tools::SimplePageCrawlService) }
  let(:mock_client) { instance_double(OpenAI::Client) }

  before do
    create(:installation_config, name: 'CAPTAIN_OPEN_AI_API_KEY', value: 'test-key')
    # Every crawler the service instantiates is the double defined above.
    allow(Captain::Tools::SimplePageCrawlService).to receive(:new).and_return(mock_crawler)
    allow(service).to receive_messages(client: mock_client, model: 'gpt-3.5-turbo')
  end

  describe '#analyze' do
    # Crawler responses for a page that was fetched successfully.
    let(:crawled_page) do
      {
        body_text_content: 'Welcome to Example Corp',
        page_title: 'Example Corp - Home',
        meta_description: 'Leading provider of business solutions',
        favicon_url: 'https://example.com/favicon.ico'
      }
    end

    context 'when website content is available and OpenAI call is successful' do
      # Chat completion payload whose message content is a JSON-encoded persona.
      let(:openai_response) do
        persona_json = {
          'business_name' => 'Example Corp',
          'suggested_assistant_name' => 'Alex from Example Corp',
          'description' => 'You specialize in helping customers with business solutions and support'
        }.to_json

        { 'choices' => [{ 'message' => { 'content' => persona_json } }] }
      end

      before do
        allow(mock_crawler).to receive_messages(crawled_page)
        allow(mock_client).to receive(:chat).and_return(openai_response)
      end

      it 'returns success' do
        outcome = service.analyze

        expect(outcome[:success]).to be true
        expect(outcome[:data]).to include(
          business_name: 'Example Corp',
          suggested_assistant_name: 'Alex from Example Corp',
          description: 'You specialize in helping customers with business solutions and support',
          website_url: website_url,
          favicon_url: 'https://example.com/favicon.ico'
        )
      end
    end

    context 'when website content is errored' do
      before do
        allow(mock_crawler).to receive(:body_text_content).and_raise(StandardError, 'Network error')
      end

      it 'returns error' do
        outcome = service.analyze

        expect(outcome[:success]).to be false
        expect(outcome[:error]).to eq('Failed to fetch website content')
      end
    end

    context 'when website content is unavailable' do
      before do
        # The crawler responds, but every text field comes back empty.
        allow(mock_crawler).to receive_messages(body_text_content: '', page_title: '', meta_description: '')
      end

      it 'returns error' do
        outcome = service.analyze

        expect(outcome[:success]).to be false
        expect(outcome[:error]).to eq('Failed to fetch website content')
      end
    end

    context 'when OpenAI error' do
      before do
        allow(mock_crawler).to receive_messages(crawled_page)
        allow(mock_client).to receive(:chat).and_raise(StandardError, 'API error')
      end

      it 'returns error' do
        outcome = service.analyze

        expect(outcome[:success]).to be false
        expect(outcome[:error]).to eq('API error')
      end
    end
  end
end

View File

@@ -125,4 +125,63 @@ RSpec.describe Captain::Tools::SimplePageCrawlService do
)
end
end
# Examples for #meta_description. `service` and `base_url` are provided by the
# enclosing example group, which is not part of this excerpt.
describe '#meta_description' do
  # Each context supplies the markup that the stubbed GET request returns.
  before { stub_request(:get, base_url).to_return(body: stubbed_body) }

  context 'when meta description exists' do
    let(:stubbed_body) do
      '<html><head><meta name="description" content="This is a test page description"></head></html>'
    end

    it 'returns the meta description content' do
      expect(service.meta_description).to eq('This is a test page description')
    end
  end

  context 'when meta description does not exist' do
    let(:stubbed_body) { '<html><head><title>Test</title></head></html>' }

    it 'returns nil' do
      expect(service.meta_description).to be_nil
    end
  end
end
# Examples for #favicon_url. `service` and `base_url` are provided by the
# enclosing example group, which is not part of this excerpt.
describe '#favicon_url' do
  # Each context supplies the markup that the stubbed GET request returns.
  before { stub_request(:get, base_url).to_return(body: stubbed_body) }

  context 'when favicon exists with relative URL' do
    let(:stubbed_body) { '<html><head><link rel="icon" href="/favicon.ico"></head></html>' }

    it 'returns the resolved absolute favicon URL' do
      expect(service.favicon_url).to eq('https://example.com/favicon.ico')
    end
  end

  context 'when favicon exists with absolute URL' do
    let(:stubbed_body) do
      '<html><head><link rel="icon" href="https://cdn.example.com/favicon.ico"></head></html>'
    end

    it 'returns the absolute favicon URL' do
      expect(service.favicon_url).to eq('https://cdn.example.com/favicon.ico')
    end
  end

  context 'when favicon does not exist' do
    let(:stubbed_body) { '<html><head><title>Test</title></head></html>' }

    it 'returns nil' do
      expect(service.favicon_url).to be_nil
    end
  end
end
end