feat(ee): Add a service to fetch website content and prepare a persona of Captain Assistant (#12732)
This PR is the first of many to simplify the process of building an assistant. The new flow will only require the user’s website. We’ll automatically crawl it, identify the business name and what the business does, and then generate a suggested assistant persona, complete with a proposed name and description. This service returns the following. Example: tooljet.com <img width="795" height="217" alt="Screenshot 2025-10-25 at 2 55 04 PM" src="https://github.com/user-attachments/assets/9cb3594a-9c9c-4970-a0a1-4c9c8869c193" /> Example: replit.com <img width="797" height="176" alt="Screenshot 2025-10-25 at 2 56 42 PM" src="https://github.com/user-attachments/assets/6a1b4266-aab6-455f-a5e3-696d3a8243c9" />
This commit is contained in:
@@ -0,0 +1,99 @@
|
||||
require 'rails_helper'
|
||||
|
||||
# Specs for Captain::Onboarding::WebsiteAnalyzerService#analyze.
# The page crawler and the OpenAI client are both replaced with verifying
# doubles, so every example controls exactly what the service sees.
RSpec.describe Captain::Onboarding::WebsiteAnalyzerService do
  let(:website_url) { 'https://example.com' }
  let(:service) { described_class.new(website_url) }
  let(:crawler) { instance_double(Captain::Tools::SimplePageCrawlService) }
  let(:openai_client) { instance_double(OpenAI::Client) }

  # Stubs the crawler with the content of a fully populated page.
  def stub_crawled_page
    allow(crawler).to receive_messages(
      body_text_content: 'Welcome to Example Corp',
      page_title: 'Example Corp - Home',
      meta_description: 'Leading provider of business solutions',
      favicon_url: 'https://example.com/favicon.ico'
    )
  end

  before do
    create(:installation_config, name: 'CAPTAIN_OPEN_AI_API_KEY', value: 'test-key')
    # The crawler class must be stubbed before `service` is first referenced.
    allow(Captain::Tools::SimplePageCrawlService).to receive(:new).and_return(crawler)
    allow(service).to receive_messages(client: openai_client, model: 'gpt-3.5-turbo')
  end

  describe '#analyze' do
    context 'when website content is available and OpenAI call is successful' do
      let(:persona) do
        {
          'business_name' => 'Example Corp',
          'suggested_assistant_name' => 'Alex from Example Corp',
          'description' => 'You specialize in helping customers with business solutions and support'
        }
      end

      # Chat completion payload whose message content is the persona as JSON.
      let(:chat_completion) do
        { 'choices' => [{ 'message' => { 'content' => persona.to_json } }] }
      end

      before do
        stub_crawled_page
        allow(openai_client).to receive(:chat).and_return(chat_completion)
      end

      it 'returns success' do
        outcome = service.analyze

        expect(outcome[:success]).to be true
        expect(outcome[:data]).to include(
          business_name: 'Example Corp',
          suggested_assistant_name: 'Alex from Example Corp',
          description: 'You specialize in helping customers with business solutions and support',
          website_url: website_url,
          favicon_url: 'https://example.com/favicon.ico'
        )
      end
    end

    context 'when website content is errored' do
      before do
        allow(crawler).to receive(:body_text_content).and_raise(StandardError, 'Network error')
      end

      it 'returns error' do
        outcome = service.analyze

        expect(outcome[:success]).to be false
        expect(outcome[:error]).to eq('Failed to fetch website content')
      end
    end

    context 'when website content is unavailable' do
      before do
        # Every textual field of the crawled page is blank.
        allow(crawler).to receive_messages(body_text_content: '', page_title: '', meta_description: '')
      end

      it 'returns error' do
        outcome = service.analyze

        expect(outcome[:success]).to be false
        expect(outcome[:error]).to eq('Failed to fetch website content')
      end
    end

    context 'when OpenAI error' do
      before do
        stub_crawled_page
        allow(openai_client).to receive(:chat).and_raise(StandardError, 'API error')
      end

      it 'returns error' do
        outcome = service.analyze

        expect(outcome[:success]).to be false
        expect(outcome[:error]).to eq('API error')
      end
    end
  end
end
|
||||
@@ -125,4 +125,63 @@ RSpec.describe Captain::Tools::SimplePageCrawlService do
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#meta_description' do
  # Each context supplies `page_html`; it is served as the body of the GET to base_url.
  before { stub_request(:get, base_url).to_return(body: page_html) }

  context 'when meta description exists' do
    let(:page_html) do
      '<html><head><meta name="description" content="This is a test page description"></head></html>'
    end

    it 'returns the meta description content' do
      expect(service.meta_description).to eq('This is a test page description')
    end
  end

  context 'when meta description does not exist' do
    let(:page_html) { '<html><head><title>Test</title></head></html>' }

    it 'returns nil' do
      expect(service.meta_description).to be_nil
    end
  end
end
|
||||
|
||||
describe '#favicon_url' do
  # Each context supplies `page_html`; it is served as the body of the GET to base_url.
  before { stub_request(:get, base_url).to_return(body: page_html) }

  context 'when favicon exists with relative URL' do
    let(:page_html) { '<html><head><link rel="icon" href="/favicon.ico"></head></html>' }

    it 'returns the resolved absolute favicon URL' do
      expect(service.favicon_url).to eq('https://example.com/favicon.ico')
    end
  end

  context 'when favicon exists with absolute URL' do
    let(:page_html) do
      '<html><head><link rel="icon" href="https://cdn.example.com/favicon.ico"></head></html>'
    end

    it 'returns the absolute favicon URL' do
      expect(service.favicon_url).to eq('https://cdn.example.com/favicon.ico')
    end
  end

  context 'when favicon does not exist' do
    let(:page_html) { '<html><head><title>Test</title></head></html>' }

    it 'returns nil' do
      expect(service.favicon_url).to be_nil
    end
  end
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user