feat(ee): Add a service to fetch website content and prepare a persona of Captain Assistant (#12732)

This PR is the first of many to simplify the process of building an
assistant. The new flow will only require the user’s website. We’ll
automatically crawl it, identify the business name and what the business
does, and then generate a suggested assistant persona, complete with a
proposed name and description.

This service returns the following.
Example: tooljet.com
<img width="795" height="217" alt="Screenshot 2025-10-25 at 2 55 04 PM"
src="https://github.com/user-attachments/assets/9cb3594a-9c9c-4970-a0a1-4c9c8869c193"
/>

Example: replit.com
<img width="797" height="176" alt="Screenshot 2025-10-25 at 2 56 42 PM"
src="https://github.com/user-attachments/assets/6a1b4266-aab6-455f-a5e3-696d3a8243c9"
/>
This commit is contained in:
Pranav
2025-10-25 15:50:50 -07:00
committed by GitHub
parent b9864fe1f6
commit 5891fd6f49
5 changed files with 310 additions and 0 deletions

View File

@@ -125,4 +125,63 @@ RSpec.describe Captain::Tools::SimplePageCrawlService do
)
end
end
# Examples for #meta_description: each context stubs the GET request to
# base_url with a small HTML document and checks what the service reports.
describe '#meta_description' do
  context 'when meta description exists' do
    before do
      # Page whose <head> carries a description meta tag.
      page_html = '<html><head><meta name="description" content="This is a test page description"></head></html>'
      stub_request(:get, base_url).to_return(body: page_html)
    end

    it 'returns the meta description content' do
      description = service.meta_description

      expect(description).to eq('This is a test page description')
    end
  end

  context 'when meta description does not exist' do
    before do
      # Page with only a <title>; there is no description meta tag to find.
      page_html = '<html><head><title>Test</title></head></html>'
      stub_request(:get, base_url).to_return(body: page_html)
    end

    it 'returns nil' do
      description = service.meta_description

      expect(description).to be_nil
    end
  end
end
# Examples for #favicon_url: each context stubs the GET request to base_url
# with a small HTML document and checks the favicon URL the service reports.
describe '#favicon_url' do
  context 'when favicon exists with relative URL' do
    before do
      # Icon link uses a root-relative href.
      page_html = '<html><head><link rel="icon" href="/favicon.ico"></head></html>'
      stub_request(:get, base_url).to_return(body: page_html)
    end

    it 'returns the resolved absolute favicon URL' do
      favicon = service.favicon_url

      # NOTE(review): expected host implies base_url is on example.com;
      # base_url is defined outside this block, so confirm there.
      expect(favicon).to eq('https://example.com/favicon.ico')
    end
  end

  context 'when favicon exists with absolute URL' do
    before do
      # Icon link already points at a fully-qualified URL on another host.
      page_html = '<html><head><link rel="icon" href="https://cdn.example.com/favicon.ico"></head></html>'
      stub_request(:get, base_url).to_return(body: page_html)
    end

    it 'returns the absolute favicon URL' do
      favicon = service.favicon_url

      expect(favicon).to eq('https://cdn.example.com/favicon.ico')
    end
  end

  context 'when favicon does not exist' do
    before do
      # Page with only a <title>; there is no icon link to find.
      page_html = '<html><head><title>Test</title></head></html>'
      stub_request(:get, base_url).to_return(body: page_html)
    end

    it 'returns nil' do
      favicon = service.favicon_url

      expect(favicon).to be_nil
    end
  end
end
end