diff --git a/enterprise/app/services/captain/onboarding/website_analyzer_service.rb b/enterprise/app/services/captain/onboarding/website_analyzer_service.rb
new file mode 100644
index 000000000..d3e7d4983
--- /dev/null
+++ b/enterprise/app/services/captain/onboarding/website_analyzer_service.rb
@@ -0,0 +1,156 @@
+# Analyzes a business website and asks the LLM to suggest onboarding details
+# (business name, assistant name and assistant description).
+#
+# Errors raised inside #analyze are reported as `success: false` with an
+# `error` message instead of being propagated.
+class Captain::Onboarding::WebsiteAnalyzerService < Llm::BaseOpenAiService
+  # Maximum number of characters of page text embedded in the prompt.
+  MAX_CONTENT_LENGTH = 8000
+
+  # @param website_url [String, nil] site to analyze; the scheme is optional
+  def initialize(website_url)
+    super()
+    @website_url = normalize_url(website_url)
+    @website_content = nil
+    @favicon_url = nil
+  end
+
+  # Crawls the site and extracts business details through the LLM.
+  #
+  # @return [Hash] `success` and `data` keys, plus `error` on failure
+  def analyze
+    fetch_website_content
+    return error_response('Failed to fetch website content') unless @website_content
+
+    extract_business_info
+  rescue StandardError => e
+    Rails.logger.error "[Captain Onboarding] Website analysis error: #{e.message}"
+    error_response(e.message)
+  end
+
+  private
+
+  # Trims the input and prepends https:// unless an http(s) scheme is already
+  # present (matched case-insensitively). nil is coerced to '' because this
+  # runs from #initialize, outside #analyze's rescue.
+  def normalize_url(url)
+    url = url.to_s.strip
+    return url if url.match?(%r{\Ahttps?://}i)
+
+    "https://#{url}"
+  end
+
+  # Crawls @website_url and stores the cleaned page text and favicon URL.
+  # Returns true on success; logs and returns false when the page yields no
+  # content or the crawl raises.
+  def fetch_website_content
+    crawler = Captain::Tools::SimplePageCrawlService.new(@website_url)
+
+    text_content = crawler.body_text_content
+    page_title = crawler.page_title
+    meta_description = crawler.meta_description
+
+    if page_title.blank? && meta_description.blank? && text_content.blank?
+      Rails.logger.error "[Captain Onboarding] Failed to fetch #{@website_url}: No content found"
+      return false
+    end
+
+    combined_content = []
+    combined_content << "Title: #{page_title}" if page_title.present?
+    combined_content << "Description: #{meta_description}" if meta_description.present?
+    combined_content << text_content
+
+    @website_content = clean_and_truncate_content(combined_content.join("\n\n"))
+    @favicon_url = crawler.favicon_url
+    true
+  rescue StandardError => e
+    Rails.logger.error "[Captain Onboarding] Failed to fetch #{@website_url}: #{e.message}"
+    false
+  end
+
+  # Collapses whitespace runs into single spaces and caps the result at
+  # MAX_CONTENT_LENGTH characters.
+  def clean_and_truncate_content(content)
+    content.gsub(/\s+/, ' ').strip[0, MAX_CONTENT_LENGTH]
+  end
+
+  # Sends the analysis prompt to the chat API and parses the JSON reply.
+  def extract_business_info
+    prompt = build_analysis_prompt
+
+    response = client.chat(
+      parameters: {
+        model: model,
+        messages: [{ role: 'user', content: prompt }],
+        response_format: { type: 'json_object' },
+        temperature: 0.1,
+        max_tokens: 1000
+      }
+    )
+
+    parse_llm_response(response.dig('choices', 0, 'message', 'content'))
+  end
+
+  # Builds the prompt. @website_content is text crawled from a third-party
+  # page and is embedded verbatim.
+  def build_analysis_prompt
+    <<~PROMPT
+      Analyze the following website content and extract business information. Return a JSON response with the following structure:
+
+      {
+        "business_name": "The company or business name",
+        "suggested_assistant_name": "A friendly assistant name (e.g., 'Captain Assistant', 'Support Genie', etc.)",
+        "description": "Persona of the assistant based on the business type"
+      }
+
+      Guidelines:
+      - business_name: Extract the actual company/brand name from the content
+      - suggested_assistant_name: Create a friendly, professional name that customers would want to interact with
+      - description: Provide context about the business and what the assistant can help with. Keep it general and adaptable rather than overly specific. For example: "You specialize in helping customers with their orders and product questions" or "You assist customers with their account needs and general inquiries"
+
+      Website content:
+      #{@website_content}
+
+      Return only valid JSON, no additional text.
+    PROMPT
+  end
+
+  # Converts the LLM's JSON reply into the success payload, or an error
+  # payload when the reply is not a JSON object.
+  def parse_llm_response(response_text)
+    # to_s turns a missing reply (nil) into '', which JSON.parse rejects with
+    # the ParserError rescued below rather than a TypeError.
+    parsed_response = JSON.parse(response_text.to_s)
+    raise JSON::ParserError, 'expected a JSON object' unless parsed_response.is_a?(Hash)
+
+    {
+      success: true,
+      data: {
+        business_name: parsed_response['business_name'],
+        suggested_assistant_name: parsed_response['suggested_assistant_name'],
+        description: parsed_response['description'],
+        website_url: @website_url,
+        favicon_url: @favicon_url
+      }
+    }
+  rescue JSON::ParserError => e
+    Rails.logger.error "[Captain Onboarding] JSON parsing error: #{e.message}"
+    Rails.logger.error "[Captain Onboarding] Raw response: #{response_text}"
+    error_response('Failed to parse business information from website')
+  end
+
+  # Builds the failure payload; mirrors the success shape with blank fields.
+  def error_response(message)
+    {
+      success: false,
+      error: message,
+      data: {
+        business_name: '',
+        suggested_assistant_name: '',
+        description: '',
+        website_url: @website_url,
+        favicon_url: nil
+      }
+    }
+  end
+end
diff --git a/enterprise/app/services/captain/tools/simple_page_crawl_service.rb b/enterprise/app/services/captain/tools/simple_page_crawl_service.rb
index d6baa1ebe..65731ad90 100644
--- a/enterprise/app/services/captain/tools/simple_page_crawl_service.rb
+++ b/enterprise/app/services/captain/tools/simple_page_crawl_service.rb
@@ -19,6 +19,20 @@ class Captain::Tools::SimplePageCrawlService
     ReverseMarkdown.convert @doc.at_xpath('//body'), unknown_tags: :bypass, github_flavored: true
   end
 
+  # Stripped content of <meta name="description">; nil when the tag or attribute is absent.
+  def meta_description
+    node = @doc.at_css('meta[name="description"]')
+    node && node['content']&.strip
+  end
+
+  # URL of the first <link> whose rel attribute contains "icon", passed
+  # through resolve_url; nil when no such link carries an href.
+  def favicon_url
+    node = @doc.at_css('link[rel*="icon"]')
+    href = node && node['href']
+    resolve_url(href) if href
+  end
+
   private
 
   def sitemap?
@@ -35,4 +49,16 @@ class Captain::Tools::SimplePageCrawlService
       absolute_url
     end
   end
+
+  # Resolves a possibly-relative URL against @external_link.
+  # Only a real http(s):// prefix counts as absolute, so a relative path such
+  # as "httpdocs/icon.png" is still joined. Any failure returns the input.
+  def resolve_url(url)
+    url = url.strip
+    return url if url.match?(%r{\Ahttps?://}i)
+
+    URI.join(@external_link, url).to_s
+  rescue StandardError
+    url
+  end
 end
diff --git a/enterprise/app/services/llm/base_open_ai_service.rb b/enterprise/app/services/llm/base_open_ai_service.rb
index 2d3932246..e3e88453c 100644
--- a/enterprise/app/services/llm/base_open_ai_service.rb
+++ b/enterprise/app/services/llm/base_open_ai_service.rb
@@ -1,5 +1,6 @@
 class Llm::BaseOpenAiService
   DEFAULT_MODEL = 'gpt-4o-mini'.freeze
+  attr_reader :client, :model
 
   def initialize
     @client = OpenAI::Client.new(
diff --git a/spec/enterprise/services/captain/onboarding/website_analyzer_service_spec.rb b/spec/enterprise/services/captain/onboarding/website_analyzer_service_spec.rb
new file mode 100644
index 000000000..a2735bd69
--- /dev/null
+++ b/spec/enterprise/services/captain/onboarding/website_analyzer_service_spec.rb
@@ -0,0 +1,118 @@
+require 'rails_helper'
+
+RSpec.describe Captain::Onboarding::WebsiteAnalyzerService do
+  let(:website_url) { 'https://example.com' }
+  let(:service) { described_class.new(website_url) }
+  let(:mock_crawler) { instance_double(Captain::Tools::SimplePageCrawlService) }
+  let(:mock_client) { instance_double(OpenAI::Client) }
+
+  before do
+    create(:installation_config, name: 'CAPTAIN_OPEN_AI_API_KEY', value: 'test-key')
+    allow(Captain::Tools::SimplePageCrawlService).to receive(:new).and_return(mock_crawler)
+    allow(service).to receive(:client).and_return(mock_client)
+    allow(service).to receive(:model).and_return('gpt-3.5-turbo')
+  end
+
+  describe '#analyze' do
+    context 'when website content is available and OpenAI call is successful' do
+      let(:openai_response) do
+        {
+          'choices' => [{
+            'message' => {
+              'content' => {
+                'business_name' => 'Example Corp',
+                'suggested_assistant_name' => 'Alex from Example Corp',
+                'description' => 'You specialize in helping customers with business solutions and support'
+              }.to_json
+            }
+          }]
+        }
+      end
+
+      before do
+        allow(mock_crawler).to receive(:body_text_content).and_return('Welcome to Example Corp')
+        allow(mock_crawler).to receive(:page_title).and_return('Example Corp - Home')
+        allow(mock_crawler).to receive(:meta_description).and_return('Leading provider of business solutions')
+        allow(mock_crawler).to receive(:favicon_url).and_return('https://example.com/favicon.ico')
+        allow(mock_client).to receive(:chat).and_return(openai_response)
+      end
+
+      it 'returns success' do
+        result = service.analyze
+
+        expect(result[:success]).to be true
+        expect(result[:data]).to include(
+          business_name: 'Example Corp',
+          suggested_assistant_name: 'Alex from Example Corp',
+          description: 'You specialize in helping customers with business solutions and support',
+          website_url: website_url,
+          favicon_url: 'https://example.com/favicon.ico'
+        )
+      end
+    end
+
+    context 'when website content is errored' do
+      before do
+        allow(mock_crawler).to receive(:body_text_content).and_raise(StandardError, 'Network error')
+      end
+
+      it 'returns error' do
+        result = service.analyze
+
+        expect(result[:success]).to be false
+        expect(result[:error]).to eq('Failed to fetch website content')
+      end
+    end
+
+    context 'when website content is unavailable' do
+      before do
+        allow(mock_crawler).to receive(:body_text_content).and_return('')
+        allow(mock_crawler).to receive(:page_title).and_return('')
+        allow(mock_crawler).to receive(:meta_description).and_return('')
+      end
+
+      it 'returns error' do
+        result = service.analyze
+
+        expect(result[:success]).to be false
+        expect(result[:error]).to eq('Failed to fetch website content')
+      end
+    end
+
+    context 'when OpenAI error' do
+      before do
+        allow(mock_crawler).to receive(:body_text_content).and_return('Welcome to Example Corp')
+        allow(mock_crawler).to receive(:page_title).and_return('Example Corp - Home')
+        allow(mock_crawler).to receive(:meta_description).and_return('Leading provider of business solutions')
+        allow(mock_crawler).to receive(:favicon_url).and_return('https://example.com/favicon.ico')
+        allow(mock_client).to receive(:chat).and_raise(StandardError, 'API error')
+      end
+
+      it 'returns error' do
+        result = service.analyze
+
+        expect(result[:success]).to be false
+        expect(result[:error]).to eq('API error')
+      end
+    end
+
+    context 'when OpenAI returns invalid JSON' do
+      before do
+        allow(mock_crawler).to receive(:body_text_content).and_return('Welcome to Example Corp')
+        allow(mock_crawler).to receive(:page_title).and_return('Example Corp - Home')
+        allow(mock_crawler).to receive(:meta_description).and_return('Leading provider of business solutions')
+        allow(mock_crawler).to receive(:favicon_url).and_return('https://example.com/favicon.ico')
+        allow(mock_client).to receive(:chat).and_return(
+          { 'choices' => [{ 'message' => { 'content' => 'not json' } }] }
+        )
+      end
+
+      it 'returns error' do
+        result = service.analyze
+
+        expect(result[:success]).to be false
+        expect(result[:error]).to eq('Failed to parse business information from website')
+      end
+    end
+  end
+end
diff --git a/spec/enterprise/services/captain/tools/simple_page_crawl_service_spec.rb b/spec/enterprise/services/captain/tools/simple_page_crawl_service_spec.rb
index 5868c0e22..5dfe7177d 100644
--- a/spec/enterprise/services/captain/tools/simple_page_crawl_service_spec.rb
+++ b/spec/enterprise/services/captain/tools/simple_page_crawl_service_spec.rb
@@ -125,4 +125,63 @@ RSpec.describe Captain::Tools::SimplePageCrawlService do
       )
     end
   end
+
+  describe '#meta_description' do
+    context 'when meta description exists' do
+      before do
+        stub_request(:get, base_url)
+          .to_return(body: '<html><head><meta name="description" content="This is a test page description"></head></html>')
+      end
+
+      it 'returns the meta description content' do
+        expect(service.meta_description).to eq('This is a test page description')
+      end
+    end
+
+    context 'when meta description does not exist' do
+      before do
+        stub_request(:get, base_url)
+          .to_return(body: 'Test')
+      end
+
+      it 'returns nil' do
+        expect(service.meta_description).to be_nil
+      end
+    end
+  end
+
+  describe '#favicon_url' do
+    context 'when favicon exists with relative URL' do
+      before do
+        stub_request(:get, base_url)
+          .to_return(body: '<html><head><link rel="icon" href="/favicon.ico"></head></html>')
+      end
+
+      it 'returns the resolved absolute favicon URL' do
+        expect(service.favicon_url).to eq('https://example.com/favicon.ico')
+      end
+    end
+
+    context 'when favicon exists with absolute URL' do
+      before do
+        stub_request(:get, base_url)
+          .to_return(body: '<html><head><link rel="icon" href="https://cdn.example.com/favicon.ico"></head></html>')
+      end
+
+      it 'returns the absolute favicon URL' do
+        expect(service.favicon_url).to eq('https://cdn.example.com/favicon.ico')
+      end
+    end
+
+    context 'when favicon does not exist' do
+      before do
+        stub_request(:get, base_url)
+          .to_return(body: 'Test')
+      end
+
+      it 'returns nil' do
+        expect(service.favicon_url).to be_nil
+      end
+    end
+  end
 end