feat: Add BE changes for captain pdf support for faq generation (#12113)

This commit is contained in:
Tanmay Deep Sharma
2025-08-27 22:01:22 +07:00
committed by GitHub
parent 3cefa9b767
commit 1ba00075ce
19 changed files with 856 additions and 12 deletions

View File

@@ -0,0 +1,106 @@
require 'rails_helper'
require 'custom_exceptions/pdf_processing_error'
# Exercises Captain::Llm::PaginatedFaqGeneratorService with the OpenAI client and the
# API-key InstallationConfig lookup replaced by verifying doubles.
RSpec.describe Captain::Llm::PaginatedFaqGeneratorService do
  let(:document) { create(:captain_document) }
  let(:service) { described_class.new(document, pages_per_chunk: 5) }
  let(:openai_client) { instance_double(OpenAI::Client) }

  before do
    # Stub the API key lookup; any OpenAI::Client.new call returns the client double.
    api_key_config = instance_double(InstallationConfig, value: 'test-api-key')
    allow(InstallationConfig).to receive(:find_by!).with(name: 'CAPTAIN_OPEN_AI_API_KEY').and_return(api_key_config)
    allow(OpenAI::Client).to receive(:new).and_return(openai_client)
  end

  describe '#generate' do
    context 'when document lacks OpenAI file ID' do
      before { allow(document).to receive(:openai_file_id).and_return(nil) }

      it 'raises an error' do
        expect { service.generate }.to raise_error(CustomExceptions::PdfFaqGenerationError)
      end
    end

    context 'when generating FAQs from PDF pages' do
      # Response carrying a single FAQ and flagging that content was found.
      let(:faq_response) do
        chat_completion(
          faqs: [{ 'question' => 'What is this document about?', 'answer' => 'It explains key concepts.' }],
          has_content: true
        )
      end
      # Response carrying no FAQs and flagging that no content was found.
      let(:empty_response) { chat_completion(faqs: [], has_content: false) }

      before { allow(document).to receive(:openai_file_id).and_return('file-123') }

      # Builds a chat-completion-shaped hash whose message content is the JSON-encoded payload.
      def chat_completion(faqs:, has_content:)
        payload = JSON.generate({ 'faqs' => faqs, 'has_content' => has_content })
        { 'choices' => [{ 'message' => { 'content' => payload } }] }
      end

      it 'generates FAQs from paginated content' do
        # First chat call yields one FAQ, every later call yields the empty response.
        allow(openai_client).to receive(:chat).and_return(faq_response, empty_response)

        generated_faqs = service.generate

        expect(generated_faqs.size).to eq(1)
        expect(generated_faqs.first['question']).to eq('What is this document about?')
      end

      it 'stops when no more content' do
        allow(openai_client).to receive(:chat).and_return(empty_response)

        generated_faqs = service.generate

        expect(generated_faqs).to be_empty
      end

      it 'respects max iterations limit' do
        allow(openai_client).to receive(:chat).and_return(faq_response)
        # Pre-seed the counter at 19 so the example can assert it reads 20 after generating.
        service.instance_variable_set(:@iterations_completed, 19)

        service.generate

        expect(service.iterations_completed).to eq(20)
      end
    end
  end

  describe '#should_continue_processing?' do
    it 'stops at max iterations' do
      service.instance_variable_set(:@iterations_completed, 20)

      outcome = service.should_continue_processing?(faqs: ['faq'], has_content: true)

      expect(outcome).to be(false)
    end

    it 'stops when no FAQs returned' do
      outcome = service.should_continue_processing?(faqs: [], has_content: true)

      expect(outcome).to be(false)
    end

    it 'continues when FAQs exist and under limits' do
      outcome = service.should_continue_processing?(faqs: ['faq'], has_content: true)

      expect(outcome).to be(true)
    end
  end
end

View File

@@ -0,0 +1,58 @@
require 'rails_helper'
require 'custom_exceptions/pdf_processing_error'
# Exercises Captain::Llm::PdfProcessingService#process with the API-key InstallationConfig
# lookup stubbed and, for the upload path, the OpenAI client replaced by doubles.
RSpec.describe Captain::Llm::PdfProcessingService do
  let(:document) { create(:captain_document) }
  let(:service) { described_class.new(document) }

  before do
    # Stub the API key lookup so no real InstallationConfig record is required.
    api_key_config = instance_double(InstallationConfig, value: 'test-api-key')
    allow(InstallationConfig).to receive(:find_by!).with(name: 'CAPTAIN_OPEN_AI_API_KEY').and_return(api_key_config)
  end

  describe '#process' do
    context 'when document already has OpenAI file ID' do
      before { allow(document).to receive(:openai_file_id).and_return('existing-file-id') }

      it 'skips upload' do
        expect(document).not_to receive(:store_openai_file_id)

        service.process
      end
    end

    context 'when uploading PDF to OpenAI' do
      let(:mock_client) { instance_double(OpenAI::Client) }
      let(:pdf_content) { 'PDF content' }

      before do
        allow(document).to receive(:openai_file_id).and_return(nil)

        # Plain double for the attachment: ActiveStorage objects are complex Rails objects.
        attachment = double('pdf_file', download: pdf_content) # rubocop:disable RSpec/VerifiedDoubles
        allow(document).to receive(:pdf_file).and_return(attachment)

        # Plain double for the files endpoint: OpenAI::Files may not be loaded.
        files_endpoint = double('files_api', upload: { 'id' => 'file-abc123' }) # rubocop:disable RSpec/VerifiedDoubles
        allow(mock_client).to receive(:files).and_return(files_endpoint)
        allow(OpenAI::Client).to receive(:new).and_return(mock_client)
      end

      it 'uploads PDF and stores file ID' do
        expect(document).to receive(:store_openai_file_id).with('file-abc123')

        service.process
      end

      it 'raises error when upload fails' do
        # Override the default upload stub with a response that carries no file id.
        allow(mock_client.files).to receive(:upload).and_return({ 'id' => nil })

        expect { service.process }.to raise_error(CustomExceptions::PdfUploadError)
      end
    end
  end
end