feat: legacy features to ruby llm (#12994)

This commit is contained in:
Aakash Bakhle
2025-12-11 14:17:28 +05:30
committed by GitHub
parent f2054e703a
commit 1de8d3e56d
39 changed files with 860 additions and 755 deletions

View File

@@ -1,4 +1,5 @@
class Captain::Llm::ContactAttributesService < Llm::LegacyBaseOpenAiService
class Captain::Llm::ContactAttributesService < Llm::BaseAiService
include Integrations::LlmInstrumentation
def initialize(assistant, conversation)
super()
@assistant = assistant
@@ -17,33 +18,38 @@ class Captain::Llm::ContactAttributesService < Llm::LegacyBaseOpenAiService
attr_reader :content
def generate_attributes
response = @client.chat(parameters: chat_parameters)
parse_response(response)
rescue OpenAI::Error => e
Rails.logger.error "OpenAI API Error: #{e.message}"
response = instrument_llm_call(instrumentation_params) do
chat
.with_params(response_format: { type: 'json_object' })
.with_instructions(system_prompt)
.ask(@content)
end
parse_response(response.content)
rescue RubyLLM::Error => e
ChatwootExceptionTracker.new(e, account: @conversation.account).capture_exception
[]
end
def chat_parameters
prompt = Captain::Llm::SystemPromptsService.attributes_generator
def instrumentation_params
{
span_name: 'llm.captain.contact_attributes',
model: @model,
response_format: { type: 'json_object' },
temperature: @temperature,
account_id: @conversation.account_id,
feature_name: 'contact_attributes',
messages: [
{
role: 'system',
content: prompt
},
{
role: 'user',
content: content
}
]
{ role: 'system', content: system_prompt },
{ role: 'user', content: @content }
],
metadata: { assistant_id: @assistant.id, contact_id: @contact.id }
}
end
def parse_response(response)
content = response.dig('choices', 0, 'message', 'content')
# Returns the system prompt used for contact-attribute extraction, as
# produced by Captain::Llm::SystemPromptsService.attributes_generator.
def system_prompt
Captain::Llm::SystemPromptsService.attributes_generator
end
def parse_response(content)
return [] if content.nil?
JSON.parse(content.strip).fetch('attributes', [])

View File

@@ -1,4 +1,5 @@
class Captain::Llm::ContactNotesService < Llm::LegacyBaseOpenAiService
class Captain::Llm::ContactNotesService < Llm::BaseAiService
include Integrations::LlmInstrumentation
def initialize(assistant, conversation)
super()
@assistant = assistant
@@ -18,38 +19,42 @@ class Captain::Llm::ContactNotesService < Llm::LegacyBaseOpenAiService
attr_reader :content
def generate_notes
response = @client.chat(parameters: chat_parameters)
parse_response(response)
rescue OpenAI::Error => e
Rails.logger.error "OpenAI API Error: #{e.message}"
response = instrument_llm_call(instrumentation_params) do
chat
.with_params(response_format: { type: 'json_object' })
.with_instructions(system_prompt)
.ask(@content)
end
parse_response(response.content)
rescue RubyLLM::Error => e
ChatwootExceptionTracker.new(e, account: @conversation.account).capture_exception
[]
end
def chat_parameters
account_language = @conversation.account.locale_english_name
prompt = Captain::Llm::SystemPromptsService.notes_generator(account_language)
def instrumentation_params
{
span_name: 'llm.captain.contact_notes',
model: @model,
response_format: { type: 'json_object' },
temperature: @temperature,
account_id: @conversation.account_id,
feature_name: 'contact_notes',
messages: [
{
role: 'system',
content: prompt
},
{
role: 'user',
content: content
}
]
{ role: 'system', content: system_prompt },
{ role: 'user', content: @content }
],
metadata: { assistant_id: @assistant.id, contact_id: @contact.id }
}
end
def parse_response(response)
content = response.dig('choices', 0, 'message', 'content')
return [] if content.nil?
# Returns the system prompt for note generation. The prompt is built by
# Captain::Llm::SystemPromptsService.notes_generator from the conversation
# account's `locale_english_name`.
def system_prompt
  Captain::Llm::SystemPromptsService.notes_generator(
    @conversation.account.locale_english_name
  )
end
JSON.parse(content.strip).fetch('notes', [])
def parse_response(response)
return [] if response.nil?
JSON.parse(response.strip).fetch('notes', [])
rescue JSON::ParserError => e
Rails.logger.error "Error in parsing GPT processed response: #{e.message}"
[]

View File

@@ -1,4 +1,5 @@
class Captain::Llm::ConversationFaqService < Llm::LegacyBaseOpenAiService
class Captain::Llm::ConversationFaqService < Llm::BaseAiService
include Integrations::LlmInstrumentation
DISTANCE_THRESHOLD = 0.3
def initialize(assistant, conversation)
@@ -35,7 +36,7 @@ class Captain::Llm::ConversationFaqService < Llm::LegacyBaseOpenAiService
faqs.each do |faq|
combined_text = "#{faq['question']}: #{faq['answer']}"
embedding = Captain::Llm::EmbeddingService.new.get_embedding(combined_text)
embedding = Captain::Llm::EmbeddingService.new(account_id: @conversation.account_id).get_embedding(combined_text)
similar_faqs = find_similar_faqs(embedding)
if similar_faqs.any?
@@ -81,38 +82,43 @@ class Captain::Llm::ConversationFaqService < Llm::LegacyBaseOpenAiService
end
def generate
response = @client.chat(parameters: chat_parameters)
parse_response(response)
rescue OpenAI::Error => e
Rails.logger.error "OpenAI API Error: #{e.message}"
response = instrument_llm_call(instrumentation_params) do
chat
.with_params(response_format: { type: 'json_object' })
.with_instructions(system_prompt)
.ask(@content)
end
parse_response(response.content)
rescue RubyLLM::Error => e
Rails.logger.error "LLM API Error: #{e.message}"
[]
end
def chat_parameters
account_language = @conversation.account.locale_english_name
prompt = Captain::Llm::SystemPromptsService.conversation_faq_generator(account_language)
def instrumentation_params
{
span_name: 'llm.captain.conversation_faq',
model: @model,
response_format: { type: 'json_object' },
temperature: @temperature,
account_id: @conversation.account_id,
conversation_id: @conversation.id,
feature_name: 'conversation_faq',
messages: [
{
role: 'system',
content: prompt
},
{
role: 'user',
content: content
}
]
{ role: 'system', content: system_prompt },
{ role: 'user', content: @content }
],
metadata: { assistant_id: @assistant.id }
}
end
def parse_response(response)
content = response.dig('choices', 0, 'message', 'content')
return [] if content.nil?
# Returns the system prompt for FAQ extraction from a conversation. The
# prompt is built by Captain::Llm::SystemPromptsService.conversation_faq_generator
# from the conversation account's `locale_english_name`.
def system_prompt
  Captain::Llm::SystemPromptsService.conversation_faq_generator(
    @conversation.account.locale_english_name
  )
end
JSON.parse(content.strip).fetch('faqs', [])
def parse_response(response)
return [] if response.nil?
JSON.parse(response.strip).fetch('faqs', [])
rescue JSON::ParserError => e
Rails.logger.error "Error in parsing GPT processed response: #{e.message}"
[]

View File

@@ -1,22 +1,38 @@
require 'openai'
class Captain::Llm::EmbeddingService
include Integrations::LlmInstrumentation
class Captain::Llm::EmbeddingService < Llm::LegacyBaseOpenAiService
class EmbeddingsError < StandardError; end
def self.embedding_model
@embedding_model = InstallationConfig.find_by(name: 'CAPTAIN_EMBEDDING_MODEL')&.value.presence || OpenAiConstants::DEFAULT_EMBEDDING_MODEL
def initialize(account_id: nil)
Llm::Config.initialize!
@account_id = account_id
@embedding_model = InstallationConfig.find_by(name: 'CAPTAIN_EMBEDDING_MODEL')&.value.presence || LlmConstants::DEFAULT_EMBEDDING_MODEL
end
def get_embedding(content, model: self.class.embedding_model)
response = @client.embeddings(
parameters: {
model: model,
input: content
}
)
# Resolves the embedding model name: the value of the
# 'CAPTAIN_EMBEDDING_MODEL' InstallationConfig row when it is present,
# otherwise LlmConstants::DEFAULT_EMBEDDING_MODEL.
def self.embedding_model
  configured_model = InstallationConfig.find_by(name: 'CAPTAIN_EMBEDDING_MODEL')&.value.presence
  configured_model || LlmConstants::DEFAULT_EMBEDDING_MODEL
end
response.dig('data', 0, 'embedding')
rescue StandardError => e
# Requests an embedding for `content` through RubyLLM.
#
# @param content the text to embed; blank content short-circuits to `[]`
# @param model the embedding model name (defaults to @embedding_model)
# @return `[]` for blank content; otherwise whatever
#   `instrument_embedding_call` returns for a block that evaluates to
#   `RubyLLM.embed(...).vectors`
# @raise [EmbeddingsError] when RubyLLM raises a RubyLLM::Error; the
#   failure is logged first. Other exception classes are not rescued here.
def get_embedding(content, model: @embedding_model)
  if content.blank?
    []
  else
    span_params = instrumentation_params(content, model)
    instrument_embedding_call(span_params) { RubyLLM.embed(content, model: model).vectors }
  end
rescue RubyLLM::Error => e
  Rails.logger.error "Embedding API Error: #{e.message}"
  raise EmbeddingsError, "Failed to create an embedding: #{e.message}"
end
private
# Builds the parameter hash passed to `instrument_embedding_call`.
#
# @param content the input being embedded (stored under :input)
# @param model the embedding model name
# @return [Hash] span name, model, input, feature name and @account_id
def instrumentation_params(content, model)
  request_fields = { span_name: 'llm.captain.embedding', model: model, input: content }
  request_fields.merge(feature_name: 'embedding', account_id: @account_id)
end
end

View File

@@ -1,15 +1,24 @@
class Captain::Llm::FaqGeneratorService < Llm::LegacyBaseOpenAiService
def initialize(content, language = 'english')
class Captain::Llm::FaqGeneratorService < Llm::BaseAiService
include Integrations::LlmInstrumentation
def initialize(content, language = 'english', account_id: nil)
super()
@language = language
@content = content
@account_id = account_id
end
def generate
response = @client.chat(parameters: chat_parameters)
parse_response(response)
rescue OpenAI::Error => e
Rails.logger.error "OpenAI API Error: #{e.message}"
response = instrument_llm_call(instrumentation_params) do
chat
.with_params(response_format: { type: 'json_object' })
.with_instructions(system_prompt)
.ask(@content)
end
parse_response(response.content)
rescue RubyLLM::Error => e
Rails.logger.error "LLM API Error: #{e.message}"
[]
end
@@ -17,26 +26,25 @@ class Captain::Llm::FaqGeneratorService < Llm::LegacyBaseOpenAiService
attr_reader :content, :language
def chat_parameters
prompt = Captain::Llm::SystemPromptsService.faq_generator(language)
# Returns the FAQ-generation system prompt for `language`, as produced by
# Captain::Llm::SystemPromptsService.faq_generator.
def system_prompt
Captain::Llm::SystemPromptsService.faq_generator(language)
end
def instrumentation_params
{
span_name: 'llm.captain.faq_generator',
model: @model,
response_format: { type: 'json_object' },
temperature: @temperature,
feature_name: 'faq_generator',
account_id: @account_id,
messages: [
{
role: 'system',
content: prompt
},
{
role: 'user',
content: content
}
{ role: 'system', content: system_prompt },
{ role: 'user', content: @content }
]
}
end
def parse_response(response)
content = response.dig('choices', 0, 'message', 'content')
def parse_response(content)
return [] if content.nil?
JSON.parse(content.strip).fetch('faqs', [])

View File

@@ -1,4 +1,6 @@
class Captain::Llm::PaginatedFaqGeneratorService < Llm::LegacyBaseOpenAiService
include Integrations::LlmInstrumentation
# Default pages per chunk - easily configurable
DEFAULT_PAGES_PER_CHUNK = 10
MAX_ITERATIONS = 20 # Safety limit to prevent infinite loops
@@ -13,7 +15,7 @@ class Captain::Llm::PaginatedFaqGeneratorService < Llm::LegacyBaseOpenAiService
@max_pages = options[:max_pages] # Optional limit from UI
@total_pages_processed = 0
@iterations_completed = 0
@model = OpenAiConstants::PDF_PROCESSING_MODEL
@model = LlmConstants::PDF_PROCESSING_MODEL
end
def generate
@@ -43,7 +45,19 @@ class Captain::Llm::PaginatedFaqGeneratorService < Llm::LegacyBaseOpenAiService
private
def generate_standard_faqs
response = @client.chat(parameters: standard_chat_parameters)
params = standard_chat_parameters
instrumentation_params = {
span_name: 'llm.faq_generation',
account_id: @document&.account_id,
feature_name: 'faq_generation',
model: @model,
messages: params[:messages]
}
response = instrument_llm_call(instrumentation_params) do
@client.chat(parameters: params)
end
parse_response(response)
rescue OpenAI::Error => e
Rails.logger.error I18n.t('captain.documents.openai_api_error', error: e.message)
@@ -84,7 +98,13 @@ class Captain::Llm::PaginatedFaqGeneratorService < Llm::LegacyBaseOpenAiService
def process_page_chunk(start_page, end_page)
params = build_chunk_parameters(start_page, end_page)
response = @client.chat(parameters: params)
instrumentation_params = build_instrumentation_params(params, start_page, end_page)
response = instrument_llm_call(instrumentation_params) do
@client.chat(parameters: params)
end
result = parse_chunk_response(response)
{ faqs: result['faqs'] || [], has_content: result['has_content'] != false }
rescue OpenAI::Error => e
@@ -180,21 +200,26 @@ class Captain::Llm::PaginatedFaqGeneratorService < Llm::LegacyBaseOpenAiService
# Word-overlap similarity between two strings: the number of distinct
# words they share divided by the number of distinct words across both.
# Words are compared case-insensitively and split on non-word characters.
#
# @param str1 [String]
# @param str2 [String]
# @return [Integer, Float] Integer 0 when neither string contains a word,
#   otherwise a Float between 0.0 and 1.0
def similarity_score(str1, str2)
  tokens_a, tokens_b = [str1, str2].map do |text|
    text.downcase.split(/\W+/).reject(&:empty?)
  end

  union_size = (tokens_a | tokens_b).size
  return 0 if union_size.zero?

  (tokens_a & tokens_b).size.to_f / union_size
end
# Explains why chunked processing stopped. Checks run in priority order
# and the first one that matches wins:
#   1. the iteration cap (MAX_ITERATIONS) was reached
#   2. the optional @max_pages limit was reached
#   3. the last chunk produced no FAQs
#   4. the last chunk reported `has_content: false`
#
# @param last_chunk_result [Hash] expects :faqs (Array) and :has_content
# @return [String] a human-readable reason, or 'Unknown' if none matched
def determine_stop_reason(last_chunk_result)
  return 'Maximum iterations reached' if @iterations_completed >= MAX_ITERATIONS
  return 'Maximum pages processed' if @max_pages && @total_pages_processed >= @max_pages
  return 'No content found in last chunk' if last_chunk_result[:faqs].empty?
  return 'End of document reached' if last_chunk_result[:has_content] == false

  'Unknown'
end
# Builds the instrumentation hash for one page-chunk LLM call.
#
# @param params [Hash] the chat parameters; only params[:messages] is read
# @param start_page first page of the chunk
# @param end_page last page of the chunk
# @return [Hash] span/feature names, model, messages and chunk metadata.
#   Document-derived values are nil when @document is nil.
def build_instrumentation_params(params, start_page, end_page)
  chunk_metadata = {
    document_id: @document&.id,
    start_page: start_page,
    end_page: end_page,
    # @iterations_completed counts finished chunks, so this call is the next one.
    iteration: @iterations_completed + 1
  }

  {
    span_name: 'llm.paginated_faq_generation',
    account_id: @document&.account_id,
    feature_name: 'paginated_faq_generation',
    model: @model,
    messages: params[:messages],
    metadata: chunk_metadata
  }
end
end

View File

@@ -1,4 +1,6 @@
class Captain::Llm::PdfProcessingService < Llm::LegacyBaseOpenAiService
include Integrations::LlmInstrumentation
def initialize(document)
super()
@document = document
@@ -19,13 +21,30 @@ class Captain::Llm::PdfProcessingService < Llm::LegacyBaseOpenAiService
def upload_pdf_to_openai
with_tempfile do |temp_file|
response = @client.files.upload(
parameters: {
file: temp_file,
purpose: 'assistants'
}
)
response['id']
instrument_file_upload do
response = @client.files.upload(
parameters: {
file: temp_file,
purpose: 'assistants'
}
)
response['id']
end
end
end
# Runs the given upload block, wrapping it in an 'llm.file.upload' span
# when ChatwootApp.otel_enabled? is true. When it is not, the block is
# simply yielded to and its value returned.
#
# Inside the span, provider/purpose/account/document attributes are set
# before the block runs, and 'file.id' is set afterwards only if the block
# returned a truthy value.
#
# @yieldreturn the uploaded file id (or nil)
# @return the block's value
def instrument_file_upload(&)
  if ChatwootApp.otel_enabled?
    tracer.in_span('llm.file.upload') do |span|
      span.set_attribute('gen_ai.provider', 'openai')
      span.set_attribute('file.purpose', 'assistants')
      span.set_attribute(ATTR_LANGFUSE_USER_ID, document.account_id.to_s)
      span.set_attribute(ATTR_LANGFUSE_TAGS, ['pdf_upload'].to_json)
      span.set_attribute(format(ATTR_LANGFUSE_METADATA, 'document_id'), document.id.to_s)

      yield.tap do |uploaded_id|
        span.set_attribute('file.id', uploaded_id) if uploaded_id
      end
    end
  else
    yield
  end
end

View File

@@ -1,4 +1,5 @@
class Captain::Onboarding::WebsiteAnalyzerService < Llm::LegacyBaseOpenAiService
class Captain::Onboarding::WebsiteAnalyzerService < Llm::BaseAiService
include Integrations::LlmInstrumentation
MAX_CONTENT_LENGTH = 8000
def initialize(website_url)
@@ -57,19 +58,29 @@ class Captain::Onboarding::WebsiteAnalyzerService < Llm::LegacyBaseOpenAiService
end
def extract_business_info
prompt = build_analysis_prompt
response = instrument_llm_call(instrumentation_params) do
chat
.with_params(response_format: { type: 'json_object' }, max_tokens: 1000)
.with_temperature(0.1)
.with_instructions(build_analysis_prompt)
.ask(@website_content)
end
response = client.chat(
parameters: {
model: model,
messages: [{ role: 'user', content: prompt }],
response_format: { type: 'json_object' },
temperature: 0.1,
max_tokens: 1000
}
)
parse_llm_response(response.content)
end
parse_llm_response(response.dig('choices', 0, 'message', 'content'))
# Builds the instrumentation hash for the website-analysis LLM call.
# The recorded messages are the analysis prompt (system role) followed by
# @website_content (user role).
#
# @return [Hash] span/feature names, model, temperature, messages and the
#   analysed URL as metadata
def instrumentation_params
  recorded_messages = [
    { role: 'system', content: build_analysis_prompt },
    { role: 'user', content: @website_content }
  ]

  {
    span_name: 'llm.captain.website_analyzer',
    model: @model,
    temperature: 0.1,
    feature_name: 'website_analyzer',
    messages: recorded_messages,
    metadata: { website_url: @website_url }
  }
end
def build_analysis_prompt
@@ -95,7 +106,7 @@ class Captain::Onboarding::WebsiteAnalyzerService < Llm::LegacyBaseOpenAiService
end
def parse_llm_response(response_text)
parsed_response = JSON.parse(response_text)
parsed_response = JSON.parse(response_text.strip)
{
success: true,

View File

@@ -1,48 +1,59 @@
class Internal::AccountAnalysis::ContentEvaluatorService < Llm::LegacyBaseOpenAiService
def initialize
super()
class Internal::AccountAnalysis::ContentEvaluatorService
include Integrations::LlmInstrumentation
@model = 'gpt-4o-mini'.freeze
def initialize
Llm::Config.initialize!
end
def evaluate(content)
return default_evaluation if content.blank?
begin
response = send_to_llm(content)
evaluation = handle_response(response)
log_evaluation_results(evaluation)
evaluation
rescue StandardError => e
handle_evaluation_error(e)
moderation_result = instrument_moderation_call(instrumentation_params(content)) do
RubyLLM.moderate(content.to_s[0...10_000])
end
build_evaluation(moderation_result)
rescue StandardError => e
handle_evaluation_error(e)
end
private
def send_to_llm(content)
Rails.logger.info('Sending content to LLM for security evaluation')
@client.chat(
parameters: {
model: @model,
messages: llm_messages(content),
response_format: { type: 'json_object' }
}
)
# Builds the instrumentation hash for a moderation call.
#
# @param content the content being evaluated (stored as-is under :input)
# @return [Hash] span name, model label, input and feature name
def instrumentation_params(content)
  span_fields = {
    span_name: 'llm.internal.content_moderation',
    model: 'text-moderation-latest'
  }
  span_fields.merge(input: content, feature_name: 'content_evaluator')
end
def handle_response(response)
return default_evaluation if response.nil?
def build_evaluation(result)
flagged = result.flagged?
categories = result.flagged_categories
parsed = JSON.parse(response.dig('choices', 0, 'message', 'content').strip)
{
'threat_level' => parsed['threat_level'] || 'unknown',
'threat_summary' => parsed['threat_summary'] || 'No threat summary provided',
'detected_threats' => parsed['detected_threats'] || [],
'illegal_activities_detected' => parsed['illegal_activities_detected'] || false,
'recommendation' => parsed['recommendation'] || 'review'
evaluation = {
'threat_level' => flagged ? determine_threat_level(result) : 'safe',
'threat_summary' => flagged ? "Content flagged for: #{categories.join(', ')}" : 'No threats detected',
'detected_threats' => categories,
'illegal_activities_detected' => categories.any? { |c| c.include?('violence') || c.include?('self-harm') },
'recommendation' => flagged ? 'review' : 'approve'
}
log_evaluation_results(evaluation)
evaluation
end
# Maps the highest moderation category score to a threat level.
#
#   score >= 0.8 -> 'critical'
#   score >= 0.5 -> 'high'
#   score >= 0.2 -> 'medium'
#   otherwise    -> 'low' (also used when there are no scores)
#
# @param result an object responding to `category_scores` with a Hash of
#   category => numeric score
# @return [String] one of 'critical', 'high', 'medium', 'low'
def determine_threat_level(result)
  peak_score = result.category_scores.values.max || 0

  # Ordered from most to least severe; the first floor the score reaches wins.
  severity_floors = [[0.8, 'critical'], [0.5, 'high'], [0.2, 'medium']]
  matched = severity_floors.find { |floor, _label| peak_score >= floor }

  matched ? matched.last : 'low'
end
def default_evaluation(error_type = nil)
@@ -56,18 +67,11 @@ class Internal::AccountAnalysis::ContentEvaluatorService < Llm::LegacyBaseOpenAi
end
def log_evaluation_results(evaluation)
Rails.logger.info("LLM evaluation - Level: #{evaluation['threat_level']}, Illegal activities: #{evaluation['illegal_activities_detected']}")
Rails.logger.info("Moderation evaluation - Level: #{evaluation['threat_level']}, Threats: #{evaluation['detected_threats'].join(', ')}")
end
# Logs the error's message at error level, then returns the fallback
# evaluation from `default_evaluation('evaluation_failure')`.
def handle_evaluation_error(error)
Rails.logger.error("Error evaluating content: #{error.message}")
default_evaluation('evaluation_failure')
end
def llm_messages(content)
[
{ role: 'system', content: 'You are a security analysis system that evaluates content for potential threats and scams.' },
{ role: 'user', content: Internal::AccountAnalysis::PromptsService.threat_analyser(content.to_s[0...10_000]) }
]
end
end

View File

@@ -14,8 +14,6 @@ class Llm::BaseAiService
setup_temperature
end
# Creates a RubyLLM chat for `model` and applies `temperature` to it.
# Both keywords default to the instance's @model / @temperature, so callers
# may override either one per call.
#
# @return the value of `RubyLLM.chat(model:).with_temperature(temperature)`
def chat(model: @model, temperature: @temperature)
  session = RubyLLM.chat(model: model)
  session.with_temperature(temperature)
end

View File

@@ -1,8 +1,11 @@
# frozen_string_literal: true
# DEPRECATED: This class uses the legacy OpenAI Ruby gem directly.
# New features should use Llm::BaseAiService with RubyLLM instead.
# This class will be removed once all services are migrated to RubyLLM.
# Only used for PDF/file operations that require OpenAI's files API:
# - Captain::Llm::PdfProcessingService (files.upload for assistants)
# - Captain::Llm::PaginatedFaqGeneratorService (uses file_id from uploaded files)
#
# For all other LLM operations, use Llm::BaseAiService with RubyLLM instead.
class Llm::LegacyBaseOpenAiService
DEFAULT_MODEL = 'gpt-4o-mini'

View File

@@ -1,4 +1,8 @@
class Messages::AudioTranscriptionService < Llm::LegacyBaseOpenAiService
class Messages::AudioTranscriptionService < Llm::LegacyBaseOpenAiService
include Integrations::LlmInstrumentation
WHISPER_MODEL = 'whisper-1'.freeze
attr_reader :attachment, :message, :account
def initialize(attachment)
@@ -46,7 +50,7 @@ class Messages::AudioTranscriptionService < Llm::LegacyBaseOpenAiService
temp_file_path = fetch_audio_file
response_text = nil
transcribed_text = nil
File.open(temp_file_path, 'rb') do |file|
response = @client.audio.transcribe(
@@ -56,14 +60,23 @@ class Messages::AudioTranscriptionService < Llm::LegacyBaseOpenAiService
temperature: 0.4
}
)
response_text = response['text']
transcribed_text = response['text']
end
FileUtils.rm_f(temp_file_path)
update_transcription(response_text)
response_text
update_transcription(transcribed_text)
transcribed_text
end
# Builds the instrumentation hash for an audio transcription call.
#
# @param file_path path of the audio file being transcribed
# @return [Hash] span/feature names, WHISPER_MODEL, the account id (nil
#   when `account` is nil) and the file path
def instrumentation_params(file_path)
  owning_account_id = account&.id

  {
    span_name: 'llm.messages.audio_transcription',
    model: WHISPER_MODEL,
    account_id: owning_account_id,
    feature_name: 'audio_transcription',
    file_path: file_path
  }
end
def update_transcription(transcribed_text)