Skip to content

Instantly share code, notes, and snippets.

@jnettome
Created December 17, 2023 15:37
Show Gist options
  • Save jnettome/63ae76392e531245b104eddbdedd81fc to your computer and use it in GitHub Desktop.
Save jnettome/63ae76392e531245b104eddbdedd81fc to your computer and use it in GitHub Desktop.
Export Chatwoot messages as CSV or JSON to use with LLMs
# Build a CSV string with one row per text message, sampling up to 10 conversations.
# Columns: conversation_id, sender, content.
attributes = %w[conversation_id sender content]
generated = CSV.generate(headers: true) do |csv|
  csv << attributes
  Conversation.take(10).each do |conversation|
    text_messages = conversation.messages.where(content_type: 'text')
    text_messages.each do |message|
      # Incoming messages are labelled "user", outgoing ones "admin";
      # any other message type keeps an empty sender.
      who_sent =
        case message.message_type
        when "incoming" then "user"
        when "outgoing" then "admin"
        else ""
        end
      csv << [conversation.id, who_sent, message.content]
    end
  end
end
# File.write("curitiba_buyers.csv", generated)
# THIS IS THE VARIANT THAT WAS ACTUALLY USED
# Collect every text message of every conversation as a flat list of hashes
# with the keys conversation_id, sender and content.
data = []
# find_each loads conversations in batches instead of instantiating all of them at once.
Conversation.find_each do |conversation|
  conversation.messages.where(content_type: 'text').each do |message|
    # Incoming messages are labelled "user", outgoing ones "admin";
    # any other message type keeps an empty sender.
    who_sent =
      case message.message_type
      when "incoming" then "user"
      when "outgoing" then "admin"
      else ""
      end
    data << {
      conversation_id: conversation.id,
      sender: who_sent,
      content: message.content
    }
  end
end
# data.to_json
# Writing JSON to a file in the public folder
file_path = File.join('public', 'generated_data.json')
File.write(file_path, data.to_json)
puts "JSON data has been written to #{file_path}"
# After the download, remove the exported file.
# File.delete is the correct API; File.remove does not exist and raises NoMethodError.
File.delete(file_path)
require 'json'

# Load the exported messages (an array of objects with the keys
# conversation_id, sender and content).
data = File.read('generated_data.json')
parsed_data = JSON.parse(data)

# Bucket messages per conversation, then drop in place every message whose
# sender is nil or empty.
grouped_messages = parsed_data.group_by { |message| message['conversation_id'] }
grouped_messages.each_value do |messages|
  messages.reject! do |msg|
    sender = msg['sender']
    sender.nil? || sender.empty?
  end
end

# Emit one JSON line per conversation that still has messages:
# {"messages": [{"role": ..., "content": ...}, ...]}
File.open('formatted_data.jsonl', 'w') do |file|
  grouped_messages.each_value do |messages|
    next if messages.empty?

    formatted_messages = messages.map do |msg|
      # 'user' stays 'user'; every other remaining sender is written as 'system'.
      role = msg['sender'] == 'user' ? 'user' : 'system'
      { 'role' => role, 'content' => msg['content'] }
    end
    file.puts({ 'messages' => formatted_messages }.to_json)
  end
end
# Convert the exported messages into a JSONL file that only contains
# conversations with at least 3 usable messages from both the user and the admin.
require 'json'

# Read JSON data from file
file_content = File.read('generated_data.json')
data = JSON.parse(file_content)

# Group messages by conversation_id
grouped_messages = data.group_by { |message| message['conversation_id'] }

# Discard unusable messages up front: a nil or empty sender, or nil or empty content.
# Doing this before counting keeps the threshold below consistent with what is
# actually written out (`!content&.empty?` alone would let nil content through).
usable_messages = {}
grouped_messages.each do |conversation_id, messages|
  usable_messages[conversation_id] = messages.reject do |msg|
    msg['sender'].to_s.empty? || msg['content'].to_s.empty?
  end
end

# Filter conversations with at least 3 usable messages from both admin and user
filtered_conversations = usable_messages.select do |_conversation_id, messages|
  user_count = messages.count { |msg| msg['sender'] == 'user' }
  admin_count = messages.count { |msg| msg['sender'] == 'admin' }
  user_count >= 3 && admin_count >= 3
end

# Convert the filtered conversations to JSONL format if any conversation matches the criteria
if filtered_conversations.any?
  jsonl_data = filtered_conversations.map do |_conversation_id, messages|
    formatted_messages = messages.map do |msg|
      # 'user' stays 'user'; every other remaining sender is written as 'system'.
      role = msg['sender'] == 'user' ? 'user' : 'system'
      { 'role' => role, 'content' => msg['content'] }
    end
    { 'messages' => formatted_messages }
  end

  # Write the JSONL data to a file, one conversation per line
  File.open('filtered_data2.jsonl', 'w') do |file|
    jsonl_data.each do |conversation|
      file.puts(JSON.generate(conversation))
    end
  end
else
  puts 'No conversations found that meet the criteria.'
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment