Created
December 17, 2023 15:37
-
-
Save jnettome/63ae76392e531245b104eddbdedd81fc to your computer and use it in GitHub Desktop.
Export Chatwoot messages as CSV or JSON to use with LLMs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Loop through conversations and build CSV text with one row per text message.
# Relies on the application's `Conversation` model being loaded
# (presumably run from the app's console -- confirm).
require 'csv'

# Column order of the header row and of every data row.
attributes = %w[conversation_id sender content]

generated = CSV.generate(headers: true) do |csv|
  csv << attributes
  # Sample run: only 10 conversations are exported here.
  Conversation.take(10).each do |conversation|
    conversation.messages.where(content_type: 'text').each do |message|
      # "incoming" is labelled as the user, "outgoing" as the admin;
      # any other message_type keeps an empty sender.
      who_sent =
        case message.message_type
        when 'incoming' then 'user'
        when 'outgoing' then 'admin'
        else ''
        end
      csv << [conversation.id, who_sent, message.content]
    end
  end
end
# File.write("curitiba_buyers.csv", generated)
# THIS IS THE VERSION I ACTUALLY USED
# Collect every text message of every conversation as a
# { conversation_id, sender, content } hash and dump the list as JSON.
require 'json'

data = []
# find_each loads conversations in batches instead of instantiating all of them at once.
Conversation.find_each do |conversation|
  conversation.messages.where(content_type: 'text').each do |message|
    # "incoming" is labelled as the user, "outgoing" as the admin;
    # any other message_type keeps an empty sender.
    who_sent =
      case message.message_type
      when 'incoming' then 'user'
      when 'outgoing' then 'admin'
      else ''
      end
    data << {
      conversation_id: conversation.id,
      sender: who_sent,
      content: message.content
    }
  end
end
# data.to_json
# Writing JSON to a file in the public folder
file_path = File.join('public', 'generated_data.json')
File.write(file_path, data.to_json)
puts "JSON data has been written to #{file_path}"
# After the download: remove the exported file again.
# (File has no `remove` method; `File.delete` is the call that removes a file.)
File.delete(file_path)
# Reshape the downloaded export into JSONL: one line per conversation, each
# line being {"messages": [{"role": ..., "content": ...}, ...]}.
require 'json'

# Read JSON data from file
data = File.read('generated_data.json')
parsed_data = JSON.parse(data)

# Group messages by conversation_id
grouped_messages = parsed_data.group_by { |message| message['conversation_id'] }

# Convert messages to the desired format and write to a .jsonl file
File.open('formatted_data.jsonl', 'w') do |file|
  grouped_messages.each_value do |messages|
    # Messages with a nil or empty sender are dropped.
    with_sender = messages.reject { |msg| msg['sender'].nil? || msg['sender'].empty? }
    # A conversation with nothing left produces no output line.
    next if with_sender.empty?

    # NOTE(review): every non-'user' sender is written with role 'system';
    # confirm that is the role the consumer of this file expects for admin replies.
    formatted_messages = with_sender.map do |msg|
      role = msg['sender'] == 'user' ? 'user' : 'system'
      { 'role' => role, 'content' => msg['content'] }
    end
    file.puts({ 'messages' => formatted_messages }.to_json)
  end
end
# Write a second JSONL file that keeps only conversations with enough
# back-and-forth: at least 3 usable messages from the user and 3 from the admin.
require 'json'

# Read JSON data from file
file_content = File.read('generated_data.json')
data = JSON.parse(file_content)

# Group messages by conversation_id
grouped_messages = data.group_by { |message| message['conversation_id'] }

# A message is usable only when sender and content are both present and non-empty.
# `to_s.empty?` is used instead of a negated `&.empty?` because `nil&.empty?`
# evaluates to nil, so `!value&.empty?` is true for nil and would keep it.
usable = ->(msg) { !msg['sender'].to_s.empty? && !msg['content'].to_s.empty? }

# Minimum number of usable messages required from each side.
min_per_side = 3

# Keep conversations meeting the threshold, storing only their usable messages
# so the counts checked here match what is written below.
filtered_conversations = grouped_messages.each_with_object({}) do |(conversation_id, messages), kept|
  usable_messages = messages.select(&usable)
  user_count = usable_messages.count { |msg| msg['sender'] == 'user' }
  admin_count = usable_messages.count { |msg| msg['sender'] == 'admin' }
  kept[conversation_id] = usable_messages if user_count >= min_per_side && admin_count >= min_per_side
end

if filtered_conversations.empty?
  puts 'No conversations found that meet the criteria.'
else
  # Write one JSON object per line, one line per conversation.
  File.open('filtered_data2.jsonl', 'w') do |file|
    filtered_conversations.each_value do |messages|
      # NOTE(review): every non-'user' sender is written with role 'system';
      # confirm that is the role the consumer of this file expects for admin replies.
      formatted_messages = messages.map do |msg|
        role = msg['sender'] == 'user' ? 'user' : 'system'
        { 'role' => role, 'content' => msg['content'] }
      end
      file.puts(JSON.generate('messages' => formatted_messages))
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment