Created
April 18, 2023 12:23
-
-
Save amirhmoradi/eee652fa65eacb2536a9d0f711940842 to your computer and use it in GitHub Desktop.
Use OpenAI API to translate json dictionaries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Description: This script uses OpenAI's API to translate the English text in a JSON file into Persian.
# You can change the context, the languages, and the model by editing these variables:
# - task_description
# - model_engine
# | |
# Make sure to set file paths for the source and destination files in the variables: | |
# - source_file | |
# - destination_file | |
# | |
# The script will generate temporary files for each batch of translated text, and then combine them into a single file. | |
# Once the script is done, it will remove the temporary files. | |
# | |
# The script will translate the text in batches of 20 entries (top-level key/value pairs of the JSON object).
# You can change the batch size by changing the value of the variable: | |
# - batch_size | |
# | |
# The script will create a new JSON file with the translated text. | |
# The source JSON file must be a flat object whose values are strings; the translated file keeps the same keys:
# Source file example: | |
# { | |
# "key1": "value1", | |
# "key2": "value2", | |
# "key3": "value3", | |
# ... | |
# } | |
# Translated file example: | |
# { | |
# "key1": "translated value1", | |
# "key2": "translated value2", | |
# "key3": "translated value3", | |
# ... | |
# } | |
# Note: The OpenAI API is not free. You need to create an account and get an API key to use it. | |
# | |
# Requirements: | |
# - Python 3.6+ | |
# - OpenAI Python library: pip install openai | |
# - OpenAI API key | |
# | |
# Usage: | |
# 1. Install the OpenAI Python library: pip install openai | |
# 2. Set the OPENAI_API_KEY environment variable to your OpenAI API key | |
# 3. Run the script: python openai_translate.py | |
import json | |
import math | |
import time | |
import os | |
import openai | |
def _openai_translate(text, task_description, model_engine):
    """Return the OpenAI completion for *text*, stripped of surrounding whitespace.

    The prompt is the task description followed by a newline and the text to
    translate.
    """
    response = openai.Completion.create(
        engine=model_engine,
        prompt=task_description + "\n" + text,
        temperature=0.7,
        max_tokens=2048,
        n=1,
        stop=None,
    )
    return response.choices[0].text.strip()


def _translate_with_retry(translate, text, attempts=3, retry_delay=3):
    """Call ``translate(text)``, retrying on failure.

    Raises:
        RuntimeError: if every attempt fails. Raising here (instead of falling
            through) guarantees that a stale response from a previous entry is
            never stored under the current key.
    """
    for attempt in range(1, attempts + 1):
        try:
            return translate(text)
        except Exception as e:  # retry boundary: the last failure is re-raised below
            print(f"Error: {e}")
            if attempt == attempts:
                raise RuntimeError(
                    f"Translation failed after {attempts} attempts"
                ) from e
            print(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)


def translate_and_save(
    source_file="./locales/en/common.json",
    destination_file="./locales/fa/common.json",
    batch_size=20,
    task_description="For a CRM software like Hubspot, I need translations. Use a friendly tone and translate from English to Persian language the following:",
    model_engine="text-davinci-003",
    translate=None,
    request_delay=2,
):
    """Translate the string values of a flat JSON dictionary and save the result.

    The entries of ``source_file`` are processed in batches. Each translated
    batch is written to ``translated_batch_<n>.json`` in the current working
    directory; once every batch is done the batch files are merged into
    ``destination_file`` and then deleted. If the run aborts part-way, the
    batch files written so far are left in place.

    Args:
        source_file: Path of the JSON file to translate (a JSON object).
        destination_file: Path of the JSON file to write. Missing parent
            directories are created.
        batch_size: Number of entries per batch; must be at least 1.
        task_description: Instruction placed before each text in the prompt.
        model_engine: OpenAI engine name used by the default translator.
        translate: Optional callable taking the source text and returning the
            translated text. When omitted, the OpenAI completion API is used
            with the key from the ``OPENAI_API_KEY`` environment variable.
        request_delay: Seconds to wait after each translated entry, to avoid
            hitting the API rate limit.

    Raises:
        ValueError: if ``batch_size`` is below 1 or the source file does not
            contain a JSON object.
        RuntimeError: if an entry still fails after all retries.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    if translate is None:
        # Load OpenAI API credentials from environment variables.
        openai.api_key = os.environ.get("OPENAI_API_KEY")

        def translator(text):
            return _openai_translate(text, task_description, model_engine)
    else:
        translator = translate

    # Load the source JSON file. UTF-8 is forced so the result does not depend
    # on the platform's locale encoding.
    with open(source_file, "r", encoding="utf-8") as f:
        json_data = json.load(f)
    if not isinstance(json_data, dict):
        raise ValueError("The source file must contain a JSON object")

    # Determine the number of batches needed.
    entries = list(json_data.items())
    total_lines = len(entries)
    total_batches = math.ceil(total_lines / batch_size)
    print(f"Total lines: {total_lines}")
    print(f"Batch size: {batch_size}")
    print(f"Total batches: {total_batches}")

    # Process the batches.
    batch_files = []
    for batch_id in range(1, total_batches + 1):
        start = (batch_id - 1) * batch_size
        translated_lines = {}
        for key, value in entries[start:start + batch_size]:
            # Only non-empty strings can be sent to the model; anything else
            # (numbers, nested objects, empty strings) is copied unchanged.
            if not isinstance(value, str) or not value.strip():
                print(f"Skipping non-text value for key: {key}")
                translated_lines[key] = value
                continue
            translated_lines[key] = _translate_with_retry(translator, value)
            time.sleep(request_delay)  # To avoid hitting the OpenAI API rate limit

        # Save the translated batch to its own JSON file.
        batch_file = f"translated_batch_{batch_id}.json"
        with open(batch_file, "w", encoding="utf-8") as f:
            json.dump(translated_lines, f, ensure_ascii=False, indent=4)
        batch_files.append(batch_file)

    # Combine all batch files into a single dictionary.
    translated_data = {}
    for batch_file in batch_files:
        with open(batch_file, "r", encoding="utf-8") as f:
            translated_data.update(json.load(f))

    # Save the translated data, creating the target directory when needed.
    destination_dir = os.path.dirname(destination_file)
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)
    with open(destination_file, "w", encoding="utf-8") as f:
        json.dump(translated_data, f, ensure_ascii=False, indent=4)

    # The destination file was written successfully, so the temporary batch
    # files are no longer needed.
    for batch_file in batch_files:
        os.remove(batch_file)
# Run the translation only when this file is executed as a script, so that
# importing the module does not trigger (billable) OpenAI API calls.
if __name__ == "__main__":
    translate_and_save()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment