You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

28 lines
944 B

3 months ago
import json
from datetime import datetime
from datasets import load_dataset
# Convert the "Edoh/manim_python" dataset into a chat-format JSONL file.
#
# Every example of the 'train' split becomes one JSON line holding a
# three-message conversation: a fixed system prompt, the example's
# 'instruction' as the user turn and its 'output' as the assistant turn.

# System prompt placed at the start of every conversation in the output.
_SYSTEM_PROMPT = "Write Manim scripts for animations in Python. Generate code, not text."


def _to_chat_entry(example):
    """Return the chat-style record for one dataset example.

    The example is indexed by the 'instruction' and 'output' keys; their
    values become the user and assistant message contents respectively.
    """
    return {
        "messages": [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": example['instruction']},
            {"role": "assistant", "content": example['output']},
        ]
    }


# Load the dataset
dataset = load_dataset("Edoh/manim_python")

# Prepare the output file name with a second-resolution timestamp
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
output_file = f"edoh-dataset-{timestamp}.jsonl"

# Explicit encoding keeps the file independent of the platform's locale
# default. json.dumps escapes non-ASCII characters by default, so the
# bytes written are the same as before on any ASCII-compatible locale.
with open(output_file, 'w', encoding="utf-8") as f:
    # One JSON document per line (JSONL), in dataset order
    for example in dataset['train']:
        f.write(json.dumps(_to_chat_entry(example)) + '\n')

print(f"Dataset converted and saved to {output_file}")