import csv
import json


def trainjson2csv(json_file_path='output_results.json', csv_file_path='output.csv'):
    """Convert a JSON Lines file into a CSV file.

    Each non-blank line of ``json_file_path`` is parsed as one JSON object
    and written as one CSV row. The CSV header is taken from the keys of the
    first object encountered.

    Args:
        json_file_path: Path of the JSON Lines input file.
        csv_file_path: Path of the CSV file to create (overwritten if present).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        ValueError: If a later object has keys that the first object lacked
            (raised by ``csv.DictWriter``).
    """
    # The input is opened first so a missing input file does not leave a
    # freshly truncated CSV behind.
    with open(json_file_path, 'r', encoding='utf-8') as json_file, \
            open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = None  # Created lazily, once the first row's keys are known.
        for line in json_file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            data = json.loads(line)
            if writer is None:
                writer = csv.DictWriter(csv_file, fieldnames=list(data.keys()))
                writer.writeheader()
            writer.writerow(data)

    print(f"JSON 数据已成功逐行转换为 CSV 文件,并保存为 {csv_file_path}")


def _first_part(node):
    """Return the first entry of ``node['message']['content']['parts']``.

    Returns None when ``node`` itself, or any level of that path, is missing,
    None, or empty.
    """
    message = (node or {}).get('message') or {}
    content = message.get('content') or {}
    parts = content.get('parts') or [None]
    return parts[0]


def origin2json(originfilepath, json_file_path='output_results.json'):
    """Extract input/output pairs from a conversations file into JSON Lines.

    The input is expected to be a JSON array of conversation objects, each
    with a ``title`` and a ``mapping`` of node id -> node. For every node, the
    first content part of the node is the "input" and the first content part
    of its first child is the "output". A record is written only when both
    are non-empty.

    Args:
        originfilepath: Path of the source conversations JSON file.
        json_file_path: Path of the JSON Lines file to write. Defaults to
            ``output_results.json``.

    Raises:
        json.JSONDecodeError: If the source file is not valid JSON.
        KeyError: If a conversation lacks ``mapping``, or lacks ``title``
            when a record is produced for it.
    """
    # Parse the whole document rather than only its first line, so that
    # pretty-printed (multi-line) JSON is handled as well.
    with open(originfilepath, 'r', encoding='utf-8') as file:
        datas = json.load(file)

    results = []
    for data in datas:
        mapping = data['mapping']
        for value in mapping.values():
            input_text = _first_part(value)

            # The output is taken from the first child of the current node.
            output_text = None
            children = value.get('children')
            if children:
                child_part = _first_part(mapping.get(children[0]))
                # Stringify only real values: str(None) would produce the
                # truthy text "None" and leak a bogus record.
                if child_part is not None:
                    output_text = str(child_part).replace("\n", "")

            # Keep the pair only when both sides are present and non-empty.
            if input_text and output_text:
                results.append({
                    "title": data['title'],
                    "input": input_text,
                    "output": output_text,
                })

    # One JSON object per line.
    with open(json_file_path, 'w', encoding='utf-8') as outfile:
        for result in results:
            json.dump(result, outfile, ensure_ascii=False)
            outfile.write('\n')

    print(f"数据已成功保存到 {json_file_path} 文件中")


if __name__ == '__main__':
    # Replace with the location of conversations.json inside your exported data.
    originfilepath = '7239cd96cb1ff7cc6db225c7bed1c1c4216352d844513d89e3d767f9a6db5c71-2024-08-09-06-34-56/conversations.json'
    json_file_path = 'output_results.json'  # Replace with your JSON file path.
    csv_file_path = 'output.csv'  # Replace with your CSV file path.
    origin2json(originfilepath)
    trainjson2csv(json_file_path, csv_file_path)