Commit ea10822c authored by szr712

支持pkl文件翻译 (Support translating pkl files)

parent 25cce464
......@@ -49,4 +49,4 @@ CUDA_VISIBLE_DEVICES=4 python translate2.py -load_weights weights/token_classifi
CUDA_VISIBLE_DEVICES=4 python translate_file2.py -load_weights weights/token_classifiaction_aug_Chinese/token_classification_split_Chinese_148_0.012723878987599165 -pkl_dir weights/token_classifiaction_aug_Chinese -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt -test_dir data/test_data/end2end/Chinese/pinyin -result_dir data/test_data/end2end/Chinese/hanzi
CUDA_VISIBLE_DEVICES=4 python translate_file2.py -load_weights weights/Chinese_weights/token_classification_split_Chinese_149_0.013830240316456183 -pkl_dir weights/Chinese_weights -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt -test_dir data/test_data/end2end_chinese/pinyin -result_dir data/test_data/end2end_chinese/pre -tone_filter
CUDA_VISIBLE_DEVICES=4 python translate_file2.py -load_weights weights/Chinese_weights/token_classification_split_Chinese_149_0.013830240316456183 -pkl_dir weights/Chinese_weights -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt -test_dir data/pkl/Chinese_paper/pkl -result_dir data/pkl/Chinese_paper/hanzi -tone_filter
......@@ -3,16 +3,17 @@ import os
import pickle
import itertools
pkl_dir="./data/pkl/pinyin_pkl(1)"
pkl_dir="./data/pkl/Chinese_paper/pkl"
for file in tqdm(os.listdir(pkl_dir)):
for file in tqdm(os.listdir(pkl_dir)[2:3]):
# print("filename:{}".format(file))
contents = pickle.load(open(os.path.join(pkl_dir,file),"rb"))
# contents=random.sample(contents,10)
# print(contents)
# contents=[list(itertools.chain.from_iterable(lines)) for lines in contents]
contents=[list(itertools.chain.from_iterable(lines)) for lines in contents]
print(contents)
# contents=list(itertools.chain.from_iterable(contents))
with open(os.path.join("data/pkl/pinyin_pkl_txt",file[:-4]+".txt"),"w",encoding="utf-8") as f:
f.write(" ".join(contents))
# with open(os.path.join("data/pkl/pinyin_pkl_txt",file[:-4]+".txt"),"w",encoding="utf-8") as f:
# f.write(" ".join(contents))
......@@ -19,6 +19,7 @@ import random
import copy
from pypinyin import pinyin, Style
from pypinyin.style._utils import get_initials, get_finals
import itertools
def get_yunmus(file_path):
......@@ -210,8 +211,12 @@ def main():
for file in os.listdir(opt.test_dir):
print("filename:{}".format(file))
contents = open(os.path.join(opt.test_dir, file)
).read().strip().split('\n')
if ".pkl" in file:
contents = pickle.load(open(os.path.join(opt.test_dir, file),"rb"))
contents=[" ".join(list(itertools.chain.from_iterable(lines))) for lines in contents]
else:
contents = open(os.path.join(opt.test_dir, file)
).read().strip().split('\n')
# contents=random.sample(contents,10)
start = time.time()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment