Commit 6442322f authored by szr712

使用新增数据训练

parent c217436c
......@@ -24,14 +24,17 @@ CUDA_VISIBLE_DEVICES=1 python train2.py -src_data data/pinyin_2.txt -trg_data da
CUDA_VISIBLE_DEVICES=1 nohup python train2.py -src_data data/pinyin_2.txt -trg_data data/hanzi_2.txt -src_lang en_core_web_sm -trg_lang fr_core_news_sm -epochs 100 -model_name token_classification
CUDA_VISIBLE_DEVICES=3 python translate_pkl.py -load_weights weights/token_classification/11-09_22:00:55/token_classification_35_0.055335590355098246 -pkl_dir weights/token_classification/11-09_22:00:55 -test_dir data/pkl/test-pkl -result_dir data/pkl/test-pkl-result
CUDA_VISIBLE_DEVICES=3 python translate_pkl.py -load_weights weights/token_classification/11-09_22:00:55/token_classification_35_0.055335590355098246 -pkl_dir weights/token_classification/11-09_22:00:55 -test_dir data/pkl/pinyin_pkl -result_dir data/pkl/pinyin_pkl_result
CUDA_VISIBLE_DEVICES=2 nohup python train_token_classification.py -src_data data/pinyin_split.txt -trg_data data/hanzi_split.txt -src_lang en_core_web_sm -trg_lang fr_core_news_sm -epochs 100 -model_name token_classification_split -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt
CUDA_VISIBLE_DEVICES=1 python translate2.py -load_weights weights/token_classification_split_2/11-19_17:16:18/token_classification_split_2_5_0.05776993067935109 -pkl_dir weights/token_classification_split_2/11-19_17:16:18 -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt
CUDA_VISIBLE_DEVICES=1 python translate2.py -load_weights weights/token_classification_split_4/11-23_22:02:06/token_classification_split_4_25_0.02742394618457183 -pkl_dir weights/token_classification_split_4/11-23_22:02:06 -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt
CUDA_VISIBLE_DEVICES=4 nohup python translate_file2.py -load_weights weights/token_classification_split_3/11-22_21:56:11/token_classification_split_3_25_0.029638311734888702 -pkl_dir weights/token_classification_split_3/11-22_21:56:11 -test_dir data/test_data/pinyin_split -result_dir data/test_data/result_split -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt >log1 2>&1 &
CUDA_VISIBLE_DEVICES=4 nohup python translate_file2.py -load_weights weights/token_classification_split_4/11-23_22:02:06/token_classification_split_4_25_0.02742394618457183 -pkl_dir weights/token_classification_split_4/11-23_22:02:06 -test_dir data/test_data/pinyin_split -result_dir data/test_data/result_split -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt >log1 2>&1 &
CUDA_VISIBLE_DEVICES=3 python eval_model.py -load_weights weights/token_classification_split_3/11-22_21:56:11/token_classification_split_3_1_0.09703897424042225 -pkl_dir weights/token_classification_split_3/11-22_21:56:11 -dev_dir data/dev -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt
CUDA_VISIBLE_DEVICES=1 python eval_model.py -load_weights weights/token_classification_split_4/11-23_22:02:06/token_classification_split_4_1_0.09183966986835003 -pkl_dir weights/token_classification_split_4/11-23_22:02:06 -dev_dir data/dev -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt >log1 2>&1 &
CUDA_VISIBLE_DEVICES=2 nohup python train_token_classification.py -src_data data/pinyin_split.txt -trg_data data/hanzi_split.txt -src_lang en_core_web_sm -trg_lang fr_core_news_sm -epochs 100 -model_name token_classification_split_4 -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt
\ No newline at end of file
CUDA_VISIBLE_DEVICES=6 nohup python train_token_classification.py -src_data data/pinyin_split.txt -trg_data data/hanzi_split.txt -src_lang en_core_web_sm -trg_lang fr_core_news_sm -epochs 100 -model_name token_classification_split_4 -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt
CUDA_VISIBLE_DEVICES=2 nohup python train_token_classification.py -src_data data/pinyin_new_split.txt -trg_data data/hanzi_new_split.txt -src_lang en_core_web_sm -trg_lang fr_core_news_sm -epochs 100 -model_name token_classification_split_new -src_voc ./data/voc/pinyin.txt -trg_voc ./data/voc/hanzi.txt >log1 2>&1 &
......@@ -214,6 +214,6 @@ if __name__ == "__main__":
# with open("./data/voc/yunmu.txt","r",encoding="utf-8") as f:
# yunmus=f.readlines()
# yunmus=[a.strip() for a in yunmus]
build_corpus("./data/train_set_total.txt",
"./data/pinyin_split.txt", "./data/hanzi_split.txt")
build_corpus("./data/train_set_new.txt",
"./data/pinyin_new_split.txt", "./data/hanzi_new_split.txt")
print("Done")
......@@ -4,8 +4,8 @@ from tqdm import tqdm
from build_corpus import split_initials_finals, wenzi2pinyin
hanzi_dir="./data/test_data/hanzi"
pinyin_dir="./data/test_data/pinyin_split"
hanzi_dir="./data/test_data/hanzi_new"
pinyin_dir="./data/test_data/pinyin_split_new"
with open("./data/voc/yunmu.txt","r",encoding="utf-8") as f:
yunmus=f.readlines()
......
......@@ -91,7 +91,7 @@ def main():
i=1
while i<=60:
for model_name in os.listdir(opt.pkl_dir):
if "token_classification_split_3_"+str(i)+"_" in model_name:
if "token_classification_split_new_"+str(i)+"_" in model_name:
print("model_name:{}".format(model_name))
opt.load_weights=os.path.join(opt.pkl_dir,model_name)
......
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment