增加汉字logits拼接函数

25cce464 · szr712 · d8b00dd6 · 25cce464
Commit 25cce464 authored Jan 17, 2022 by szr712
Show whitespace changes
Inline Side-by-side

Showing with 11 additions and 0 deletions

translate_file2.py translate_file2.py +11 -0

No files found.
--- a/translate_file2.py
+++ b/translate_file2.py
@@ -29,6 +29,16 @@ def get_yunmus(file_path):
    return yunmus


+def get_hanzi_logits(logits, SRC, TRG, opt):
+    """Collect the logit rows whose predicted token is a Chinese character.
+
+    Only the first batch element (``logits[0]``) is inspected. A position is
+    kept when the argmax token string compares within ``'\u4e00'``..``'\u9fa5'``
+    (for single-character tokens this is the CJK ideograph range check).
+
+    Args:
+        logits: Tensor indexed as ``[batch, position, vocab]``.
+        SRC: Unused here; kept for signature parity with the other helpers.
+        TRG: Object exposing ``TRG.vocab.itos`` (token index -> token string).
+        opt: Unused here.
+
+    Returns:
+        Tensor of shape ``(1, k, vocab)`` holding the ``k`` selected rows in
+        their original order, on the same device as ``logits``. ``k`` is 0
+        when no position predicts a Chinese character.
+    """
+    preds = torch.argmax(logits, dim=-1)
+    itos = TRG.vocab.itos
+    # Positions whose predicted token is a Chinese character.
+    keep = [
+        pos for pos, tok in enumerate(preds[0].tolist())
+        if '\u4e00' <= itos[tok] <= '\u9fa5'
+    ]
+    # Build the index on logits' own device instead of forcing CUDA, and
+    # select rows directly so no random placeholder row ends up in the result.
+    index = torch.tensor(keep, dtype=torch.long, device=logits.device)
+    return logits[0].index_select(0, index).unsqueeze(dim=0)
+
+
 def get_result(src, model, SRC, TRG, opt):
    src_mask = (src != SRC.vocab.stoi['<pad>']).unsqueeze(-2)
    output = model(src, src_mask)
@@ -80,6 +90,7 @@ def get_result(src, model, SRC, TRG, opt):
        return ''.join(result).replace("_", "").replace(" ", "")

    else:
+        # output=get_hanzi_logits(output, SRC, TRG, opt)
        preds = torch.argmax(output, dim=-1)
        return ''.join([TRG.vocab.itos[tok] for tok in preds[0][:] if tok.item() != 0]).replace("_", "").replace(" ", "")
    # return ' '.join([TRG.vocab.itos[tok] for tok in preds[0][:]])