Commit 9671a120 authored by 翟艳秋(20软)

1. [modified] 将detect_with_ocr中确认当前帧是否超过限定时间段的判断提前到取帧之前;

2. [modified] 修改detect_with_asr中的旁白区间的写入判断,避免在视频开头处插入旁白推荐字数错误的情况
parent 6cd70d8a
...@@ -53,6 +53,8 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st ...@@ -53,6 +53,8 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
texts = '' texts = ''
narratages = [] narratages = []
last_time = 0 last_time = 0
# 已检测到字幕
subtitle_detected = False
# 执行识别 # 执行识别
for i, audio_path in enumerate(audios_path): for i, audio_path in enumerate(audios_path):
print("{}开始处理{}".format(paddle.get_device(), audio_path)) print("{}开始处理{}".format(paddle.get_device(), audio_path))
...@@ -68,8 +70,10 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st ...@@ -68,8 +70,10 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
device=paddle.get_device() device=paddle.get_device()
) )
if text: if text:
if i == 0 or (i > 0 and time_stamps[i][0] - last_time >= 1): if not subtitle_detected or (subtitle_detected and time_stamps[i][0] - last_time >= 1):
recommend_lens = int((time_stamps[i][0] - last_time) * normal_speed) recommend_lens = int((time_stamps[i][0] - last_time) * normal_speed) if subtitle_detected else int(
(time_stamps[i][0] + pre_time) * normal_speed)
print("插入旁白,推荐字数为%d" % recommend_lens)
# narratages.append(["", "", "", "插入旁白,推荐字数为%d" % recommend_lens]) # narratages.append(["", "", "", "插入旁白,推荐字数为%d" % recommend_lens])
write_to_sheet(book_name, sheet_name, ["", "", "", "插入旁白,推荐字数为%d" % recommend_lens]) write_to_sheet(book_name, sheet_name, ["", "", "", "插入旁白,推荐字数为%d" % recommend_lens])
# narratages.append([round(time_stamps[i][0] + pre_time, 2), round(time_stamps[i][1] + pre_time, 2), # narratages.append([round(time_stamps[i][0] + pre_time, 2), round(time_stamps[i][1] + pre_time, 2),
...@@ -77,6 +81,7 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st ...@@ -77,6 +81,7 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
write_to_sheet(book_name, sheet_name, write_to_sheet(book_name, sheet_name,
[round(time_stamps[i][0] + pre_time, 2), round(time_stamps[i][1] + pre_time, 2), text, '']) [round(time_stamps[i][0] + pre_time, 2), round(time_stamps[i][1] + pre_time, 2), text, ''])
last_time = time_stamps[i][1] last_time = time_stamps[i][1]
subtitle_detected = True
print( print(
"第%d个分割音频 对应时间为%.2f-%.2f 识别结果: %s" % (i, time_stamps[i][0] + pre_time, time_stamps[i][1] + pre_time, text)) "第%d个分割音频 对应时间为%.2f-%.2f 识别结果: %s" % (i, time_stamps[i][0] + pre_time, time_stamps[i][1] + pre_time, text))
state[0] = float((i + 1) / len(audios_path)) if state[0] is None or state[0] < 0.99 else 0.99 state[0] = float((i + 1) / len(audios_path)) if state[0] is None or state[0] < 0.99 else 0.99
......
...@@ -14,7 +14,7 @@ from detect_with_asr import create_sheet, write_to_sheet ...@@ -14,7 +14,7 @@ from detect_with_asr import create_sheet, write_to_sheet
up_b, down_b = 0, 0 up_b, down_b = 0, 0
# 初始化ocr工具 # 初始化ocr工具
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False) ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False)
# 正常语速为4字/秒 # 正常语速为4字/秒
normal_speed = 4 normal_speed = 4
...@@ -45,7 +45,7 @@ def get_position(video_path, start_time): ...@@ -45,7 +45,7 @@ def get_position(video_path, start_time):
continue continue
img = img[height:] img = img[height:]
res = ocr.ocr(img, cls=True) res = ocr.ocr(img, cls=True)
sorted(res, key=lambda x: x[0][0][1]) sorted(res, key=lambda text: text[0][0][1])
bottom_position = None bottom_position = None
if len(res) == 0: if len(res) == 0:
continue continue
...@@ -86,8 +86,8 @@ def get_position(video_path, start_time): ...@@ -86,8 +86,8 @@ def get_position(video_path, start_time):
if txt_cnt == 3: if txt_cnt == 3:
break break
print(subtitle_position) print(subtitle_position)
up_b, down_b = max(subtitle_position, key=subtitle_position.get) up_bounding, down_bounding = max(subtitle_position, key=subtitle_position.get)
return up_b + height, down_b + height return up_bounding + height, down_bounding + height
def erasePunc(txt): def erasePunc(txt):
...@@ -144,11 +144,9 @@ def detect_subtitle(img): ...@@ -144,11 +144,9 @@ def detect_subtitle(img):
img = img[int(up_b) - 30:int(down_b) + 30] img = img[int(up_b) - 30:int(down_b) + 30]
# img = cv2.resize(img, (int(img.shape[1] * 0.5), int(img.shape[0] * 0.5))) # img = cv2.resize(img, (int(img.shape[1] * 0.5), int(img.shape[0] * 0.5)))
res = ocr.ocr(img, cls=True) res = ocr.ocr(img, cls=True)
sorted(res, key=lambda x: x[0][0][1]) sorted(res, key=lambda text: text[0][0][1])
bottom_position = None
if len(res) == 0: if len(res) == 0:
return None return None
# log = []
possible_txt = [] possible_txt = []
for x in res: for x in res:
rect, (txt, confidence) = x rect, (txt, confidence) = x
...@@ -196,6 +194,16 @@ def process_video(video_path, begin, end, book_path, sheet_name, state): ...@@ -196,6 +194,16 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
if frame is None: if frame is None:
break break
cnt += 1 cnt += 1
# 判断当前帧是否已超限制
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
print('--------------------------------------------------')
recommend_lens = int((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time) * normal_speed)
write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# 判断当前是否有字幕需要被保存下来
if end_time < start_time:
write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
break
# 每秒取4帧画面左右 # 每秒取4帧画面左右
if cnt % int(fps / 4) == 0: if cnt % int(fps / 4) == 0:
state[0] = float((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - begin) / (end - begin)) \ state[0] = float((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - begin) / (end - begin)) \
...@@ -239,18 +247,6 @@ def process_video(video_path, begin, end, book_path, sheet_name, state): ...@@ -239,18 +247,6 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
continue continue
# 当前字幕与上一段字幕不一样 # 当前字幕与上一段字幕不一样
lastSubTitle = subTitle lastSubTitle = subTitle
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
print('--------------------------------------------------')
# 还没有字幕被分析出来
# if len(res) == 0:
recommend_lens = int((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time) * normal_speed)
# else:
# recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int(
# (res[-1][0] - res[-2][1]) * normal_speed)
# narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
break
def detect_with_ocr(video_path, book_path, start_time, end_time, state): def detect_with_ocr(video_path, book_path, start_time, end_time, state):
......
...@@ -33,7 +33,7 @@ def create_detail_day() -> str: ...@@ -33,7 +33,7 @@ def create_detail_day() -> str:
return daytime return daytime
def make_print_to_file(path='./'): def make_print_to_file(path: str = './'):
"""将print的内容输出到log文件夹中 """将print的内容输出到log文件夹中
:param path:设置的log文件夹路径 :param path:设置的log文件夹路径
...@@ -132,7 +132,7 @@ def find_save_file(): ...@@ -132,7 +132,7 @@ def find_save_file():
outputFilePath.set(book_path) outputFilePath.set(book_path)
def trans_to_seconds(timePoint): def trans_to_seconds(timePoint: str) -> float:
"""将用户输入的时间字符串转换为秒数 """将用户输入的时间字符串转换为秒数
:param timePoint: 时间字符串 :param timePoint: 时间字符串
...@@ -147,7 +147,7 @@ def trans_to_seconds(timePoint): ...@@ -147,7 +147,7 @@ def trans_to_seconds(timePoint):
return time_in_seconds return time_in_seconds
def check_timePoint(timePoint) -> bool: def check_timePoint(timePoint: str) -> bool:
"""检查时间字符串格式是否正确 """检查时间字符串格式是否正确
:param timePoint: 时间字符串 :param timePoint: 时间字符串
...@@ -179,7 +179,7 @@ def check_timePoint(timePoint) -> bool: ...@@ -179,7 +179,7 @@ def check_timePoint(timePoint) -> bool:
return False return False
def start_process(p, p_label, state, intervals=100): def start_process(p, p_label, state: list, intervals: int = 100):
"""启动进度条 """启动进度条
:param p: 进度条组件 :param p: 进度条组件
...@@ -433,7 +433,7 @@ def start_synthesis(): ...@@ -433,7 +433,7 @@ def start_synthesis():
messagebox.showwarning("警告", "请选择音频存放路径") messagebox.showwarning("警告", "请选择音频存放路径")
return return
elif not os.path.exists(audio_dir): elif not os.path.exists(audio_dir):
messagebox.showwarning("警告", "当前音频存放路径有误,请检查一遍") messagebox.showwarning("警告", "当前音频存放路径有误,请检查一遍")
return return
if len(caption_path) == 0: if len(caption_path) == 0:
messagebox.showwarning("警告", "请选择字幕文件存放路径") messagebox.showwarning("警告", "请选择字幕文件存放路径")
......
...@@ -3,7 +3,7 @@ import os ...@@ -3,7 +3,7 @@ import os
import argparse import argparse
import time import time
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, ResultReason from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason
from azure.cognitiveservices.speech.audio import AudioOutputConfig from azure.cognitiveservices.speech.audio import AudioOutputConfig
import openpyxl import openpyxl
...@@ -259,8 +259,6 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state ...@@ -259,8 +259,6 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state
adjust_volume(origin_wav_path, start_timestamp, end_timestamp) adjust_volume(origin_wav_path, start_timestamp, end_timestamp)
# 将旁白混入原音频 # 将旁白混入原音频
mix_speech(adjusted_wav_path, narratage_paths, start_timestamp) mix_speech(adjusted_wav_path, narratage_paths, start_timestamp)
if state is not None:
state[0] = 1.00
# 删除临时语音文件、提取出来的原视频音频以及调整后的视频音频 # 删除临时语音文件、提取出来的原视频音频以及调整后的视频音频
if os.path.exists(tmp_file): if os.path.exists(tmp_file):
...@@ -269,6 +267,9 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state ...@@ -269,6 +267,9 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state
os.remove(origin_wav_path) os.remove(origin_wav_path)
os.remove(adjusted_wav_path) os.remove(adjusted_wav_path)
if state is not None:
state[0] = 1.00
if __name__ == '__main__': if __name__ == '__main__':
pass pass
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment