Commit dda3b840 authored by 翟艳秋(20软)

1. 修改判定字幕的算法; 2. 在界面中添加停止按钮,用于终止当前任务,重新开始新的任务; 3. 将检测旁白的进度条优化为真进度条。

parent 742031dc
...@@ -73,7 +73,7 @@ def concat_wav(root): ...@@ -73,7 +73,7 @@ def concat_wav(root):
return output_file return output_file
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1): def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None):
# 临时存储各种中间产物的文件夹 # 临时存储各种中间产物的文件夹
tmp_root = './tmp' tmp_root = './tmp'
if not os.path.exists(tmp_root): if not os.path.exists(tmp_root):
...@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1): ...@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
sys.path.append("./PaddlePaddle_DeepSpeech2") sys.path.append("./PaddlePaddle_DeepSpeech2")
from infer_path import predict_long_audio_with_paddle from infer_path import predict_long_audio_with_paddle
table_content = predict_long_audio_with_paddle(audio_path, book_name_xlsx, start_time) table_content = predict_long_audio_with_paddle(audio_path, start_time, state)
write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content) write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
# 删除中间文件 # 删除中间文件
# shutil.rmtree(tmp_root) # shutil.rmtree(tmp_root)
......
...@@ -14,7 +14,7 @@ up_b, down_b = 0, 0 ...@@ -14,7 +14,7 @@ up_b, down_b = 0, 0
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False) ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def get_position(video_path,start_time): def get_position(video_path, start_time):
""" """
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别 根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param video_path: 视频存储路径 :param video_path: 视频存储路径
...@@ -130,7 +130,7 @@ def detect_subtitle(img): ...@@ -130,7 +130,7 @@ def detect_subtitle(img):
return None return None
def process_video(video_path, begin, end): def process_video(video_path, begin, end, state):
""" """
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务 处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径 :param video_path: 待处理视频的路径
...@@ -156,6 +156,7 @@ def process_video(video_path, begin, end): ...@@ -156,6 +156,7 @@ def process_video(video_path, begin, end):
cnt += 1 cnt += 1
# 每秒取4帧画面左右 # 每秒取4帧画面左右
if cnt % int(fps / 4) == 0: if cnt % int(fps / 4) == 0:
state[0] = float(cnt / video.get(cv2.CAP_PROP_FRAME_COUNT)) if state[0] < 0.99 else 0.99
subTitle = detect_subtitle(frame) subTitle = detect_subtitle(frame)
# 第一次找到字幕 # 第一次找到字幕
if lastSubTitle is None and subTitle is not None: if lastSubTitle is None and subTitle is not None:
...@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value): ...@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value):
workbook.save(path) workbook.save(path)
def detect_with_ocr(video_path, book_path, start_time, end_time): def detect_with_ocr(video_path, book_path, start_time, end_time, state):
book_name_xlsx = book_path book_name_xlsx = book_path
sheet_name_xlsx = "旁白插入位置建议" sheet_name_xlsx = "旁白插入位置建议"
...@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time): ...@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time):
# 获取并构建输出信息 # 获取并构建输出信息
table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']] table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
table_content = table_head + process_video(video_path, start_time, end_time) table_content = table_head + process_video(video_path, start_time, end_time, state)
# 输出旁白位置推荐信息到表格 # 输出旁白位置推荐信息到表格
write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content) write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval): ...@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval):
interval = interval * fps interval = interval * fps
random_number = 50 random_number = 50
ans = [False] * 3 ans = [False] * 3
print(ans)
for i in range(3): for i in range(3):
random_list = random_int_list(start, start + interval, random_number) random_list = random_int_list(start, start + interval, random_number)
start = start + interval start = start + interval
...@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval): ...@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval):
if ans[i]: if ans[i]:
print(random_point) print(random_point)
break break
if not ans[i]:
print('{}-{}时间段内未检测到字幕'.format(start, start + interval))
if i == 1 and Counter(ans).most_common(1)[0][0] is False:
break
video.release() video.release()
print(ans) print(ans)
return Counter(ans).most_common(1)[0][0] return Counter(ans).most_common(1)[0][0]
if __name__ == '__main__': if __name__ == '__main__':
video_path = r'D:\heelo\hysxm.mp4' pass
start_time = time.time() # video_path = r'D:\heelo\hysxm.mp4'
start = 90 # start_time = time.time()
interval = 120 # start = 90
print(detect_movie(video_path, start, interval)) # interval = 120
print(time.time() - start_time) # print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
...@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint): ...@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
return time_in_seconds return time_in_seconds
def detect(video_path, start_time, end_time, book_path): def detect(video_path, start_time, end_time, book_path, state):
print("开始检测") print("开始检测")
if book_path is None: if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx" book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
...@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path): ...@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path):
has_subtitle = detect_movie(video_path, start_time, 60) has_subtitle = detect_movie(video_path, start_time, 60)
if has_subtitle: if has_subtitle:
detect_with_ocr(video_path, book_path, start_time, end_time) detect_with_ocr(video_path, book_path, start_time, end_time, state)
else: else:
detect_with_asr(video_path, book_path, start_time, end_time) detect_with_asr(video_path, book_path, start_time, end_time, state)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state): ...@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
for i, text in enumerate(narratages): for i, text in enumerate(narratages):
wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i]) wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
speech_synthesis(text, wav_path, speed) speech_synthesis(text, wav_path, speed)
time.sleep(1)
print("目前正在处理{}".format(wav_path)) print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages)) state[0] = float((i + 1) / len(narratages))
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment