Commit dda3b840 authored by 翟艳秋(20软)

1. 修改判定字幕的算法; 2. 在界面中添加停止按钮,用于终止当前任务,重新开始新的任务; 3. 将检测旁白的进度条优化为真进度条;

parent 742031dc
......@@ -73,7 +73,7 @@ def concat_wav(root):
return output_file
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None):
# 临时存储各种中间产物的文件夹
tmp_root = './tmp'
if not os.path.exists(tmp_root):
......@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
sys.path.append("./PaddlePaddle_DeepSpeech2")
from infer_path import predict_long_audio_with_paddle
table_content = predict_long_audio_with_paddle(audio_path, book_name_xlsx, start_time)
table_content = predict_long_audio_with_paddle(audio_path, start_time, state)
write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
# 删除中间文件
# shutil.rmtree(tmp_root)
......
......@@ -14,7 +14,7 @@ up_b, down_b = 0, 0
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def get_position(video_path,start_time):
def get_position(video_path, start_time):
"""
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param video_path: 视频存储路径
......@@ -130,7 +130,7 @@ def detect_subtitle(img):
return None
def process_video(video_path, begin, end):
def process_video(video_path, begin, end, state):
"""
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径
......@@ -156,6 +156,7 @@ def process_video(video_path, begin, end):
cnt += 1
# 每秒取4帧画面左右
if cnt % int(fps / 4) == 0:
state[0] = float(cnt / video.get(cv2.CAP_PROP_FRAME_COUNT)) if state[0] < 0.99 else 0.99
subTitle = detect_subtitle(frame)
# 第一次找到字幕
if lastSubTitle is None and subTitle is not None:
......@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value):
workbook.save(path)
def detect_with_ocr(video_path, book_path, start_time, end_time):
def detect_with_ocr(video_path, book_path, start_time, end_time, state):
book_name_xlsx = book_path
sheet_name_xlsx = "旁白插入位置建议"
......@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time):
# 获取并构建输出信息
table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
table_content = table_head + process_video(video_path, start_time, end_time)
table_content = table_head + process_video(video_path, start_time, end_time, state)
# 输出旁白位置推荐信息到表格
write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
if __name__ == '__main__':
......
......@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval):
interval = interval * fps
random_number = 50
ans = [False] * 3
print(ans)
for i in range(3):
random_list = random_int_list(start, start + interval, random_number)
start = start + interval
......@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval):
if ans[i]:
print(random_point)
break
if not ans[i]:
print('{}-{}时间段内未检测到字幕'.format(start, start + interval))
if i == 1 and Counter(ans).most_common(1)[0][0] is False:
break
video.release()
print(ans)
return Counter(ans).most_common(1)[0][0]
if __name__ == '__main__':
video_path = r'D:\heelo\hysxm.mp4'
start_time = time.time()
start = 90
interval = 120
print(detect_movie(video_path, start, interval))
print(time.time() - start_time)
pass
# video_path = r'D:\heelo\hysxm.mp4'
# start_time = time.time()
# start = 90
# interval = 120
# print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
......@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
return time_in_seconds
def detect(video_path, start_time, end_time, book_path):
def detect(video_path, start_time, end_time, book_path, state):
print("开始检测")
if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
......@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path):
has_subtitle = detect_movie(video_path, start_time, 60)
if has_subtitle:
detect_with_ocr(video_path, book_path, start_time, end_time)
detect_with_ocr(video_path, book_path, start_time, end_time, state)
else:
detect_with_asr(video_path, book_path, start_time, end_time)
detect_with_asr(video_path, book_path, start_time, end_time, state)
if __name__ == '__main__':
......
......@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
for i, text in enumerate(narratages):
wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
speech_synthesis(text, wav_path, speed)
time.sleep(1)
print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages))
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment