Commit 9b3754fc authored by wux51's avatar wux51

fix: OCR algorithm issue with repetitive subtitles

parent ec38f0a0
...@@ -406,6 +406,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -406,6 +406,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析 # res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
res = [] res = []
cnt = 0 cnt = 0
cur_time1 = 0
start_time = 0 start_time = 0
end_time = 0 end_time = 0
video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000) video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
...@@ -436,6 +437,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -436,6 +437,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
break break
# 每秒取4帧画面左右 # 每秒取4帧画面左右
# TODO 取帧算法优化 # TODO 取帧算法优化
fl = False
if cnt % int(fps / 4) == 0: if cnt % int(fps / 4) == 0:
# 更新当前工程的检测进度 # 更新当前工程的检测进度
if pre_state is None: if pre_state is None:
...@@ -453,7 +455,30 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -453,7 +455,30 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
subTitle = normalize(subTitle) subTitle = normalize(subTitle)
if len(subTitle) == 0: if len(subTitle) == 0:
subTitle = None subTitle = None
if fl:
if subTitle is None:
continue
else :
if string_similar(lastSubTitle, subTitle) < 0.7:
end_time = cur_time1
res.append([start_time, end_time, lastSubTitle])
if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
print('--------------------------------------------------')
recommend_lens = int((res[-1][0] - last_time) * normal_speed) if len(res) == 1 else int(
(res[-1][0] - res[-2][1]) * normal_speed)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
start_time = cur_time
else :
lastSubTitle = subTitle if conf > lastConf else lastSubTitle
lastConf = max(lastConf, conf)
fl = False
# 第一次找到字幕 # 第一次找到字幕
if lastSubTitle is None and subTitle is not None: if lastSubTitle is None and subTitle is not None:
if cur_ocr_h != None: if cur_ocr_h != None:
...@@ -462,6 +487,9 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -462,6 +487,9 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# 字幕消失 # 字幕消失
elif lastSubTitle is not None and subTitle is None: elif lastSubTitle is not None and subTitle is None:
fl = False
cur_time1 = cur_time
continue
end_time = cur_time end_time = cur_time
res.append([start_time, end_time, lastSubTitle]) res.append([start_time, end_time, lastSubTitle])
if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1: if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment