Commit 72383c7f authored by smile2019

feat:中文字幕位置推荐

parent 9e7eef65
......@@ -9,3 +9,4 @@ dist
build
log
product_test
shenming_test
\ No newline at end of file
......@@ -30,6 +30,8 @@ from typing import Tuple, Union
from detect_with_asr import create_sheet, write_to_sheet
from main_window import MainWindow, Element
import math
# 字幕的上下边界
up_b, down_b = 0, 0
......@@ -43,6 +45,42 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cl
# 正常语速为4字/秒
normal_speed = 4
def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]:
    """Estimate the vertical band in which Chinese subtitles appear most often.

    The video is sampled around 1/5, 2/5, 3/5 and 4/5 of its length. In each
    segment up to 10 frames are OCR'd, one every 121 frames. Each recognised
    text line that is not predominantly English votes for its
    ``(top_y, bottom_y)`` band; bands whose edges differ by less than
    2 pixels are counted as the same band.

    Args:
        video_path: Path of the video file to analyse.
        start_time: Start time in seconds.
            NOTE(review): the seek derived from this value is overridden by
            the per-segment seeks below, so it currently has no influence on
            which frames are sampled -- confirm the intended behaviour.

    Returns:
        The ``(y1, y3)`` pair of the band with the most votes, i.e. the y
        coordinates of the first and third corner of the OCR box (presumably
        the top and bottom edge -- verify against the OCR box ordering).

    Raises:
        IndexError: If no qualifying text was detected in any sampled frame.
    """
    segment_count = 4          # sample points at 1/5 .. 4/5 of the video
    samples_per_segment = 10   # frames OCR'd per segment
    frames_per_sample = 121    # 120 frames skipped, the 121st is analysed
    band_tolerance = 2         # max pixel difference to count as same band
    max_english_ratio = 0.1    # above this the line is treated as English

    position_map = {}
    video = cv2.VideoCapture(video_path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        start = int(start_time * fps)
        video.set(cv2.CAP_PROP_POS_FRAMES, start)
        frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT)

        for i in range(segment_count):
            frame_index = math.floor((i + 1) * frame_num / 5)
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)

            for j in range(samples_per_segment):
                # Advance to the next sampled frame, stopping as soon as the
                # video runs out so that no empty frame reaches the OCR.
                ok, frame = False, None
                for _ in range(frames_per_sample):
                    ok, frame = video.read()
                    if not ok:
                        break
                if not ok or frame is None:
                    break

                res = ocr.ocr(frame, cls=True)
                for result_item in res or []:
                    (_, y1), _, (_, y3), _ = result_item[0]
                    text = result_item[1][0]
                    if not text:
                        continue  # avoid dividing by zero below

                    # Skip English subtitles: only lines with few Latin
                    # letters are allowed to vote.
                    english_count = len(re.findall(r'[A-Za-z]', text))
                    if english_count / len(text) >= max_english_ratio:
                        continue
                    print(i+1,j,text)

                    # Vote for an existing band within tolerance; open a new
                    # band only when none of the known bands matches.
                    for key in position_map:
                        if (abs(y1 - key[0]) < band_tolerance
                                and abs(y3 - key[1]) < band_tolerance):
                            position_map[key] += 1
                            break
                    else:
                        position_map[(y1, y3)] = 1
    finally:
        # Always free the decoder handle, even if OCR raises.
        video.release()

    # Ascending by (votes, band), so the winner is the last element.
    ranked = sorted(position_map.items(), key=lambda kv: (kv[1], kv[0]))
    print(ranked)
    if not ranked:
        raise IndexError(
            "no subtitle text detected in the sampled frames of %r" % video_path
        )
    y1, y2 = ranked[-1][0]
    return y1, y2
def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
# return (885.0, 989.0)
......@@ -77,6 +115,7 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
print(up)
# print(down)
return int(up), int(down)
# TODO 现阶段是主窗体设定字幕的开始位置和结束位置,传入该函数。现在希望做成自动检测的?
# while True:
# _, img = video.read()
# # print("img:", img)
......@@ -248,6 +287,7 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
# conf_thred2 = 0.7
if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
continue
# TODO 字幕去重算法改进
if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
subTitle += txt
conf = max(conf,confidence)
......@@ -323,6 +363,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''])
break
# 每秒取4帧画面左右
# TODO 取帧算法优化
if cnt % int(fps / 4) == 0:
# 更新当前工程的检测进度
if pre_state is None:
......@@ -450,5 +491,6 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
if __name__ == '__main__':
path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
# path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
path = "C:/Users/Smile/Desktop/accessibility-movie/"
# print("get_pos:", get_position(path, 0))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment