feat:中文字幕位置推荐

72383c7f · smile2019 · 9e7eef65 · 72383c7f · 72383c7f
Commit 72383c7f authored Sep 25, 2023 by smile2019
Hide whitespace changes
Inline Side-by-side

Showing with 45 additions and 1 deletion

.gitignore .gitignore +2 -0

detect_with_ocr.py detect_with_ocr.py +43 -1

No files found.
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@ dist
 build
 log
 product_test
+shenming_test
\ No newline at end of file
--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -30,6 +30,8 @@ from typing import Tuple, Union
 from detect_with_asr import create_sheet, write_to_sheet
 from main_window import MainWindow, Element
+import math
 # 字幕的上下边界
 up_b, down_b = 0, 0
@@ -43,6 +45,42 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cl
 # 正常语速为4字/秒
 normal_speed = 4
+def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]:
+    """Recommend the vertical position of the Chinese subtitles in a video.
+
+    Frames are sampled after seeking to 1/5, 2/5, 3/5 and 4/5 of the video
+    (10 frames per stage, 121 frames apart) and passed to OCR.  Every
+    detected text line that is not predominantly English votes for its
+    (y1, y3) pair, i.e. the y values of the first and third corner of its
+    box.  A vote whose two values both differ by less than 2 from an
+    already known pair is merged into that pair.  The pair with the most
+    votes is returned.
+
+    Args:
+        video_path: Path of the video file to analyse.
+        start_time: Start time in seconds.  NOTE(review): the seek derived
+            from it is overridden by the per-stage seek, so it does not
+            currently influence which frames are sampled -- confirm intent.
+
+    Returns:
+        The (y1, y3) pair that collected the most votes.
+
+    Raises:
+        IndexError: If no qualifying text was found in any sampled frame.
+    """
+    video = cv2.VideoCapture(video_path)
+    try:
+        fps = video.get(cv2.CAP_PROP_FPS)
+        video.set(cv2.CAP_PROP_POS_FRAMES, int(start_time * fps))
+        frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT)
+        position_map = {}
+        # Split the video into five equal parts: sample at 1/5, 2/5, 3/5, 4/5.
+        for i in range(4):
+            frame_index = math.floor((i + 1) * frame_num / 5)
+            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
+            for j in range(10):  # take 10 frames per stage
+                # Skip 120 frames and keep the one after them.
+                for _ in range(121):
+                    ok, frame = video.read()
+                    if not ok:
+                        break
+                if not ok:
+                    # Ran past the end of the video: this stage has no
+                    # further frames to sample.
+                    break
+                res = ocr.ocr(frame, cls=True)
+                # Assumes the flat result layout, one [box, (text, score)]
+                # entry per text line -- TODO confirm for the installed
+                # PaddleOCR version.
+                for result_item in res or []:
+                    (_, y1), _, (_, y3), _ = result_item[0]
+                    text = result_item[1][0]
+                    if not text:
+                        # An empty string would divide by zero below.
+                        continue
+                    english_count = len(re.findall(r'[A-Za-z]', text))
+                    # Skip English subtitles: lines with 10% or more ASCII
+                    # letters do not vote.
+                    if english_count / len(text) >= 0.1:
+                        continue
+                    print(i + 1, j, text)
+                    for key in position_map:
+                        if abs(y1 - key[0]) < 2 and abs(y3 - key[1]) < 2:
+                            position_map[key] += 1
+                            break
+                    else:
+                        # Only when no known pair is close enough does this
+                        # line start a new candidate.
+                        position_map[(y1, y3)] = 1
+    finally:
+        video.release()
+    ranked = sorted(position_map.items(), key=lambda kv: (kv[1], kv[0]))
+    print(ranked)
+    if not ranked:
+        raise IndexError("no Chinese subtitle candidates detected in the sampled frames")
+    top, bottom = ranked[-1][0]
+    return top, bottom
 def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
    # return (885.0, 989.0)
@@ -77,6 +115,7 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
    print(up)
    # print(down)
    return int(up), int(down)
+# TODO 现阶段是主窗体设定字幕的开始位置和结束位置，传入该函数。现在希望做成自动检测的？
    # while True:
    #     _, img = video.read()
    #     # print("img:", img)
@@ -248,6 +287,7 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
        # conf_thred2 = 0.7
        if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
            continue
+        # TODO 字幕去重算法改进
        if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
            subTitle += txt
            conf = max(conf,confidence)
@@ -323,6 +363,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''])
            break
        # 每秒取4帧画面左右
+        # TODO 取帧算法优化
        if cnt % int(fps / 4) == 0:
            # 更新当前工程的检测进度
            if pre_state is None:
@@ -450,5 +491,6 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
 if __name__ == '__main__':
-    path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
+    # path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
+    path = "C:/Users/Smile/Desktop/accessibility-movie/"
    # print("get_pos:", get_position(path, 0))