Commit 03b3f93e authored by 陈晓勇(工程师)'s avatar 陈晓勇(工程师)

dev

parents ee03e0ae 464b8e26
......@@ -13,3 +13,4 @@ res/ffmpeg-4.3.1/bin/output.mp4
res/ffmpeg-4.3.1/bin/qiji_local.mp4
venv/
venv37/
shenming_test
\ No newline at end of file
......@@ -33,6 +33,8 @@ from main_window import MainWindow, Element
import time
import numpy as np
import copy
import math
# 字幕的上下边界
up_b, down_b = 0, 0
......@@ -48,6 +50,43 @@ normal_speed = 4
table_index = 0
ocr_h_map = {}
def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]:
    """Estimate the vertical band (top y, bottom y) where subtitles appear.

    Samples frames at the 1/5, 2/5, 3/5 and 4/5 marks of the video, runs OCR
    on each sampled frame, and clusters detected text boxes by their top and
    bottom y coordinates.  The most frequently seen band is assumed to be the
    subtitle position.

    :param video_path: path of the video file to analyse
    :param start_time: offset in seconds from which analysis starts
    :return: (top_y, bottom_y) of the most common text band, or (0, 0) when
             no candidate text was detected at all
    """
    print(">>>>>>video path:" + video_path)
    video = cv2.VideoCapture(video_path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        start = int(start_time * fps)
        video.set(cv2.CAP_PROP_POS_FRAMES, start)
        frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT)
        position_map = {}
        # Split the video into five parts and sample around 1/5, 2/5, 3/5, 4/5.
        for i in range(4):
            frame_index = math.floor((i + 1) * frame_num / 5)
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            for j in range(10):  # take 10 sample frames per part
                for k in range(120):  # skip 120 frames between samples
                    _, frame = video.read()
                _, frame = video.read()
                if frame is None:
                    # ran past the end of the video; stop sampling this part
                    break
                res = ocr.ocr(frame, cls=True)
                for result_item in res:
                    [x1, y1], [x2, y2], [x3, y3], [x4, y4] = result_item[0]
                    text = result_item[1][0]
                    if not text:
                        # guard: empty OCR text would divide by zero below
                        continue
                    english_text = ''.join(re.findall(r'[A-Za-z]', text))
                    # Skip mostly-English lines (assumed not to be the subtitle)
                    if (len(english_text) / len(text) < 0.1):
                        print(i + 1, j, text)
                        if len(position_map) == 0:
                            position_map[(y1, y3)] = 1
                        else:
                            keys = list(position_map.keys())
                            for key in keys:
                                # merge boxes whose band differs by < 2 px
                                if abs(y1 - key[0]) < 2 and abs(y3 - key[1]) < 2:
                                    position_map[key] += 1
                                    break
                            else:
                                position_map[(y1, y3)] = 1
    finally:
        # always release the capture handle, even if OCR raises
        video.release()
    print(sorted(position_map.items(), key=lambda kv: (kv[1], kv[0])))
    if not position_map:
        # nothing detected: fall back to the module default band (0, 0)
        return 0, 0
    y1, y2 = sorted(position_map.items(), key=lambda kv: (kv[1], kv[0]))[-1][0]
    return y1, y2
ocr_positions = []
......@@ -228,11 +267,25 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
up_b = ocr_positions[i][0]
down_b = ocr_positions[i][1]
height = down_b - up_b
img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
print(">>>>>into ocr")
print(int(up_b - height*0.7))
print(int(down_b + height*0.7))
# img = img[int(up_b):int(down_b)]
# if len(ocr_positions) == 1:
# img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
# else:
cropped_img = img[int(up_b):int(down_b)]
# 定义要添加的上下空白的高度
padding_top = height*0.7
padding_bottom = height*0.7
# 计算新图像的高度
new_height = cropped_img.shape[0] + padding_top + padding_bottom
# 创建一个新的空白图像
img = np.zeros((int(new_height), cropped_img.shape[1], 3), dtype=np.uint8)
# 将裁剪后的图像放置在新图像中间
start_y = int(padding_top)
end_y = start_y + cropped_img.shape[0]
img[start_y:end_y, :] = cropped_img
# img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
# 针对低帧率的视频做图像放大处理
print(height)
print(up_b)
......@@ -240,10 +293,10 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print(img.shape)
if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# index = index + 1
cv2.imwrite(f'./cap.png', img)
global index
cv2.imwrite(f'./cap/cap{index}.png', img)
index = index + 1
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>new log" + str(index - 1))
res = ocr.ocr(img, cls=True)
print('--------> res', res)
sorted(res, key=lambda text: text[0][0][1])
......@@ -277,6 +330,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
# conf_thred2 = 0.7
if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
continue
# TODO 字幕去重算法改进
if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
subTitle += txt
conf = max(conf,confidence)
......@@ -299,6 +353,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
return None, 0, None
def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow=None):
"""处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
......@@ -353,8 +408,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
if end_time < start_time:
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''],ocr_h=ocr_h)
print(">>>>>>subtitle,ocr_h1:" + str(lastSubTitle) + ">>>" + str(ocr_h))
break
# 每秒取4帧画面左右
# TODO 取帧算法优化
if cnt % int(fps / 4) == 0:
# 更新当前工程的检测进度
if pre_state is None:
......@@ -394,7 +451,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h:" + str(lastSubTitle) + ">>>" + str(ocr_h))
print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
elif lastSubTitle is not None and subTitle is not None:
# 两句话连在一起,但是两句话不一样
if string_similar(lastSubTitle, subTitle) < 0.6:
......@@ -412,7 +469,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h:" + str(lastSubTitle) + ">>>" + str(ocr_h))
print(">>>>>>subtitle,ocr_h3:" + str(lastSubTitle) + ">>>" + str(ocr_h))
start_time = end_time
else:
lastSubTitle = subTitle if conf > lastConf else lastSubTitle
......@@ -588,5 +645,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
if __name__ == '__main__':
    # Ad-hoc manual test: estimate the subtitle band of a local sample video.
    # The previous version assigned an unused `path` twice and then ignored it;
    # keep a single variable and actually pass it to the call.
    # NOTE(review): this path is machine-specific — adjust before running.
    video_path = "C:/Users/AIA/Desktop/1/1.mp4"
    evaluate_position(video_path, 0)
......@@ -631,6 +631,12 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.action_operate.setEnabled(True)
self.action_insert_aside_from_now.setEnabled(True)
self.insert_aside_from_now_btn.setEnabled(True)
# from detect_with_ocr import evaluate_position
# print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
# y1,y2 = evaluate_position(path.path()[1:], 0)
# self.widget.setY(y1)
# self.widget_bottom.setY(y2)
# print("y1:%d,y2:%d" %(y1,y2))
def check_ocr_rate(self):
if self.rate > self.rate_bottom:
......
......@@ -58,8 +58,15 @@ class MyWidget(QWidget):
# painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
# painter.drawLine(0, 1, 800, 1)
# painter.end()
print(">>>>>cur_y : " + str(self.y()))
return self.y()
def setY(self, h):
    """Move this widget to vertical offset h (x stays pinned to 0)."""
    current = self.y()
    print(">>>>>cur_y2 : " + str(current))
    self.move(0, h)
def down(self, mov_len):
    """Shift this widget down by mov_len pixels (x stays pinned to 0)."""
    print(">>>>>>>>>>>down" + str(mov_len))
    target_y = self.y() + mov_len
    self.move(0, target_y)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment