Commit 03b3f93e authored by 陈晓勇(工程师)'s avatar 陈晓勇(工程师)

dev

parents ee03e0ae 464b8e26
......@@ -13,3 +13,4 @@ res/ffmpeg-4.3.1/bin/output.mp4
res/ffmpeg-4.3.1/bin/qiji_local.mp4
venv/
venv37/
shenming_test
\ No newline at end of file
......@@ -33,6 +33,8 @@ from main_window import MainWindow, Element
import time
import numpy as np
import copy
import math
# 字幕的上下边界
up_b, down_b = 0, 0
......@@ -48,6 +50,43 @@ normal_speed = 4
table_index = 0
ocr_h_map = {}
def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]:
    """Estimate the vertical band (top y, bottom y) where subtitles appear.

    Samples frames at the 1/5, 2/5, 3/5 and 4/5 marks of the video, runs OCR
    on each sampled frame, and clusters detected text boxes by their top and
    bottom y coordinates.  The most frequently seen band is assumed to be the
    subtitle position.

    :param video_path: path of the video file to analyse
    :param start_time: offset in seconds from which analysis starts
    :return: (top_y, bottom_y) of the most common text band, or (0, 0) when
             no candidate text was detected at all
    """
    print(">>>>>>video path:" + video_path)
    video = cv2.VideoCapture(video_path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        start = int(start_time * fps)
        video.set(cv2.CAP_PROP_POS_FRAMES, start)
        frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT)
        position_map = {}
        # Split the video into five parts and sample around 1/5, 2/5, 3/5, 4/5.
        for i in range(4):
            frame_index = math.floor((i + 1) * frame_num / 5)
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            for j in range(10):  # take 10 sample frames per part
                for k in range(120):  # skip 120 frames between samples
                    _, frame = video.read()
                _, frame = video.read()
                if frame is None:
                    # ran past the end of the video; stop sampling this part
                    break
                res = ocr.ocr(frame, cls=True)
                for result_item in res:
                    [x1, y1], [x2, y2], [x3, y3], [x4, y4] = result_item[0]
                    text = result_item[1][0]
                    if not text:
                        # guard: empty OCR text would divide by zero below
                        continue
                    english_text = ''.join(re.findall(r'[A-Za-z]', text))
                    # Skip mostly-English lines (assumed not to be the subtitle)
                    if (len(english_text) / len(text) < 0.1):
                        print(i + 1, j, text)
                        if len(position_map) == 0:
                            position_map[(y1, y3)] = 1
                        else:
                            keys = list(position_map.keys())
                            for key in keys:
                                # merge boxes whose band differs by < 2 px
                                if abs(y1 - key[0]) < 2 and abs(y3 - key[1]) < 2:
                                    position_map[key] += 1
                                    break
                            else:
                                position_map[(y1, y3)] = 1
    finally:
        # always release the capture handle, even if OCR raises
        video.release()
    print(sorted(position_map.items(), key=lambda kv: (kv[1], kv[0])))
    if not position_map:
        # nothing detected: fall back to the module default band (0, 0)
        return 0, 0
    y1, y2 = sorted(position_map.items(), key=lambda kv: (kv[1], kv[0]))[-1][0]
    return y1, y2
ocr_positions = []
......@@ -228,11 +267,25 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
up_b = ocr_positions[i][0]
down_b = ocr_positions[i][1]
height = down_b - up_b
img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
print(">>>>>into ocr")
print(int(up_b - height*0.7))
print(int(down_b + height*0.7))
# img = img[int(up_b):int(down_b)]
# if len(ocr_positions) == 1:
# img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
# else:
cropped_img = img[int(up_b):int(down_b)]
# 定义要添加的上下空白的高度
padding_top = height*0.7
padding_bottom = height*0.7
# 计算新图像的高度
new_height = cropped_img.shape[0] + padding_top + padding_bottom
# 创建一个新的空白图像
img = np.zeros((int(new_height), cropped_img.shape[1], 3), dtype=np.uint8)
# 将裁剪后的图像放置在新图像中间
start_y = int(padding_top)
end_y = start_y + cropped_img.shape[0]
img[start_y:end_y, :] = cropped_img
# img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
# 针对低帧率的视频做图像放大处理
print(height)
print(up_b)
......@@ -240,10 +293,10 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print(img.shape)
if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# index = index + 1
cv2.imwrite(f'./cap.png', img)
global index
cv2.imwrite(f'./cap/cap{index}.png', img)
index = index + 1
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>new log" + str(index - 1))
res = ocr.ocr(img, cls=True)
print('--------> res', res)
sorted(res, key=lambda text: text[0][0][1])
......@@ -277,6 +330,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
# conf_thred2 = 0.7
if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
continue
# TODO 字幕去重算法改进
if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
subTitle += txt
conf = max(conf,confidence)
......@@ -299,6 +353,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
return None, 0, None
def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow=None):
"""处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
......@@ -353,8 +408,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
if end_time < start_time:
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''],ocr_h=ocr_h)
print(">>>>>>subtitle,ocr_h1:" + str(lastSubTitle) + ">>>" + str(ocr_h))
break
# 每秒取4帧画面左右
# TODO 取帧算法优化
if cnt % int(fps / 4) == 0:
# 更新当前工程的检测进度
if pre_state is None:
......@@ -394,7 +451,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h:" + str(lastSubTitle) + ">>>" + str(ocr_h))
print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
elif lastSubTitle is not None and subTitle is not None:
# 两句话连在一起,但是两句话不一样
if string_similar(lastSubTitle, subTitle) < 0.6:
......@@ -412,7 +469,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h:" + str(lastSubTitle) + ">>>" + str(ocr_h))
print(">>>>>>subtitle,ocr_h3:" + str(lastSubTitle) + ">>>" + str(ocr_h))
start_time = end_time
else:
lastSubTitle = subTitle if conf > lastConf else lastSubTitle
......@@ -588,5 +645,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
if __name__ == '__main__':
    # Ad-hoc manual test: estimate the subtitle band of a local sample video.
    # The previous version assigned an unused `path` twice and then ignored it;
    # keep a single variable and actually pass it to the call.
    # NOTE(review): this path is machine-specific — adjust before running.
    video_path = "C:/Users/AIA/Desktop/1/1.mp4"
    evaluate_position(video_path, 0)
......@@ -631,6 +631,12 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.action_operate.setEnabled(True)
self.action_insert_aside_from_now.setEnabled(True)
self.insert_aside_from_now_btn.setEnabled(True)
# from detect_with_ocr import evaluate_position
# print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
# y1,y2 = evaluate_position(path.path()[1:], 0)
# self.widget.setY(y1)
# self.widget_bottom.setY(y2)
# print("y1:%d,y2:%d" %(y1,y2))
def check_ocr_rate(self):
if self.rate > self.rate_bottom:
......
......@@ -58,8 +58,15 @@ class MyWidget(QWidget):
# painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
# painter.drawLine(0, 1, 800, 1)
# painter.end()
print(">>>>>cur_y : " + str(self.y()))
return self.y()
def setY(self, h):
    """Move this widget to vertical offset h (x stays pinned to 0)."""
    current = self.y()
    print(">>>>>cur_y2 : " + str(current))
    self.move(0, h)
def down(self, mov_len):
    """Shift this widget down by mov_len pixels (x stays pinned to 0)."""
    print(">>>>>>>>>>>down" + str(mov_len))
    target_y = self.y() + mov_len
    self.move(0, target_y)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment