more ocr test

parent 7db705f9
......@@ -30,6 +30,9 @@ from typing import Tuple, Union
from utils import reverse_time_to_seconds
from detect_with_asr import create_sheet, write_to_sheet
from main_window import MainWindow, Element
import time
import numpy as np
import copy
# 字幕的上下边界
up_b, down_b = 0, 0
......@@ -43,8 +46,12 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cl
# 正常语速为4字/秒
normal_speed = 4
table_index = 0
ocr_h_map = {}
ocr_positions = []
def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
def get_position(video_path: str, start_time: float, ocr_ranges) -> Tuple[float, float]:
# return (885.0, 989.0)
"""根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
......@@ -55,28 +62,36 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
Returns:
Tuple[float, float]: 字幕在整个画面中的上下边界位置
"""
print(">>>>>>>>>>open")
print("video_path:", video_path)
video = cv2.VideoCapture(video_path)
# print("video:", video)
subtitle_position = {}
fps = video.get(cv2.CAP_PROP_FPS)
start = int(start_time * fps)
cnt = 0
txt_cnt = 0
pre_txt = None
video.set(cv2.CAP_PROP_POS_FRAMES, start)
# height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
print(cv2.CAP_PROP_FRAME_HEIGHT)
print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
print(">>>>get posti")
print(ocr_ranges)
for i in range(len(ocr_ranges)):
rate = ocr_ranges[i][0]
rate_bottom = ocr_ranges[i][1]
print(">>>>>>>>>>open")
print("video_path:", video_path)
video = cv2.VideoCapture(video_path)
# print("video:", video)
subtitle_position = {}
fps = video.get(cv2.CAP_PROP_FPS)
start = int(start_time * fps)
cnt = 0
txt_cnt = 0
pre_txt = None
video.set(cv2.CAP_PROP_POS_FRAMES, start)
# height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
print(cv2.CAP_PROP_FRAME_HEIGHT)
print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
global ocr_positions
print("add positions")
print(up)
ocr_positions.insert(0,[up, down])
# down = up + 20
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
print(up)
# print(down)
return int(up), int(down)
# print(up)
# # print(down)
# return int(up), int(down)
# while True:
# _, img = video.read()
# # print("img:", img)
......@@ -193,8 +208,8 @@ def normalize(text: str) -> str:
text = text + ')'
return text
def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
index = 0
def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
"""检测当前画面得到字幕信息
Args:
......@@ -206,67 +221,82 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
subTitle = ''
# up_b = 276
# down_b = 297
height = down_b - up_b
img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
# 针对低帧率的视频做图像放大处理
print(height)
print(up_b)
print(down_b)
print(img.shape)
if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
cv2.imwrite('./cap.png', img)
res = ocr.ocr(img, cls=True)
print('--------> res', res)
sorted(res, key=lambda text: text[0][0][1])
sorted(res, key=lambda text: text[0][0][0])
if len(res) == 0:
return None, 0
possible_txt = []
conf = 0
print('res --------->', res)
for x in res:
# cv2.imshow("cut", img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
rect, (txt, confidence) = x
font_size = rect[2][1] - rect[0][1]
mid = (rect[0][0] + rect[1][0]) / 2
gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
# log.append("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient,
# font_size)) 置信度>0.7 & 斜率<0.1 & 字幕偏移量<=25 & 字幕中心在画面宽的0.4-0.6之间
print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
print("字体大小差距: {}", format(height - font_size))
print("高度中心:{}".format((rect[0][1] + rect[1][1])/2/img.shape[0]))
conf_thred1 = 0.7
conf_thred2 = 0.85
# conf_thred1 = 0.1
# conf_thred2 = 0.4
# conf_thred1 = 0.5
# conf_thred2 = 0.7
if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
global ocr_positions
h = None
for i in range(len(ocr_positions)):
img = copy.deepcopy(org_img)
up_b = ocr_positions[i][0]
down_b = ocr_positions[i][1]
height = down_b - up_b
img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
print(">>>>>into ocr")
print(int(up_b - height*0.7))
print(int(down_b + height*0.7))
# img = img[int(up_b):int(down_b)]
# 针对低帧率的视频做图像放大处理
print(height)
print(up_b)
print(down_b)
print(img.shape)
if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# index = index + 1
cv2.imwrite(f'./cap.png', img)
res = ocr.ocr(img, cls=True)
print('--------> res', res)
sorted(res, key=lambda text: text[0][0][1])
sorted(res, key=lambda text: text[0][0][0])
if len(res) == 0:
continue
if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
subTitle += txt
conf = max(conf,confidence)
# possible_txt.append([txt, mid/img.shape[1]])
possible_txt.append(txt)
# 如果字幕在一行中分为两个(或以上)对话文本
elif confidence > conf_thred2 and gradient < 0.1:
if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
# return None, 0, None
possible_txt = []
conf = 0
print('res --------->', res)
for x in res:
# cv2.imshow("cut", img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
rect, (txt, confidence) = x
font_size = rect[2][1] - rect[0][1]
mid = (rect[0][0] + rect[1][0]) / 2
gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
# log.append("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient,
# font_size)) 置信度>0.7 & 斜率<0.1 & 字幕偏移量<=25 & 字幕中心在画面宽的0.4-0.6之间
print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
print("字体大小差距: {}", format(height - font_size))
print("高度中心:{}".format((rect[0][1] + rect[1][1])/2/img.shape[0]))
if h == None:
h = font_size
conf_thred1 = 0.7
conf_thred2 = 0.85
# conf_thred1 = 0.1
# conf_thred2 = 0.4
# conf_thred1 = 0.5
# conf_thred2 = 0.7
if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
continue
if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
subTitle += txt
conf = max(conf,confidence)
# possible_txt.append([txt, mid/img.shape[1]])
possible_txt.append(txt)
conf = max(conf, confidence)
# sorted(possible_txt, key=lambda pos : pos[1])
# print(possible_txt)
if len(possible_txt) >= 2:
# subTitle = ' '.join([x[0] for x in possible_txt])
subTitle = ' '.join(possible_txt)
print(subTitle, conf)
# 如果字幕在一行中分为两个(或以上)对话文本
elif confidence > conf_thred2 and gradient < 0.1:
if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
# possible_txt.append([txt, mid/img.shape[1]])
possible_txt.append(txt)
conf = max(conf, confidence)
# sorted(possible_txt, key=lambda pos : pos[1])
# print(possible_txt)
if len(possible_txt) >= 2:
# subTitle = ' '.join([x[0] for x in possible_txt])
subTitle = ' '.join(possible_txt)
print(subTitle, conf)
if len(subTitle) > 0:
return subTitle, conf
return None, 0
return subTitle, conf, h
return None, 0, None
def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow=None):
......@@ -302,12 +332,14 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
end_time = 0
video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
pre_state = state[0]
ocr_h = None
while True:
_, frame = video.read()
if frame is None:
break
cnt += 1
cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
# 判断当前帧是否已超限制
if cur_time > end:
if cur_time - end_time > 1:
......@@ -320,7 +352,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# 判断当前是否有字幕需要被保存下来
if end_time < start_time:
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''],ocr_h=ocr_h)
break
# 每秒取4帧画面左右
if cnt % int(fps / 4) == 0:
......@@ -332,7 +364,9 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
mainWindow.projectContext.nd_process = state[0]
mainWindow.projectContext.last_time = cur_time
subTitle, conf = detect_subtitle(frame)
subTitle, conf, cur_ocr_h = detect_subtitle(frame)
if subTitle is not None:
subTitle = normalize(subTitle)
......@@ -341,6 +375,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# 第一次找到字幕
if lastSubTitle is None and subTitle is not None:
if cur_ocr_h != None:
ocr_h = cur_ocr_h
start_time = cur_time
# 字幕消失
......@@ -353,14 +389,17 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
(res[-1][0] - res[-2][1]) * normal_speed)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h:" + str(lastSubTitle) + ">>>" + str(ocr_h))
elif lastSubTitle is not None and subTitle is not None:
# 两句话连在一起,但是两句话不一样
if string_similar(lastSubTitle, subTitle) < 0.6:
if cur_ocr_h != None:
ocr_h = cur_ocr_h
end_time = cur_time
res.append([start_time, end_time, lastSubTitle])
if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
......@@ -369,10 +408,11 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
(res[-1][0] - res[-2][1]) * normal_speed)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h:" + str(lastSubTitle) + ">>>" + str(ocr_h))
start_time = end_time
else:
lastSubTitle = subTitle if conf > lastConf else lastSubTitle
......@@ -380,13 +420,69 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# 当前字幕与上一段字幕不一样
lastSubTitle = subTitle
lastConf = conf
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1")
# print(ocr_h_map)
def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
def process_err_ocr(mainWindow):
    """Remove subtitle rows whose OCR font height is a statistical outlier.

    Collects the detected font height (``ocr_h``) of every element that has a
    non-empty subtitle, computes the mean and standard deviation over those
    heights, and deletes from the table every row whose height deviates from
    the mean by more than 2.7 standard deviations — such rows are likely false
    OCR hits (on-screen text that is not an actual subtitle).

    Args:
        mainWindow: main window whose ``projectContext.all_elements`` holds
            the detected rows; rows are removed via
            ``del_line_operation_slot``.
    """
    # if 1==1:
    # return
    # Map table index -> detected OCR font height, subtitle rows only.
    for i in range (len(mainWindow.projectContext.all_elements)):
        if mainWindow.projectContext.all_elements[i].subtitle != None and mainWindow.projectContext.all_elements[i].subtitle != "":
            ocr_h_map[i] = mainWindow.projectContext.all_elements[i].ocr_h
    print(ocr_h_map)
    data = list(ocr_h_map.values())
    print(">>>>values")
    print(ocr_h_map.values())
    table_indexs = list(ocr_h_map.keys())
    print(table_indexs)
    # Compute mean and standard deviation of the font heights.
    mean = np.mean(data)
    std_dev = np.std(data)
    # Outlier threshold (2.7 standard deviations, empirically chosen).
    threshold = 2.7 * std_dev
    print(mean)
    print(threshold)
    rm_list = []
    for i, x in enumerate(data):
        if abs(x - mean) > threshold:
            rm_list.append(i)
    print(rm_list)
    # while(mainWindow.refresh_tab_timer.isActive()):
    # time.sleep(1)
    print(len(mainWindow.projectContext.all_elements))
    for i in range(len(rm_list)):
        try:
            # Table rows appear to be 1-based; ``- i`` compensates for the
            # shift caused by each earlier deletion.
            # NOTE(review): the print below indexes all_elements with
            # ``table_indexs[rm_list[i]] - i`` while the deletion uses
            # ``+ 1 - i`` — confirm both really address the same row.
            print(">>>>>>>>>will rm" + str(table_indexs[rm_list[i]] + 1 - i) + ", subtitle:" + mainWindow.projectContext.all_elements[table_indexs[rm_list[i]] - i].subtitle)
            print(table_indexs[rm_list[i]])
            mainWindow.del_line_operation_slot(row = table_indexs[rm_list[i]] + 1 - i, show_msg_flag = False)
        except Exception as e:
            print(">>>>>>>>>>>>>>>>>>>>>>del err")
            print(e)
    # mainWindow.refresh_all_tab_slot()
def add_to_list(mainWindow: MainWindow, element_type: str, li: list, ocr_h : int = None):
# 默认使用配置文件中的语速
speed = mainWindow.projectContext.speaker_speed
aside_head_time = float(reverse_time_to_seconds(mainWindow.aside_head_time)) if mainWindow.aside_head_time != None else float(0)
st_time_sec, ed_time_sec, subtitle, suggest = li
# global table_index
# if ocr_h != None and element_type == "字幕":
# index = table_index
# ocr_h_map[index] = ocr_h
# print(">>>>>>>>>>>>>>>>>>ocr H map:" + str(index) + ",subtitle:" + subtitle)
# print(ocr_h_map)
print(">>>>>>>>start time:")
print(aside_head_time)
print(st_time_sec)
......@@ -394,11 +490,12 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
print(">>>>>>need del")
print(st_time_sec)
if not mainWindow.add_head_aside:
new_element = Element('0.00', "", "", "0/100", "",speed)
new_element = Element('0.00', "", "", "0/100", "",speed,ocr_h)
mainWindow.projectContext.aside_list.append(new_element)
mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1
mainWindow.add_head_aside = True
# table_index = table_index + 1
return
st_time_sec, ed_time_sec = str(st_time_sec), str(ed_time_sec)
......@@ -406,7 +503,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
aside = ""
i = len(mainWindow.projectContext.all_elements)
if element_type == "字幕":
new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed)
new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed,ocr_h)
new_element.print_self()
if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
......@@ -414,9 +511,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# print(">>>>>>>>>>>remove short aside")
mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
# table_index = table_index - 1
mainWindow.last_aside_index = None
mainWindow.projectContext.subtitle_list.append(new_element)
mainWindow.projectContext.all_elements.append(mainWindow.projectContext.subtitle_list[-1])
# table_index = table_index + 1
else:
if i == 0:
st_time_sec = "0.01"
......@@ -427,7 +526,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# 因为暂时没有用到ed_time_sec,所以直接赋值空吧
ed_time_sec = ""
new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed)
new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed, ocr_h)
new_element.print_self()
if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
......@@ -435,13 +534,16 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# print(">>>>>>>>>>>remove short aside")
mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
# table_index = table_index - 1
mainWindow.last_aside_index = None
new_element.suggest = "0/" + new_element.suggest
if (st_time_sec != None and st_time_sec != "" and aside_head_time > float(st_time_sec)):
return
mainWindow.projectContext.aside_list.append(new_element)
mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
# table_index = table_index + 1
mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1
def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow=None):
......@@ -469,7 +571,11 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
else:
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
# up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
get_position(video_path, 0, mainWindow.ocr_ranges)
print(">>>>>positions:")
global ocr_positions
print(ocr_positions)
context.caption_boundings = [up_b, down_b]
context.detected = True
......
......@@ -161,7 +161,14 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.video_timer.start(1000) # todo 作为参数配置
self.refresh_tab_timer = QTimer()
self.refresh_tab_timer.timeout.connect(self.refresh_tab_slot)
self.up_ocr_timer = QTimer()
self.down_ocr_timer = QTimer()
self.up_ocr_timer.timeout.connect(self.up_ocr_timer_func)
self.down_ocr_timer.timeout.connect(self.down_ocr_timer_func)
self.up_ocr_bottom_timer = QTimer()
self.down_ocr_bottom_timer = QTimer()
self.up_ocr_bottom_timer.timeout.connect(self.up_ocr_bottom_timer_func)
self.down_ocr_bottom_timer.timeout.connect(self.down_ocr_bottom_timer_func)
"""状态栏相关空间
"""
......@@ -219,18 +226,28 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.insert_aside_from_now_btn.clicked.connect(
self.insert_aside_from_now_slot)
self.insert_aside_from_now_btn.setEnabled(False)
self.up_ocr_btn.clicked.connect(
self.up_ocr_btn.pressed.connect(
self.up_ocr)
self.down_ocr_btn.clicked.connect(
self.up_ocr_btn.released.connect(
self.up_ocr_stop)
self.down_ocr_btn.pressed.connect(
self.down_ocr)
self.up_ocr_bottom_btn.clicked.connect(
self.down_ocr_btn.released.connect(
self.down_ocr_stop)
self.up_ocr_bottom_btn.pressed.connect(
self.up_ocr_bottom)
self.down_ocr_bottom_btn.clicked.connect(
self.up_ocr_bottom_btn.released.connect(
self.up_ocr_bottom_stop)
self.down_ocr_bottom_btn.pressed.connect(
self.down_ocr_bottom)
self.down_ocr_bottom_btn.released.connect(
self.down_ocr_bottom_stop)
self.confirm_ocr_btn.clicked.connect(
self.confirm_ocr)
self.confirm_head_aside_btn.clicked.connect(
self.confirm_head_aside)
self.detect_btn.clicked.connect(
self.show_detect_dialog)
self.show_confirmation_dialog)
"""视频预览相关信息
......@@ -398,6 +415,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.sld_video.setFocus()
self.aside_head_time = None
self.add_head_aside = False
self.ocr_ranges = []
# 打印到log文件中
t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
......@@ -407,6 +425,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
get_focus_thread.setDaemon(True)
get_focus_thread.start()
def show_confirmation_dialog(self):
    """Ask the user to confirm the OCR subtitle range before detection.

    Shows a Yes/No message box; on Yes the narration-interval detection
    dialog is opened, otherwise nothing happens.
    """
    confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'确认已经校准OCR范围(只包含中文,不含英文)?',
                                                 QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
    if confirm_box == QtWidgets.QMessageBox.Yes:
        self.show_detect_dialog()
    else:
        # NOTE(review): debug leftover — user declined, nothing to do here.
        print(1)
def getFocus(self):
while(True):
......@@ -495,12 +520,17 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""弹出旁白区间检测相关信息填写窗口
"""
if self.rate == None:
self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
return
if self.rate_bottom == None:
self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
# if self.rate == None:
# self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
# return
# if self.rate_bottom == None:
# self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
# return
if not self.check_ocr_rate():
self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
return
if len(self.ocr_ranges) == 0:
self.ocr_ranges.append([self.rate, self.rate_bottom])
self.detect_dialog.init_self()
self.detect_dialog.show()
......@@ -602,8 +632,14 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.action_insert_aside_from_now.setEnabled(True)
self.insert_aside_from_now_btn.setEnabled(True)
def up_ocr(self):
self.widget.change_painter_flag(True)
def check_ocr_rate(self):
    """Return True when the subtitle top boundary does not lie below the bottom one."""
    return not self.rate > self.rate_bottom
def up_ocr_timer_func(self):
# self.widget.change_painter_flag(True)
h = self.widget.up(3)
video_h = self.wgt_video.height()
self.rate = float(h-10)/float(video_h)
......@@ -612,9 +648,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
print(self.wgt_video.height())
print(">>>>>>>>>rate" + str(self.rate))
def down_ocr(self):
self.widget.change_painter_flag(True)
def down_ocr_timer_func(self):
# self.widget.change_painter_flag(True)
h = self.widget.down(3)
video_h = self.wgt_video.height()
self.rate = float(h-10)/float(video_h)
......@@ -622,19 +657,70 @@ class MainWindow(QMainWindow, Ui_MainWindow):
print(self.wgt_video.height())
print(">>>>>>>>>rate" + str(self.rate))
def up_ocr_bottom(self):
def up_ocr(self):
    """Button pressed: start nudging the subtitle top boundary upward."""
    self.user_editing_content = True
    # Fires up_ocr_timer_func every 50 ms until the button is released.
    self.up_ocr_timer.start(50)
def up_ocr_stop(self):
    """Button released: stop moving the subtitle top boundary upward."""
    timer = self.up_ocr_timer
    timer.stop()
    self.user_editing_content = False
def down_ocr_stop(self):
    """Button released: stop moving the subtitle top boundary downward."""
    timer = self.down_ocr_timer
    timer.stop()
    self.user_editing_content = False
def down_ocr(self):
    """Button pressed: start nudging the subtitle top boundary downward."""
    self.user_editing_content = True
    # Fires down_ocr_timer_func every 50 ms until the button is released.
    self.down_ocr_timer.start(50)
def up_ocr_bottom_timer_func(self):
    """Timer tick: move the bottom-boundary overlay up and recompute rate_bottom."""
    overlay = self.widget_bottom
    overlay.change_painter_flag(True)
    new_h = overlay.up(3)
    # Normalise the pixel position (minus a 6 px offset) by the video height.
    self.rate_bottom = float(new_h - 6) / float(self.wgt_video.height())
def down_ocr_bottom(self):
def down_ocr_bottom_timer_func(self):
    """Timer tick: move the bottom-boundary overlay down and recompute rate_bottom."""
    overlay = self.widget_bottom
    overlay.change_painter_flag(True)
    new_h = overlay.down(3)
    # Normalise the pixel position (minus a 6 px offset) by the video height.
    self.rate_bottom = float(new_h - 6) / float(self.wgt_video.height())
def up_ocr_bottom_stop(self):
    """Button released: stop moving the subtitle bottom boundary upward."""
    timer = self.up_ocr_bottom_timer
    timer.stop()
    self.user_editing_content = False
def down_ocr_bottom_stop(self):
    """Button released: stop moving the subtitle bottom boundary downward."""
    timer = self.down_ocr_bottom_timer
    timer.stop()
    self.user_editing_content = False
def up_ocr_bottom(self):
    """Button pressed: start nudging the subtitle bottom boundary upward."""
    self.user_editing_content = True
    # Fires up_ocr_bottom_timer_func every 50 ms until the button is released.
    self.up_ocr_bottom_timer.start(50)
def down_ocr_bottom(self):
    """Button pressed: start nudging the subtitle bottom boundary downward."""
    self.user_editing_content = True
    # Fires down_ocr_bottom_timer_func every 50 ms until the button is released.
    self.down_ocr_bottom_timer.start(50)
def refresh_on_import(self):
print(">>>>>>>refresh in")
self.refresh_tab_slot()
......@@ -904,6 +990,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
elif type == "检测":
self.detect_timer.stop()
self.refresh_tab_timer.stop()
from narratage_detection import process_err
process_err(self)
elif type == "旁白导入":
self.import_excel_timer.stop()
# self.refresh_tab_timer.stop()
......@@ -2274,7 +2363,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.prompt_dialog.show_with_msg("操作成功!!请查看变化")
# 只有row起作用
def del_line_operation_slot(self, row: int, start_time="0", end_time="0", subtitle="", suggest="", aside="", speed="", refresh_flag = True):
def del_line_operation_slot(self, row: int, start_time="0", end_time="0", subtitle="", suggest="", aside="", speed="", refresh_flag = True, show_msg_flag = True):
"""删除一行
Args:
......@@ -2313,7 +2402,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.projectContext.all_elements.pop(int(row)-1)
if refresh_flag:
self.refresh_tab_slot()
self.prompt_dialog.show_with_msg("操作成功!!请查看变化")
if show_msg_flag:
self.prompt_dialog.show_with_msg("操作成功!!请查看变化")
def pb_item_changed_by_double_clicked_slot(self, item):
"""双击后修改旁白文本
......@@ -2418,4 +2508,17 @@ class MainWindow(QMainWindow, Ui_MainWindow):
str(round(video_position/1000, 2)))
self.import_excel_dialog.show_with_msg("定位成功:" + self.aside_head_time)
\ No newline at end of file
def confirm_ocr(self):
    """Validate the current subtitle boundaries and record them as one OCR range.

    Guard clauses: both the top (``rate``) and bottom (``rate_bottom``)
    boundaries must be set, and the top must not lie below the bottom.
    On success the pair is appended to ``self.ocr_ranges`` so that movies
    with multi-line subtitles can accumulate several boundary ranges.
    """
    if self.rate == None:
        self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
        return
    if self.rate_bottom == None:
        self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
        return
    if not self.check_ocr_rate():
        self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
        return
    self.ocr_ranges.append([self.rate, self.rate_bottom])
    self.prompt_dialog.show_with_msg(f"操作成功,如果电影存在多行字幕,请移动字幕上下边界,再次点击该按钮确认,目前已存在{len(self.ocr_ranges)}组字幕边界")
\ No newline at end of file
......@@ -18,6 +18,9 @@ class MyWidget(QWidget):
# def __init__(self, parent=None):
# super(QWidget, self).__init__(parent)
# self.painter_flag = True
def __init__(self, parent=None, color = Qt.red):
    """Overlay line widget; *color* is the pen colour used by paintEvent."""
    # NOTE(review): ``super(QWidget, self)`` skips QWidget in the MRO — the
    # conventional form is ``super().__init__(parent)``; confirm intentional.
    super(QWidget, self).__init__(parent)
    # Pen colour for the boundary line drawn in paintEvent.
    self.color = color
def paintEvent(self, event):
# print(">>>>>>>>into paint")
......@@ -26,7 +29,7 @@ class MyWidget(QWidget):
lock.acquire()
painter = QPainter(self)
painter.setRenderHint(QPainter.Antialiasing) # Optional: Enable anti-aliasing
painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
painter.setPen(QPen(self.color, 2, Qt.SolidLine))
painter.drawLine(0, 1, 800, 1)
painter.end()
lock.release()
......@@ -314,6 +317,8 @@ class Ui_MainWindow(object):
self.horizontalLayout_7.setObjectName("horizontalLayout_7")
self.up_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
self.up_ocr_btn.setObjectName("up_ocr_btn")
# self.up_ocr_btn.setAutoRepeatDelay(False)
# self.up_ocr_btn.setAutoRepeat
self.horizontalLayout_7.addWidget(self.up_ocr_btn)
self.down_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
self.down_ocr_btn.setObjectName("down_ocr_btn")
......@@ -324,9 +329,13 @@ class Ui_MainWindow(object):
self.down_ocr_bottom_btn = QtWidgets.QPushButton(self.centralwidget)
self.down_ocr_bottom_btn.setObjectName("down_ocr_bottom_btn")
self.horizontalLayout_7.addWidget(self.down_ocr_bottom_btn)
self.confirm_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
self.confirm_ocr_btn.setObjectName("confirm_ocr_btn")
self.horizontalLayout_7.addWidget(self.confirm_ocr_btn)
self.confirm_head_aside_btn = QtWidgets.QPushButton(self.centralwidget)
self.confirm_head_aside_btn.setObjectName("confirm_head_aside_btn")
self.horizontalLayout_7.addWidget(self.confirm_head_aside_btn)
self.horizontalLayout_8 = QtWidgets.QHBoxLayout()
self.horizontalLayout_8.setObjectName("horizontalLayout_8")
......@@ -523,7 +532,8 @@ class Ui_MainWindow(object):
self.action_redo = QtWidgets.QAction(MainWindow)
# self.action_redo.setFont(font)
self.action_redo.setObjectName("action_redo")
self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_detect_dialog)
# self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_detect_dialog)
self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_confirmation_dialog)
self.action_3.setEnabled(False)
self.action_4 = QtWidgets.QAction("旁白音频合成",self,triggered=self.show_assemble_dialog)
self.action_4.setEnabled(False)
......@@ -539,7 +549,6 @@ class Ui_MainWindow(object):
self.action_9.setEnabled(True)
self.action_10 = QtWidgets.QAction("片头旁白定位",self,triggered=self.confirm_head_aside)
self.action_10.setEnabled(True)
# self.action_3.setObjectName("action_3")
# self.action_4 = QtWidgets.QAction(MainWindow)
# self.action_4.setObjectName("action_4")
......@@ -604,6 +613,7 @@ class Ui_MainWindow(object):
self.up_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界上移"))
self.down_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界下移"))
self.confirm_head_aside_btn.setText(_translate("MainWindow", "片头旁白定位"))
self.confirm_ocr_btn.setText(_translate("MainWindow", "字幕边界确认"))
self.detect_btn.setText(_translate("MainWindow", "旁白区间检测"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕"))
......
......@@ -77,13 +77,14 @@ class OperateRecord:
# 每一行的具体信息,"起始时间", "终止时间", "字幕", '建议', '解说脚本'
class Element:
def __init__(self, st_time_sec: str, ed_time_sec: str, subtitle, suggest, aside, speed = "1.00(4字/秒)"):
def __init__(self, st_time_sec: str, ed_time_sec: str, subtitle, suggest, aside, speed = "1.00(4字/秒)",ocr_h = None):
    """One table row: start/end time, subtitle, suggestion, narration script.

    ``ocr_h`` is the OCR-detected font height of the subtitle (None when the
    row did not originate from OCR); it is later used to filter outlier rows.
    """
    self.st_time_sec = st_time_sec  # start time in seconds (string)
    self.ed_time_sec = ed_time_sec  # end time in seconds (string)
    self.subtitle = subtitle  # recognised subtitle text
    self.suggest = suggest  # suggested narration length, e.g. "0/100"
    self.aside = aside  # narration (aside) script text
    self.speed = speed  # speaking-speed label
    self.ocr_h = ocr_h  # OCR font height of this subtitle, if any
# 判断当前元素是否是字幕
def is_subtitle(self):
......
......@@ -63,6 +63,9 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
from detect_with_ocr import detect_with_ocr
detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)
def process_err(mainWindow: MainWindow=None):
    """Drop OCR-outlier subtitle rows by delegating to detect_with_ocr.

    The import is deliberately local — presumably to avoid a circular import
    between the detection modules (TODO confirm).
    """
    from detect_with_ocr import process_err_ocr
    process_err_ocr(mainWindow)
if __name__ == '__main__':
# 定义参数
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment