Commit 9b46d4c7 authored by wux51's avatar wux51

fix:ocr algorithm issue with repetitive subtitle

parents 9b3754fc 2c4cd5c0
......@@ -12,10 +12,11 @@ import os
class Content:
StartTimeColumn = 0
SubtitleColumnNumber = 2
AsideColumnNumber = 4
SpeedColumnNumber = 5
# ActivateColumns = [2, 3]
ActivateColumns = [4,5]
ActivateColumns = [2,4,5]
# ColumnCount = 3
ObjectName = "all_tableWidget"
# TimeFormatColumns = [0]
......
......@@ -62,7 +62,38 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
start = int(start_time * fps)
video.set(cv2.CAP_PROP_POS_FRAMES, start)
frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT)
time1 = time.time()
# 一共有60*60*60*3 均匀取2000帧
gap = math.floor(frame_num/2000) # 前一帧与后一帧的跨度
# #读取方案1:跳转读写头到指定帧
# for i in range(2000):
# frame_index = i*gap + 1
# video.set(cv2.CAP_PROP_POS_FRAMES,frame_index)
# _, frame = video.read()
# cv2.imwrite(str.format('./extrated_imgs/{}.png',i),frame)
# 读写方案2:连续读取,读到指定帧保存
# i=1
# while i<frame_num:
# _, frame = video.read()
# if i%gap == 0:
# print(i,gap)
# cv2.imwrite(str.format('./extrated_imgs_2/{}.png',i),frame)
# i = i + 1
# 主要工作:
paddle_dir = "res/.paddleocr/2.3.0.1/ocr/"
cur_cls_model_dir = paddle_dir + "cls/ch_ppocr_mobile_v2.0_cls_infer"
cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
cur_rec_model_dir = paddle_dir + "rec/ch/ch_PP-OCRv2_rec_infer"
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cls_model_dir=cur_cls_model_dir, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
position_map = {}
R = []
G = []
B = []
# 统计位置(高度)
# 分成五等份,1/5 2/5 3/5 4/5
for i in range(4):
frame_index = math.floor((i+1)*frame_num/5)
......@@ -70,9 +101,10 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
for j in range(10):#每个阶段取10帧
for k in range(120):#隔120帧取一帧
_,frame = video.read()
_,frame = video.read()
if (j+1)*120+frame_index>frame_num:
break
_,frame = video.read()
# cv2.imwrite(str.format('./imgs_2/{}_{}.png',i+1,j),frame)
res = ocr.ocr(frame, cls=True)
for result_item in res:
[x1,y1],[x2,y2],[x3,y3],[x4,y4] = result_item[0]
......@@ -80,22 +112,87 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
english_text = ''.join(re.findall(r'[A-Za-z]',text))
# 跳过英文字幕
if(len(english_text)/len(text)<0.1):
# 提取ocr区域像素点颜色
subtitle_img = frame[int(y1):int(y3),int(x1):int(x2)]
r = subtitle_img[:,:,0].copy()
r = r.reshape(-1)
g = subtitle_img[:,:,1].copy()
g = g.reshape(-1)
b = subtitle_img[:,:,2].copy()
b = b.reshape(-1)
R = np.append(R,r)
G = np.append(G,g)
B = np.append(B,b)
print(i+1,j,text)
# 分析文字位置
if len(position_map) == 0:
position_map[(y1,y3)]=1
else:
# 在列表中查找
keys = list(position_map.keys())
flag = False
for key in keys:
if abs(y1-key[0])<2 and abs(y3-key[1])<2:
height = y3-y1
threshold = int((y3-y1)*0.3)
# print(threshold)
if abs(y1-key[0])<threshold or abs(y3-key[1])<threshold:
position_map[key]+=1
# # 绘制字幕横线到图片上:
# frame1 = cv2.line(frame,(0,int(key[0])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[0])),(255,0,0),2)
# frame1 = cv2.line(frame,(0,int(key[1])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[1])),(255,0,0),2)
# file_name = './mytest('+str(key[0])+','+str(key[1])+').png'
# cv2.imwrite(file_name,frame1)
flag = True# 如果能归为某一类则flag置为True
break
else:
if flag == False:# 如果找不到,则向列表添加
position_map[(y1,y3)]=1
print(sorted( position_map.items(), key = lambda kv:(kv[1], kv[0])))
y1,y2 = sorted( position_map.items(), key = lambda kv:(kv[1], kv[0]))[-1][0]
# # 绘制字幕横线到图片上:
# frame1 = cv2.line(frame,(0,int(y1)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y1)),(255,0,0),2)
# frame1 = cv2.line(frame,(0,int(y3)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y3)),(255,0,0),2)
# file_name = './mytest('+str(y1)+','+str(y3)+').png'
# cv2.imwrite(file_name,frame1)
# 给出字幕行数
position_rank_list = sorted( position_map.items(), key = lambda kv:(kv[1], kv[0]))
print(position_rank_list)
line_num = 0
num = position_rank_list[-1][1]
y1,y2 = position_rank_list[-1][0]
height = abs(y2-y1)
result = []
item_result = []
for item in position_rank_list:
if item[1]>num*0.3 and item[0][0]>0.5*video.get(cv2.CAP_PROP_FRAME_HEIGHT) and item[0][1]>0.5*video.get(cv2.CAP_PROP_FRAME_HEIGHT):#出现次数达到rank1的30%以上,判定为新的一行字幕
line_num += 1
y1,y2 = item[0]
y1 = y1/video.get(cv2.CAP_PROP_FRAME_HEIGHT)
y2 = y2/video.get(cv2.CAP_PROP_FRAME_HEIGHT)
return y1,y2
item_result.append([y1,y2])
print([line_num,height,item_result])
# 给出文字和背景的颜色
import matplotlib.pyplot as plt
from sklearn import cluster
plt.hist(R)
plt.savefig('R-hist-all')
plt.close()
# 尝试聚成两类(k-means算法)
estimator = cluster.KMeans(n_clusters=2)
estimator.fit(R.reshape(-1,1))
# print(R[0].shape)
r_centers = estimator.cluster_centers_.reshape(-1)
estimator = cluster.KMeans(n_clusters=2)
estimator.fit(G.reshape(-1,1))
g_centers = estimator.cluster_centers_.reshape(-1)
estimator = cluster.KMeans(n_clusters=2)
estimator.fit(B.reshape(-1,1))
b_centers = estimator.cluster_centers_.reshape(-1)
r,g,b = max(r_centers),max(g_centers),max(b_centers)
print('颜色结果:',r,g,b)
# 返回值:字幕行数 字幕高度 字幕位置(比例形式) 字幕颜色(r,g,b)
# [2, 109.0, [[0.8637892376681614, 0.9243273542600897], [0.9304932735426009, 0.9915919282511211]],(r,g,b)]
return item_result[0][0],item_result[0][1]
return [line_num,height,item_result,(r,g,b)]
ocr_positions = []
......@@ -257,6 +354,29 @@ def normalize(text: str) -> str:
return text
def resize_img(img, resize_height=152):
    """Rescale ``img`` so that its height becomes roughly ``resize_height``.

    The aspect ratio is preserved. Images shorter than the target are
    enlarged; images at least as tall as the target are shrunk.

    Args:
        img: Image array whose first two ``shape`` entries are
            (height, width).
        resize_height: Target height in pixels. Defaults to 152, the value
            that used to be hard-coded here.

    Returns:
        The rescaled image produced by ``cv2.resize``.
    """
    height, width = img.shape[:2]
    # A single factor serves both directions: > 1 enlarges, < 1 shrinks.
    # The shrink branch previously used height / resize_height, which is
    # >= 1 and therefore enlarged tall images instead of reducing them.
    scale_factor = float(resize_height) / height
    if resize_height > height:
        # Enlarge: pass the explicit target size (width, height).
        new_width = int(width * scale_factor)
        new_height = int(height * scale_factor)
        return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    # Shrink (or keep as is when the height already matches): let OpenCV
    # derive the output size from the scale factors.
    return cv2.resize(img, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
def extract_white_prior(img, threshold=200):
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
......@@ -267,7 +387,8 @@ def extract_white_prior(img, threshold=200):
index = 0
def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
t = 140
def detect_subtitle(org_img: np.ndarray, lastSubTitle, last_confidence) -> Tuple[Union[str, None], float]:
"""检测当前画面得到字幕信息
Args:
......@@ -276,23 +397,36 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
Returns:
Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
"""
subTitle = ''
ocr_res = ""
# up_b = 276
# down_b = 297
global ocr_positions
# ocr_positions.append([676, 712])
h = None
global index
for i in range(len(ocr_positions)):
img = copy.deepcopy(org_img)
up_b = ocr_positions[i][0]
down_b = ocr_positions[i][1]
height = down_b - up_b
# if len(ocr_positions) == 1:
# img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
# else:
if len(ocr_positions) == 1:
img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
else:
# cropped_img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
cropped_img = img[int(up_b):int(down_b)]
# cropped_img = resize_img(cropped_img)
# x = float(150 / height)
# img_h, img_w = cropped_img.shape[:2]
# img_h = int(img_h * x)
# img_w = int(img_w * x)
# cropped_img = cv2.resize(cropped_img, (img_w, img_h))
# 定义要添加的上下空白的高度
padding_top = height*0.7
padding_bottom = height*0.7
# padding_top = 150
# padding_bottom = 150
# 计算新图像的高度
new_height = cropped_img.shape[0] + padding_top + padding_bottom
......@@ -304,6 +438,23 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
start_y = int(padding_top)
end_y = start_y + cropped_img.shape[0]
img[start_y:end_y, :] = cropped_img
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
# t = 230
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# cv2.imwrite(f'./test2.png', img)
# index = index + 1
# img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
# 针对低帧率的视频做图像放大处理
print(height)
......@@ -312,7 +463,8 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print(img.shape)
if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
global index
# img = extract_white_prior(img)
......@@ -327,6 +479,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
continue
# return None, 0, None
possible_txt = []
subTitle = ''
conf = 0
print('res --------->', res)
res.sort(key=lambda rect: rect[0][0][0] + rect[0][1][0]) # 按照中心点排序
......@@ -343,6 +496,11 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
print("字体大小差距: {}", format(height - font_size))
print("高度中心:{}".format((rect[0][1] + rect[1][1])/2/img.shape[0]))
# if confidence < 0.95:
# # global t
# print("小于0.95,重新检测,阈值为:" + str(t))
# t = t + 20
# return detect_subtitle(org_img, lastSubTitle, last_confidence)
if h == None:
h = font_size
conf_thred1 = 0.7
......@@ -372,8 +530,17 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
subTitle = ' '.join(possible_txt)
print(subTitle, conf)
if len(subTitle) > 0:
return subTitle, conf, h
return None, 0, None
ocr_res = ocr_res + subTitle
if (len(ocr_res)) >0:
print(">>>>>>>>>>>>cur subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
# if len(ocr_positions) == 1 and last_confidence != None and confidence != None and confidence < last_confidence:
# ocr_res = lastSubTitle
# confidence = last_confidence
# print(">>>>>>>>>>>>res subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
return ocr_res, confidence, conf, h
if check_have_ocr(img):
return "err", None, 0, None
return None, None, 0, None
......@@ -402,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
video = cv2.VideoCapture(video_path)
fps = video.get(cv2.CAP_PROP_FPS)
lastSubTitle = None
last_confidence = None
lastConf = 0
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
res = []
......@@ -447,8 +615,9 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
mainWindow.projectContext.nd_process = state[0]
mainWindow.projectContext.last_time = cur_time
subTitle, conf, cur_ocr_h = detect_subtitle(frame)
subTitle, confidence, conf, cur_ocr_h = detect_subtitle(frame, lastSubTitle, last_confidence)
if subTitle == "err":
continue
if subTitle is not None:
......@@ -490,20 +659,20 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
fl = False
cur_time1 = cur_time
continue
end_time = cur_time
res.append([start_time, end_time, lastSubTitle])
if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
print('--------------------------------------------------')
recommend_lens = int((res[-1][0] - last_time) * normal_speed) if len(res) == 1 else int(
(res[-1][0] - res[-2][1]) * normal_speed)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
# end_time = cur_time
# res.append([start_time, end_time, lastSubTitle])
# if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
# print('--------------------------------------------------')
# recommend_lens = int((res[-1][0] - last_time) * normal_speed) if len(res) == 1 else int(
# (res[-1][0] - res[-2][1]) * normal_speed)
# # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
# print(start_time, end_time, lastSubTitle)
# # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
# add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
# print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
elif lastSubTitle is not None and subTitle is not None:
# 两句话连在一起,但是两句话不一样
if string_similar(lastSubTitle, subTitle) < 0.6:
......@@ -527,10 +696,26 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
lastSubTitle = subTitle if conf > lastConf else lastSubTitle
continue
# 当前字幕与上一段字幕不一样
if subTitle != "err":
lastSubTitle = subTitle
last_confidence = confidence
lastConf = conf
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1")
# print(ocr_h_map)
# mainWindow.refresh_tab_slot()
while(mainWindow.refresh_flag == True):
time.sleep(1)
mainWindow.detect_lock = True
try:
process_err_ocr(mainWindow)
finally:
mainWindow.detect_lock = False
# mainWindow.refresh_lock.acquire()
# try:
# process_err_ocr(mainWindow)
# finally:
# mainWindow.refresh_lock.release()
......@@ -572,6 +757,7 @@ def process_err_ocr(mainWindow):
print(">>>>>>>>>will rm" + str(table_indexs[rm_list[i]] + 1 - i) + ", subtitle:" + mainWindow.projectContext.all_elements[table_indexs[rm_list[i]] - i].subtitle)
print(table_indexs[rm_list[i]])
mainWindow.del_line_operation_slot(row = table_indexs[rm_list[i]] + 1 - i, show_msg_flag = False)
time.sleep(0.5)
except Exception as e:
print(">>>>>>>>>>>>>>>>>>>>>>del err")
print(e)
......@@ -695,9 +881,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
# print("process the total video at time: ", datetime.datetime.now())
process_video(video_path, start_time, end_time, book_name_xlsx, sheet_name_xlsx, state, mainWindow)
def check_have_ocr(img):
    """Report whether ``img`` contains any pixel brighter than gray level 230.

    The image is converted from BGR to grayscale and binarised with a fixed
    threshold of 230; the result is True when at least one pixel ends up at
    255 after thresholding, otherwise False.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    brightness_threshold = 230
    _, binary = cv2.threshold(gray, brightness_threshold, 255, cv2.THRESH_BINARY)
    found_bright_pixel = np.any(binary == 255)
    return True if found_bright_pixel else False
if __name__ == '__main__':
# path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
path = "C:/Users/Smile/Desktop/accessibility-movie/"
# print("get_pos:", get_position(path, 0))
evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
# evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
# img = cv2.imread("./cap/cap879.png")
img = cv2.imread("./cap/cap812.png", cv2.IMREAD_GRAYSCALE)
# img = cv2.equalizeHist(img)
t = 230
_, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# img = resize_img(img)
detect_subtitle(img, None, None)
# img = cv2.equalizeHist(img)
# t = 120
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# # ret, binary_image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # binary_image = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imwrite(f'./binary2.png', img)
if np.any(img == 255):
print("111111")
else:
print("222222")
......@@ -13,7 +13,7 @@ import os
import cv2
from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu, QDialog, QVBoxLayout
from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex
from PyQt5.QtMultimedia import *
from PyQt5.QtGui import QIcon
......@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
from ffmpeg_util import adjust_audio_volume
from PyQt5.QtCore import QThread;
import threading
# from emit_import_event import emit_import_event
......@@ -71,12 +72,56 @@ class WorkerThread(QThread):
for t in self.main_window.threads:
t.start()
def refresh(self):
self.main_window.import_process_sign.emit(self.elements)
class ProcessErrThread(QThread):
    """QThread that launches ``process_err`` on a separate daemon RunThread."""

    def __init__(self, main_window):
        super().__init__()
        # Window object handed to process_err when the worker starts.
        self.main_window = main_window

    def run(self):
        """Create a daemon RunThread wrapping ``process_err`` and start it."""
        # Imported inside the method rather than at module level, as before.
        from narratage_detection import process_err

        worker = RunThread(
            funcName=process_err,
            args=(self.main_window,),
            name="process_err",
        )
        worker.setDaemon(True)
        worker.start()
class AutoCloseDialog(QDialog):
    """Dialog with customised window flags showing a single status label."""

    def __init__(self, parent=None):
        super().__init__(parent)
        flags = Qt.CustomizeWindowHint | Qt.NoDropShadowWindowHint
        self.setWindowFlags(flags)

        # The dialog's only content is the "locating subtitle bounds" notice.
        box = QVBoxLayout()
        box.addWidget(QLabel("字幕边界定位中,请稍后!"))
        self.setLayout(box)
class LocalOcrThread(QThread):
    """QThread that starts the window's ``auto_location_ocr`` on a daemon RunThread."""

    def __init__(self, main_window, path):
        super().__init__()
        self.main_window = main_window
        self.path = path

    def refresh(self):
        """Emit the window's ``import_process_sign`` with ``self.elements``."""
        # NOTE(review): nothing in this class assigns self.elements --
        # confirm a caller sets it before refresh() is used.
        signal = self.main_window.import_process_sign
        signal.emit(self.elements)

    def run(self):
        """Reset the window's progress state and thread list, then start the locator."""
        window = self.main_window

        # The same list object is shared with the window and then set to 0.1.
        progress = [None]
        window.state = progress
        progress[0] = 0.1

        window.threads = []
        locator = RunThread(
            funcName=window.auto_location_ocr,
            args=(self.path,),
            name="auto_location_ocr",
        )
        locator.setDaemon(True)
        window.threads.append(locator)
        window.all_threads.append(locator)

        # Start everything currently registered in the window's thread list.
        for pending in window.threads:
            pending.start()

    def finish(self):
        """Emit the window's ``location_ocr_sign`` signal."""
        self.main_window.location_ocr_sign.emit()
class CustomDelegate(QtWidgets.QStyledItemDelegate):
def paint(self, painter, option, index):
......@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
renew_signal = pyqtSignal(str)
import_process_sign = pyqtSignal(list)
refresh_sign = pyqtSignal()
location_ocr_sign = pyqtSignal()
def __init__(self, project_path):
super(MainWindow, self).__init__()
self.location_ocr_sign.connect(self.finish_location_ocr)
self.last_aside_index = None
self.setupUi(self)
self.statusbar.showMessage("hello", 5000)
......@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# 所有QTimer集中管理
self.import_excel_timer = QTimer()
self.import_excel_timer.timeout.connect(self.check_if_import_excel_over)
self.location_ocr_timer = QTimer()
self.location_ocr_timer.timeout.connect(self.check_if_location_ocr)
self.detect_timer = QTimer()
self.detect_timer.timeout.connect(self.check_if_detect_over_slot)
self.synthesis_timer = QTimer()
......@@ -416,6 +465,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.aside_head_time = None
self.add_head_aside = False
self.ocr_ranges = []
self.refresh_lock = threading.Lock()
self.detect_lock = False
self.refresh_flag = False
# 打印到log文件中
t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
......@@ -425,13 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
get_focus_thread.setDaemon(True)
get_focus_thread.start()
def finish_location_ocr(self):
    """Show the "subtitle locating finished, please verify" message dialog."""
    done_message = "字幕定位结束,请检查是否准确,并调整正确"
    self.import_excel_dialog.show_with_msg(done_message)
def show_confirmation_dialog(self):
confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'确认已经校准OCR范围(只包含中文,不含英文)?',
QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
if confirm_box == QtWidgets.QMessageBox.Yes:
self.show_detect_dialog()
else:
print(1)
print(">>>>>>show_confirmation_dialog")
def getFocus(self):
while(True):
......@@ -520,12 +575,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""弹出旁白区间检测相关信息填写窗口
"""
# if self.rate == None:
if self.rate == None:
# self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
# return
# if self.rate_bottom == None:
h = self.widget.get_h()
video_h = self.wgt_video.height()
self.rate = float(h-10)/float(video_h)
if self.rate_bottom == None:
# self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
# return
h = self.widget_bottom.get_h()
video_h = self.wgt_video.height()
self.rate_bottom = float(h-6)/float(video_h)
if not self.check_ocr_rate():
self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
return
......@@ -631,12 +690,30 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.action_operate.setEnabled(True)
self.action_insert_aside_from_now.setEnabled(True)
self.insert_aside_from_now_btn.setEnabled(True)
# from detect_with_ocr import evaluate_position
# print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
# y1,y2 = evaluate_position(path.path()[1:], 0)
# self.widget.setY(y1)
# self.widget_bottom.setY(y2)
# print("y1:%d,y2:%d" %(y1,y2))
# self.import_excel_dialog.show_with_msg("正在自动定位字幕边界,请稍后!")
confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'是否需要自动定位字幕边界?',
QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
if confirm_box == QtWidgets.QMessageBox.Yes:
self.location_ocr_thread = LocalOcrThread(self, path)
self.location_ocr_thread.start()
self.location_ocr_timer.start(1000)
# self.auto_close_dialog = AutoCloseDialog()
# self.auto_close_dialog.exec_()
else:
print(">>>>>>>>>>>>>play_video")
def auto_location_ocr(self, path):
    """Estimate the subtitle band of a video and move both boundary widgets to it.

    Args:
        path: Object exposing ``path()`` that returns the video location as
            a string (presumably a QUrl -- confirm against the caller).

    ``evaluate_position`` yields the upper and lower subtitle bounds; they
    are multiplied by the video widget height below, i.e. they are treated
    as fractions of the frame height.
    """
    from detect_with_ocr import evaluate_position

    # The first character of the path is dropped, presumably the leading
    # "/" of a Windows file URL such as "/C:/..." -- verify on other platforms.
    video_path = path.path()[1:]
    print(">>>>>>>>>>>>>>>>>>>>>v_path" + video_path)
    y1, y2 = evaluate_position(video_path, 0)

    video_h = self.wgt_video.height()
    # Convert the fractional bounds to widget pixels; +6 / +10 are the
    # fixed offsets applied to the upper / lower boundary widgets.
    self.widget.setY(int(video_h * y1) + 6)
    # Pause between the two widget moves, as before.
    time.sleep(1)
    self.widget_bottom.setY(int(video_h * y2) + 10)
    # y1 and y2 are fractions, so "%d" always logged 0; log them as floats.
    print("y1:%f,y2:%f" % (y1, y2))
def check_ocr_rate(self):
if self.rate > self.rate_bottom:
......@@ -969,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
self.check_if_over("旁白导入")
def check_if_location_ocr(self):
    """Timer callback for the subtitle-locating task.

    Delegates to ``check_if_over`` first, then stops the polling timer and
    clears the thread list as soon as any registered thread is no longer
    alive.
    """
    self.check_if_over("字幕定位")
    every_thread_running = all(worker.is_alive() for worker in self.threads)
    if every_thread_running:
        return
    self.location_ocr_timer.stop()
    self.threads = []
# type = 检测 或 合成 或 导出
def check_if_over(self, type: str):
"""确认传入的待检测任务是否完成
......@@ -996,12 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
elif type == "检测":
self.detect_timer.stop()
self.refresh_tab_timer.stop()
# t = ProcessErrThread(self)
# t.start()
# from narratage_detection import process_err
# process_err(self)
from narratage_detection import process_err
process_err(self)
elif type == "旁白导入":
self.import_excel_timer.stop()
# self.refresh_tab_timer.stop()
elif type == "字幕定位":
self.location_ocr_timer.stop()
else:
self.export_timer.stop()
......@@ -1016,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.progressBar.setValue(100)
self.progressLabel.setText(f"100%")
self.projectContext.nd_process = 1
self.threads = []
def deal_synthesis_callback_slot(self, threads, state):
"""实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态
......@@ -1400,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
item = QTableWidgetItem(text)
item.setTextAlignment(Qt.AlignCenter)
# 设置为不可编辑
if self.checkIfTableItemCanChange(table, idx, j) == False:
# if self.checkIfTableItemCanChange(table, idx, j) == False:
# item.setFlags(Qt.ItemIsEnabled)
print(1)
# print(">>>>>>>>>setElememtToTable")
table.setItem(idx, j, item)
# 只有Content页的字幕列和 Aside页的字幕列 可编辑
......@@ -1871,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
pre_item = self.all_tableWidget.item(row, col - 1)
suggest = pre_item.text()
if suggest != None and suggest != "":
print(">>>>>>>>suggest:" + suggest)
if col == constant.Content.AsideColumnNumber and suggest != None and suggest != "":
arrays = suggest.split("/")
if len(arrays) == 2:
suggest = str(len(text)) + "/" + arrays[1]
......@@ -1902,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# self.all_tableWidget.setItem(
# int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
self.projectContext.refresh_speed(row, text)
elif col == constant.Content.SubtitleColumnNumber:
self.projectContext.refresh_subtitle(row, text)
# self.all_tableWidget_idx = int(row)
# self.set_table_to_window(False)
......@@ -2037,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
将表格内容更新至界面中,并保存当前工程内容
"""
if not self.detect_lock:
self.refresh_flag = True
try:
self.set_table_to_window(need_refresh_all=False)
self.projectContext.save_project(False)
finally:
self.refresh_flag = False
def refresh_all_tab_slot(self):
"""刷新整个表格
......
......@@ -65,6 +65,8 @@ class MyWidget(QWidget):
print(">>>>>cur_y2 : " + str(self.y()))
self.move(0, h)
def get_h(self):
    """Return the widget's current ``y()`` value."""
    current_y = self.y()
    return current_y
def down(self, mov_len):
......
......@@ -264,6 +264,11 @@ class ProjectContext:
if not self.initial_ing:
save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)
def refresh_subtitle(self, row, subtitle: str):
    """Store ``subtitle`` on the element at ``row`` and persist the project.

    Args:
        row: Index of the element; converted with ``int`` before use.
        subtitle: New subtitle text for that element.

    The Excel file is rewritten unless ``self.initial_ing`` is set.
    """
    target = self.all_elements[int(row)]
    target.subtitle = subtitle
    if self.initial_ing:
        # Nothing is written to disk while the initial_ing flag is set.
        return
    save_excel_to_path(
        self.all_elements,
        self.excel_path,
        self.write_header,
        self.excel_sheet_name,
    )
def refresh_speed(self, row, speed: str)->None:
self.all_elements[int(row)].speed = speed
if not self.initial_ing:
......@@ -308,7 +313,7 @@ class ProjectContext:
if d["终止时间"][i] is None:
# 如果是最后一条
if i == len(d["字幕"]) - 1:
print(1)
print(">>>>>>>>>load_excel_from_path")
# ed_time_sec = "360000" if self.duration == 0 else self.duration # todo 默认最大时长是100h
else:
ed_time_sec = "%.2f"%(float(d["起始时间"][i + 1]) - 0.01)
......@@ -429,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
backup_path = os.path.dirname(new_excel_path) + "/tmp_"+str(time.time())+".xlsx"
# os.remove(new_excel_path)
os.rename(new_excel_path, backup_path)
# print(">>>>>>new_excel_path:" + new_excel_path)
# print(">>>>>>>>>>backup_path:" + backup_path)
try:
create_sheet(new_excel_path, "旁白插入位置建议", [header])
# for element in all_element:
......
......@@ -65,7 +65,11 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
def process_err(mainWindow: MainWindow=None):
    """Run ``process_err_ocr`` for ``mainWindow`` on a best-effort basis.

    Any exception raised by the clean-up step is printed and swallowed so
    it does not propagate to the caller.
    """
    from detect_with_ocr import process_err_ocr

    try:
        process_err_ocr(mainWindow)
    except Exception as exc:
        # Same two output lines as before: a marker, then the exception text.
        print("process_err err", exc, sep="\n")
if __name__ == '__main__':
# 定义参数
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment