update locate

parent c01b4948
@@ -12,10 +12,11 @@ import os
class Content:
StartTimeColumn = 0
SubtitleColumnNumber = 2
AsideColumnNumber = 4
SpeedColumnNumber = 5
# ActivateColumns = [2, 3]
ActivateColumns = [4,5]
ActivateColumns = [2,4,5]
# ColumnCount = 3
ObjectName = "all_tableWidget"
# TimeFormatColumns = [0]
@@ -251,8 +251,33 @@ def normalize(text: str) -> str:
text = text + ')'
return text
def resize_img(img):
resize_height = 152
height, width = img.shape[:2]
if resize_height > height:
# Upscaling factor
scale_factor = float(resize_height / height)
# Compute the new width and height
new_width = int(width * scale_factor)
new_height = int(height * scale_factor)
# Enlarge the image with linear interpolation
enlarged_image = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
return enlarged_image
else:
# Downscaling factor (e.g. 0.5 shrinks the image to half its size)
scale_factor = float(resize_height / height)
# Shrink the image with linear interpolation
smaller_image = cv2.resize(img, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
return smaller_image
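# Note: resize_img normalizes a crop to a target height of 152 px (upscaling short
# crops, downscaling tall ones), presumably to keep the OCR input size consistent
# across videos of different resolutions.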
index = 0
def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
t = 140
def detect_subtitle(org_img: np.ndarray, lastSubTitle, last_confidence) -> Tuple[Union[str, None], Union[float, None], float, Union[float, None]]:
"""检测当前画面得到字幕信息
Args:
@@ -261,34 +286,64 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
Returns:
Tuple[Union[str, None], Union[float, None], float, Union[float, None]]: subtitle text (None when no subtitle is found, "err" when one was likely missed), raw OCR confidence, aggregated confidence, and detected font height
"""
subTitle = ''
ocr_res = ""
# up_b = 276
# down_b = 297
global ocr_positions
# ocr_positions.append([676, 712])
h = None
global index
for i in range(len(ocr_positions)):
img = copy.deepcopy(org_img)
up_b = ocr_positions[i][0]
down_b = ocr_positions[i][1]
height = down_b - up_b
# if len(ocr_positions) == 1:
# img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
# else:
cropped_img = img[int(up_b):int(down_b)]
# Height of the blank margin to add above and below
padding_top = height*0.7
padding_bottom = height*0.7
# Compute the new image height
new_height = cropped_img.shape[0] + padding_top + padding_bottom
# Create a new blank (black) image
img = np.zeros((int(new_height), cropped_img.shape[1], 3), dtype=np.uint8)
# Place the cropped strip in the middle of the new image
start_y = int(padding_top)
end_y = start_y + cropped_img.shape[0]
img[start_y:end_y, :] = cropped_img
if len(ocr_positions) == 1:
img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
else:
# cropped_img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
cropped_img = img[int(up_b):int(down_b)]
# cropped_img = resize_img(cropped_img)
# x = float(150 / height)
# img_h, img_w = cropped_img.shape[:2]
# img_h = int(img_h * x)
# img_w = int(img_w * x)
# cropped_img = cv2.resize(cropped_img, (img_w, img_h))
# Height of the blank margin to add above and below
padding_top = height*0.7
padding_bottom = height*0.7
# padding_top = 150
# padding_bottom = 150
# Compute the new image height
new_height = cropped_img.shape[0] + padding_top + padding_bottom
# Create a new blank (black) image
img = np.zeros((int(new_height), cropped_img.shape[1], 3), dtype=np.uint8)
# Place the cropped strip in the middle of the new image
start_y = int(padding_top)
end_y = start_y + cropped_img.shape[0]
img[start_y:end_y, :] = cropped_img
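# Padding the cropped strip with ~70% of its height of black margin above and below
# presumably gives the OCR engine some context around the text line, which tends to
# help detection on very tight crops.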
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
# t = 230
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# cv2.imwrite(f'./test2.png', img)
index = index + 1
# img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
# Upscale the image for low-frame-rate videos
print(height)
@@ -297,9 +352,9 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print(img.shape)
if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
global index
cv2.imwrite(f'./cap/cap{index}.png', img)
index = index + 1
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# index = index + 1
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>new log" + str(index - 1))
res = ocr.ocr(img, cls=True)
print('--------> res', res)
@@ -309,6 +364,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
continue
# return None, 0, None
possible_txt = []
subTitle = ''
conf = 0
print('res --------->', res)
for x in res:
@@ -324,6 +380,11 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
print("字体大小差距: {}", format(height - font_size))
print("高度中心:{}".format((rect[0][1] + rect[1][1])/2/img.shape[0]))
# if confidence < 0.95:
# # global t
# print("小于0.95,重新检测,阈值为:" + str(t))
# t = t + 20
# return detect_subtitle(org_img, lastSubTitle, last_confidence)
if h == None:
h = font_size
conf_thred1 = 0.7
@@ -352,9 +413,18 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
# subTitle = ' '.join([x[0] for x in possible_txt])
subTitle = ' '.join(possible_txt)
print(subTitle, conf)
if len(subTitle) > 0:
return subTitle, conf, h
return None, 0, None
if len(subTitle) > 0:
ocr_res = ocr_res + subTitle
if (len(ocr_res)) >0:
print(">>>>>>>>>>>>cur subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
# if len(ocr_positions) == 1 and last_confidence != None and confidence != None and confidence < last_confidence:
# ocr_res = lastSubTitle
# confidence = last_confidence
# print(">>>>>>>>>>>>res subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
return ocr_res, confidence, conf, h
if check_have_ocr(img):
return "err", None, 0, None
return None, None, 0, None
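# "err" is a sentinel meaning "bright pixels remain after thresholding but OCR found
# nothing usable"; the caller in process_video simply skips such frames.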
@@ -383,6 +453,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
video = cv2.VideoCapture(video_path)
fps = video.get(cv2.CAP_PROP_FPS)
lastSubTitle = None
last_confidence = None
lastConf = 0
# res holds the subtitles collected while traversing the video, without any narration analysis mixed in
res = []
@@ -425,8 +496,9 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
mainWindow.projectContext.nd_process = state[0]
mainWindow.projectContext.last_time = cur_time
subTitle, conf, cur_ocr_h = detect_subtitle(frame)
subTitle, confidence, conf, cur_ocr_h = detect_subtitle(frame, lastSubTitle, last_confidence)
if subTitle == "err":
continue
if subTitle is not None:
@@ -479,10 +551,26 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
lastSubTitle = subTitle if conf > lastConf else lastSubTitle
continue
# The current subtitle differs from the previous one
lastSubTitle = subTitle
lastConf = conf
if subTitle != "err":
lastSubTitle = subTitle
last_confidence = confidence
lastConf = conf
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1")
# print(ocr_h_map)
# mainWindow.refresh_tab_slot()
while(mainWindow.refresh_flag == True):
time.sleep(1)
mainWindow.detect_lock = True
try:
process_err_ocr(mainWindow)
finally:
mainWindow.detect_lock = False
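# detect_lock / refresh_flag form a simple handshake with refresh_tab_slot: wait for
# any in-flight UI refresh to finish, then block new refreshes while process_err_ocr
# deletes rows from the table.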
# mainWindow.refresh_lock.acquire()
# try:
# process_err_ocr(mainWindow)
# finally:
# mainWindow.refresh_lock.release()
@@ -524,6 +612,7 @@ def process_err_ocr(mainWindow):
print(">>>>>>>>>will rm" + str(table_indexs[rm_list[i]] + 1 - i) + ", subtitle:" + mainWindow.projectContext.all_elements[table_indexs[rm_list[i]] - i].subtitle)
print(table_indexs[rm_list[i]])
mainWindow.del_line_operation_slot(row = table_indexs[rm_list[i]] + 1 - i, show_msg_flag = False)
time.sleep(0.5)
except Exception as e:
print(">>>>>>>>>>>>>>>>>>>>>>del err")
print(e)
@@ -647,9 +736,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
# print("process the total video at time: ", datetime.datetime.now())
process_video(video_path, start_time, end_time, book_name_xlsx, sheet_name_xlsx, state, mainWindow)
def check_have_ocr(img):
new_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
t = 230
_, new_img = cv2.threshold(new_img, t,255, cv2.THRESH_BINARY)
if np.any(new_img == 255):
return True
else:
return False
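# check_have_ocr: after a hard threshold at 230, any remaining white pixel in the crop
# is taken as evidence that bright subtitle-like text is present even though OCR
# returned nothing.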
if __name__ == '__main__':
# path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
path = "C:/Users/Smile/Desktop/accessibility-movie/"
# print("get_pos:", get_position(path, 0))
evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
# evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
# img = cv2.imread("./cap/cap879.png")
img = cv2.imread("./cap/cap812.png", cv2.IMREAD_GRAYSCALE)
# img = cv2.equalizeHist(img)
t = 230
_, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# img = resize_img(img)
detect_subtitle(img, None, None)
# img = cv2.equalizeHist(img)
# t = 120
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# # ret, binary_image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # binary_image = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imwrite(f'./binary2.png', img)
if np.any(img == 255):
print("111111")
else:
print("222222")
@@ -13,7 +13,7 @@ import os
import cv2
from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu, QDialog, QVBoxLayout
from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex
from PyQt5.QtMultimedia import *
from PyQt5.QtGui import QIcon
@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
from ffmpeg_util import adjust_audio_volume
from PyQt5.QtCore import QThread;
import threading
# from emit_import_event import emit_import_event
@@ -71,13 +72,57 @@ class WorkerThread(QThread):
for t in self.main_window.threads:
t.start()
def refresh(self):
self.main_window.import_process_sign.emit(self.elements)
class ProcessErrThread(QThread):
def __init__(self, main_window):
super().__init__()
self.main_window = main_window
def run(self):
from narratage_detection import process_err
t = RunThread(funcName=process_err,args=(self.main_window, ),name="process_err")
t.setDaemon(True)
t.start()
class AutoCloseDialog(QDialog):
def __init__(self, parent = None):
super(AutoCloseDialog, self).__init__(parent)
self.setWindowFlags(Qt.CustomizeWindowHint | Qt.NoDropShadowWindowHint)
layout = QVBoxLayout()
label = QLabel("字幕边界定位中,请稍后!")
layout.addWidget(label)
self.setLayout(layout)
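# AutoCloseDialog is a frameless "please wait" popup for the boundary-location step;
# its use at video-load time is currently commented out in favour of the background
# LocalOcrThread.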
class LocalOcrThread(QThread):
def __init__(self, main_window,path):
super().__init__()
self.main_window = main_window
self.path = path
def run(self):
# Run the time-consuming work in the background
state = [None]
self.main_window.state = state
state[0] = 0.1
self.main_window.threads = []
t = RunThread(funcName=self.main_window.auto_location_ocr,
args=(self.path, ),
name="auto_location_ocr")
t.setDaemon(True)
self.main_window.threads.append(t)
self.main_window.all_threads.append(t)
for t in self.main_window.threads:
t.start()
def finish(self):
self.main_window.location_ocr_sign.emit()
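# LocalOcrThread runs auto_location_ocr through a RunThread off the UI thread;
# completion is polled once per second by check_if_location_ocr via location_ocr_timer.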
class CustomDelegate(QtWidgets.QStyledItemDelegate):
def paint(self, painter, option, index):
# Customize the painting behavior for the specific column
@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
renew_signal = pyqtSignal(str)
import_process_sign = pyqtSignal(list)
refresh_sign = pyqtSignal()
location_ocr_sign = pyqtSignal()
def __init__(self, project_path):
super(MainWindow, self).__init__()
self.location_ocr_sign.connect(self.finish_location_ocr)
self.last_aside_index = None
self.setupUi(self)
self.statusbar.showMessage("hello", 5000)
@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# All QTimers are managed centrally here
self.import_excel_timer = QTimer()
self.import_excel_timer.timeout.connect(self.check_if_import_excel_over)
self.location_ocr_timer = QTimer()
self.location_ocr_timer.timeout.connect(self.check_if_location_ocr)
self.detect_timer = QTimer()
self.detect_timer.timeout.connect(self.check_if_detect_over_slot)
self.synthesis_timer = QTimer()
@@ -416,6 +465,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.aside_head_time = None
self.add_head_aside = False
self.ocr_ranges = []
self.refresh_lock = threading.Lock()
self.detect_lock = False
self.refresh_flag = False
# Redirect print output to the log file
t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
@@ -425,13 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
get_focus_thread.setDaemon(True)
get_focus_thread.start()
def finish_location_ocr(self):
self.import_excel_dialog.show_with_msg("字幕定位结束,请检查是否准确,并调整正确")
def show_confirmation_dialog(self):
confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'确认已经校准OCR范围(只包含中文,不含英文)?',
QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
if confirm_box == QtWidgets.QMessageBox.Yes:
self.show_detect_dialog()
else:
print(1)
print(">>>>>>show_confirmation_dialog")
def getFocus(self):
while(True):
@@ -520,12 +575,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""弹出旁白区间检测相关信息填写窗口
"""
# if self.rate == None:
# self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
# return
# if self.rate_bottom == None:
# self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
# return
if self.rate == None:
# self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
h = self.widget.get_h()
video_h = self.wgt_video.height()
self.rate = float(h-10)/float(video_h)
if self.rate_bottom == None:
# self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
h = self.widget_bottom.get_h()
video_h = self.wgt_video.height()
self.rate_bottom = float(h-6)/float(video_h)
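# Fallback: if the user never set the subtitle boundaries explicitly, derive them from
# the current positions of the two draggable boundary widgets as a fraction of the
# video widget height (the 10 / 6 px offsets presumably compensate for the widgets'
# own geometry).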
if not self.check_ocr_rate():
self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
return
@@ -631,12 +690,29 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.action_operate.setEnabled(True)
self.action_insert_aside_from_now.setEnabled(True)
self.insert_aside_from_now_btn.setEnabled(True)
# from detect_with_ocr import evaluate_position
# print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
# y1,y2 = evaluate_position(path.path()[1:], 0)
# self.widget.setY(y1)
# self.widget_bottom.setY(y2)
# print("y1:%d,y2:%d" %(y1,y2))
# self.import_excel_dialog.show_with_msg("正在自动定位字幕边界,请稍后!")
confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'是否需要自动定位字幕边界?',
QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
if confirm_box == QtWidgets.QMessageBox.Yes:
self.location_ocr_thread = LocalOcrThread(self, path)
self.location_ocr_thread.start()
self.location_ocr_timer.start(1000)
# self.auto_close_dialog = AutoCloseDialog()
# self.auto_close_dialog.exec_()
else:
print(">>>>>>>>>>>>>play_video")
def auto_location_ocr(self, path):
from detect_with_ocr import evaluate_position
print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
y1,y2 = evaluate_position(path.path()[1:], 0)
video_h = self.wgt_video.height()
self.widget.setY(int(video_h * y1) + 6)
self.widget_bottom.setY(int(video_h * y2) + 10)
print("y1:%d,y2:%d" %(y1,y2))
def check_ocr_rate(self):
if self.rate > self.rate_bottom:
@@ -969,6 +1045,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
self.check_if_over("旁白导入")
def check_if_location_ocr(self):
self.check_if_over("字幕定位")
alive = True
for t in self.threads:
alive = alive and t.is_alive()
if not alive:
self.location_ocr_timer.stop()
# self.auto_close_dialog.close()
self.threads = []
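# Polled once per second by location_ocr_timer; once every worker thread has finished,
# the timer is stopped and the thread list is cleared.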
# type = 检测 (detect), 合成 (synthesize) or 导出 (export)
def check_if_over(self, type: str):
"""确认传入的待检测任务是否完成
@@ -996,12 +1082,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
elif type == "检测":
self.detect_timer.stop()
self.refresh_tab_timer.stop()
from narratage_detection import process_err
process_err(self)
# t = ProcessErrThread(self)
# t.start()
# from narratage_detection import process_err
# process_err(self)
elif type == "旁白导入":
self.import_excel_timer.stop()
# self.refresh_tab_timer.stop()
elif type == "字幕定位":
self.location_ocr_timer.stop()
else:
self.export_timer.stop()
@@ -1016,6 +1106,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.progressBar.setValue(100)
self.progressLabel.setText(f"100%")
self.projectContext.nd_process = 1
self.threads = []
def deal_synthesis_callback_slot(self, threads, state):
"""实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态
@@ -1400,9 +1491,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
item = QTableWidgetItem(text)
item.setTextAlignment(Qt.AlignCenter)
# Mark the item as non-editable
if self.checkIfTableItemCanChange(table, idx, j) == False:
# if self.checkIfTableItemCanChange(table, idx, j) == False:
# item.setFlags(Qt.ItemIsEnabled)
print(1)
# print(">>>>>>>>>setElememtToTable")
table.setItem(idx, j, item)
# Only the subtitle column on the Content tab and the subtitle column on the Aside tab are editable
@@ -1871,7 +1962,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
pre_item = self.all_tableWidget.item(row, col - 1)
suggest = pre_item.text()
if suggest != None and suggest != "":
print(">>>>>>>>suggest:" + suggest)
if col == constant.Content.AsideColumnNumber and suggest != None and suggest != "":
arrays = suggest.split("/")
if len(arrays) == 2:
suggest = str(len(text)) + "/" + arrays[1]
@@ -1902,6 +1995,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# self.all_tableWidget.setItem(
# int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
self.projectContext.refresh_speed(row, text)
elif col == constant.Content.SubtitleColumnNumber:
self.projectContext.refresh_subtitle(row, text)
# self.all_tableWidget_idx = int(row)
# self.set_table_to_window(False)
@@ -2037,8 +2132,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
Update the table contents in the UI and save the current project.
"""
self.set_table_to_window(need_refresh_all=False)
self.projectContext.save_project(False)
if not self.detect_lock:
self.refresh_flag = True
try:
self.set_table_to_window(need_refresh_all=False)
self.projectContext.save_project(False)
finally:
self.refresh_flag = False
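# refresh_flag signals the detection thread that a refresh is in progress, while
# detect_lock (set around process_err_ocr in process_video) turns the refresh into a
# no-op so rows are not redrawn while they are being deleted.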
def refresh_all_tab_slot(self):
"""刷新整个表格
@@ -65,6 +65,8 @@ class MyWidget(QWidget):
print(">>>>>cur_y2 : " + str(self.y()))
self.move(0, h)
def get_h(self):
return self.y()
def down(self, mov_len):
@@ -264,6 +264,11 @@ class ProjectContext:
if not self.initial_ing:
save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)
def refresh_subtitle(self, row, subtitle: str):
self.all_elements[int(row)].subtitle = subtitle
if not self.initial_ing:
save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)
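# Mirrors refresh_speed below: edits in the subtitle column are written back to
# all_elements and, unless the project is still initializing, saved to the Excel file.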
def refresh_speed(self, row, speed: str)->None:
self.all_elements[int(row)].speed = speed
if not self.initial_ing:
@@ -308,7 +313,7 @@ class ProjectContext:
if d["终止时间"][i] is None:
# If this is the last entry
if i == len(d["字幕"]) - 1:
print(1)
print(">>>>>>>>>load_excel_from_path")
# ed_time_sec = "360000" if self.duration == 0 else self.duration # todo 默认最大时长是100h
else:
ed_time_sec = "%.2f"%(float(d["起始时间"][i + 1]) - 0.01)
@@ -429,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
backup_path = os.path.dirname(new_excel_path) + "/tmp_"+str(time.time())+".xlsx"
# os.remove(new_excel_path)
os.rename(new_excel_path, backup_path)
# print(">>>>>>new_excel_path:" + new_excel_path)
# print(">>>>>>>>>>backup_path:" + backup_path)
try:
create_sheet(new_excel_path, "旁白插入位置建议", [header])
# for element in all_element:
@@ -65,7 +65,11 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
def process_err(mainWindow: MainWindow=None):
from detect_with_ocr import process_err_ocr
process_err_ocr(mainWindow)
try:
process_err_ocr(mainWindow)
except Exception as e:
print("process_err err")
print(e)
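# Swallowing the exception keeps the detection pipeline alive even if the post-pass
# over mis-recognized rows fails; the error is only printed to the log.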
if __name__ == '__main__':
# Define the parameters