Commit 2c4cd5c0 authored by smile2019's avatar smile2019

Merge remote-tracking branch 'refs/remotes/origin/feat_1' into feat_1

parents d3fcd34f 7296e8d4
...@@ -14,3 +14,30 @@ res/ffmpeg-4.3.1/bin/qiji_local.mp4 ...@@ -14,3 +14,30 @@ res/ffmpeg-4.3.1/bin/qiji_local.mp4
venv/ venv/
venv37/ venv37/
shenming_test shenming_test
cap.png
requirements3.8.txt
venv3.8-new/
webrtcvad-2.0.10-cp38-abi3-win_amd64.whl
xlsx-resource/
deal_ocr.csv
deal_srt.csv
new.srt
shenhai1.xlsx
shenhai2.xlsx
test,py
"\346\267\261\346\265\267\347\237\255\347\211\2072.xlsx"
"\346\267\261\346\265\267\347\237\255\347\211\207origin.xlsx"
11.py
222.py
cap/
cap1597.png
cap831.png
deal.py
deal_movie.py
movie_1.txt
movie_pro.txt
res/.paddleocr/2.3.0.1/ocr/paddleocr/
script1.py
test/
\ No newline at end of file
...@@ -12,10 +12,11 @@ import os ...@@ -12,10 +12,11 @@ import os
class Content: class Content:
StartTimeColumn = 0 StartTimeColumn = 0
SubtitleColumnNumber = 2
AsideColumnNumber = 4 AsideColumnNumber = 4
SpeedColumnNumber = 5 SpeedColumnNumber = 5
# ActivateColumns = [2, 3] # ActivateColumns = [2, 3]
ActivateColumns = [4,5] ActivateColumns = [2,4,5]
# ColumnCount = 3 # ColumnCount = 3
ObjectName = "all_tableWidget" ObjectName = "all_tableWidget"
# TimeFormatColumns = [0] # TimeFormatColumns = [0]
......
...@@ -30,6 +30,9 @@ from typing import Tuple, Union ...@@ -30,6 +30,9 @@ from typing import Tuple, Union
from utils import reverse_time_to_seconds from utils import reverse_time_to_seconds
from detect_with_asr import create_sheet, write_to_sheet from detect_with_asr import create_sheet, write_to_sheet
from main_window import MainWindow, Element from main_window import MainWindow, Element
import time
import numpy as np
import copy
import math import math
# 字幕的上下边界 # 字幕的上下边界
...@@ -42,10 +45,18 @@ cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer" ...@@ -42,10 +45,18 @@ cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
cur_rec_model_dir = paddle_dir + "rec/ch/ch_PP-OCRv2_rec_infer" cur_rec_model_dir = paddle_dir + "rec/ch/ch_PP-OCRv2_rec_infer"
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cls_model_dir=cur_cls_model_dir, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir) ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cls_model_dir=cur_cls_model_dir, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
# paddle_dir = "res/.paddleocr/2.3.0.1/ocr/paddleocr/"
# cur_det_model_dir = paddle_dir + "ch_PP-OCRv4_det_infer"
# cur_rec_model_dir = paddle_dir + "ch_PP-OCRv4_rec_infer"
# ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
# 正常语速为4字/秒 # 正常语速为4字/秒
normal_speed = 4 normal_speed = 4
table_index = 0
ocr_h_map = {}
def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]: def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]:
print(">>>>>>video path:" + video_path)
video = cv2.VideoCapture(video_path) video = cv2.VideoCapture(video_path)
fps = video.get(cv2.CAP_PROP_FPS) fps = video.get(cv2.CAP_PROP_FPS)
start = int(start_time * fps) start = int(start_time * fps)
...@@ -183,7 +194,9 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float] ...@@ -183,7 +194,9 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]: ocr_positions = []
def get_position(video_path: str, start_time: float, ocr_ranges) -> Tuple[float, float]:
# return (885.0, 989.0) # return (885.0, 989.0)
"""根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别 """根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
...@@ -194,6 +207,11 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f ...@@ -194,6 +207,11 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
Returns: Returns:
Tuple[float, float]: 字幕在整个画面中的上下边界位置 Tuple[float, float]: 字幕在整个画面中的上下边界位置
""" """
print(">>>>get posti")
print(ocr_ranges)
for i in range(len(ocr_ranges)):
rate = ocr_ranges[i][0]
rate_bottom = ocr_ranges[i][1]
print(">>>>>>>>>>open") print(">>>>>>>>>>open")
print("video_path:", video_path) print("video_path:", video_path)
video = cv2.VideoCapture(video_path) video = cv2.VideoCapture(video_path)
...@@ -210,18 +228,15 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f ...@@ -210,18 +228,15 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
print(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate)) up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom)) down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
global ocr_positions
print("add positions")
print(up)
ocr_positions.insert(0,[up, down])
# down = up + 20 # down = up + 20
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73) # down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
print(up) # print(up)
# print(down) # # print(down)
# return int(up), int(down)
up_rate,down_rate = evaluate_position(video_path,0)
up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)*up_rate)
down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)*down_rate)
return int(up), int(down)
# TODO 现阶段是主窗体设定字幕的开始位置和结束位置,传入该函数。现在希望做成自动检测的?
# while True: # while True:
# _, img = video.read() # _, img = video.read()
# # print("img:", img) # # print("img:", img)
...@@ -339,7 +354,41 @@ def normalize(text: str) -> str: ...@@ -339,7 +354,41 @@ def normalize(text: str) -> str:
return text return text
def resize_img(img, target_height=152):
    """Scale ``img`` proportionally so its height becomes about ``target_height``.

    The previous implementation computed ``height / target_height`` as the
    factor for images taller than the target, which is >= 1 and therefore
    enlarged them even further instead of shrinking them. A single
    ``target_height / height`` factor is now used in both directions.

    Args:
        img: Image array; only ``img.shape[:2]`` (height, width) is inspected
            here before the array is handed to ``cv2.resize``.
        target_height: Desired output height in pixels. Defaults to 152, the
            value that used to be hard-coded.

    Returns:
        The resized image, or ``img`` unchanged when it has no rows/columns
        (``cv2.resize`` cannot handle an empty image).
    """
    height, width = img.shape[:2]
    if height == 0 or width == 0:
        # Nothing to scale, and the ratio below would divide by zero.
        return img

    scale_factor = float(target_height) / height
    # Truncate like the original enlarge path did, but never drop below one
    # pixel so that extreme down-scaling still yields a valid size.
    new_width = max(1, int(width * scale_factor))
    new_height = max(1, int(height * scale_factor))
    # cv2.resize expects the destination size as (width, height).
    return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
def extract_white_prior(img, threshold=200):
    """Return a binary mask of the bright regions of a BGR image.

    The image is converted to grayscale and passed through
    ``cv2.threshold`` with ``cv2.THRESH_BINARY`` and a maximum value of 255.

    Args:
        img: Image in BGR channel order (as required by ``COLOR_BGR2GRAY``).
        threshold: Gray level handed to ``cv2.threshold`` as the cut-off.

    Returns:
        The thresholded single-channel image produced by ``cv2.threshold``.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (retval, image); only the image is needed.
    return cv2.threshold(grayscale, threshold, 255, cv2.THRESH_BINARY)[1]
index = 0
t = 140
def detect_subtitle(org_img: np.ndarray, lastSubTitle, last_confidence) -> Tuple[Union[str, None], float]:
"""检测当前画面得到字幕信息 """检测当前画面得到字幕信息
Args: Args:
...@@ -348,12 +397,65 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]: ...@@ -348,12 +397,65 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
Returns: Returns:
Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度 Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
""" """
subTitle = ''
ocr_res = ""
# up_b = 276 # up_b = 276
# down_b = 297 # down_b = 297
global ocr_positions
# ocr_positions.append([676, 712])
h = None
global index
for i in range(len(ocr_positions)):
img = copy.deepcopy(org_img)
up_b = ocr_positions[i][0]
down_b = ocr_positions[i][1]
height = down_b - up_b height = down_b - up_b
if len(ocr_positions) == 1:
img = img[int(up_b - height*0.7):int(down_b + height*0.7)] img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
else:
# cropped_img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
cropped_img = img[int(up_b):int(down_b)]
# cropped_img = resize_img(cropped_img)
# x = float(150 / height)
# img_h, img_w = cropped_img.shape[:2]
# img_h = int(img_h * x)
# img_w = int(img_w * x)
# cropped_img = cv2.resize(cropped_img, (img_w, img_h))
# 定义要添加的上下空白的高度
padding_top = height*0.7
padding_bottom = height*0.7
# padding_top = 150
# padding_bottom = 150
# 计算新图像的高度
new_height = cropped_img.shape[0] + padding_top + padding_bottom
# 创建一个新的空白图像
img = np.zeros((int(new_height), cropped_img.shape[1], 3), dtype=np.uint8)
# 将裁剪后的图像放置在新图像中间
start_y = int(padding_top)
end_y = start_y + cropped_img.shape[0]
img[start_y:end_y, :] = cropped_img
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
# t = 230
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# cv2.imwrite(f'./test2.png', img)
# index = index + 1
# img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
# 针对低帧率的视频做图像放大处理 # 针对低帧率的视频做图像放大处理
print(height) print(height)
print(up_b) print(up_b)
...@@ -361,16 +463,26 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]: ...@@ -361,16 +463,26 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
print(img.shape) print(img.shape)
if img.shape[1] < 1000: if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5))) img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
cv2.imwrite('./cap.png', img)
# img = extract_white_prior(img)
cv2.imwrite(f'./cap/cap{index}.png', img)
index = index + 1
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>new log" + str(index - 1))
res = ocr.ocr(img, cls=True) res = ocr.ocr(img, cls=True)
print('--------> res', res) print('--------> res', res)
sorted(res, key=lambda text: text[0][0][1]) sorted(res, key=lambda text: text[0][0][1])
sorted(res, key=lambda text: text[0][0][0]) sorted(res, key=lambda text: text[0][0][0])
if len(res) == 0: if len(res) == 0:
return None, 0 continue
# return None, 0, None
possible_txt = [] possible_txt = []
subTitle = ''
conf = 0 conf = 0
print('res --------->', res) print('res --------->', res)
res.sort(key=lambda rect: rect[0][0][0] + rect[0][1][0]) # 按照中心点排序
for x in res: for x in res:
# cv2.imshow("cut", img) # cv2.imshow("cut", img)
# cv2.waitKey(0) # cv2.waitKey(0)
...@@ -384,6 +496,13 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]: ...@@ -384,6 +496,13 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size)) print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
print("字体大小差距: {}", format(height - font_size)) print("字体大小差距: {}", format(height - font_size))
print("高度中心:{}".format((rect[0][1] + rect[1][1])/2/img.shape[0])) print("高度中心:{}".format((rect[0][1] + rect[1][1])/2/img.shape[0]))
# if confidence < 0.95:
# # global t
# print("小于0.95,重新检测,阈值为:" + str(t))
# t = t + 20
# return detect_subtitle(org_img, lastSubTitle, last_confidence)
if h == None:
h = font_size
conf_thred1 = 0.7 conf_thred1 = 0.7
conf_thred2 = 0.85 conf_thred2 = 0.85
# conf_thred1 = 0.1 # conf_thred1 = 0.1
...@@ -411,8 +530,18 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]: ...@@ -411,8 +530,18 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
subTitle = ' '.join(possible_txt) subTitle = ' '.join(possible_txt)
print(subTitle, conf) print(subTitle, conf)
if len(subTitle) > 0: if len(subTitle) > 0:
return subTitle, conf ocr_res = ocr_res + subTitle
return None, 0 if (len(ocr_res)) >0:
print(">>>>>>>>>>>>cur subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
# if len(ocr_positions) == 1 and last_confidence != None and confidence != None and confidence < last_confidence:
# ocr_res = lastSubTitle
# confidence = last_confidence
# print(">>>>>>>>>>>>res subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
return ocr_res, confidence, conf, h
if check_have_ocr(img):
return "err", None, 0, None
return None, None, 0, None
def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow=None): def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow=None):
...@@ -440,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -440,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
video = cv2.VideoCapture(video_path) video = cv2.VideoCapture(video_path)
fps = video.get(cv2.CAP_PROP_FPS) fps = video.get(cv2.CAP_PROP_FPS)
lastSubTitle = None lastSubTitle = None
last_confidence = None
lastConf = 0 lastConf = 0
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析 # res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
res = [] res = []
...@@ -448,13 +578,16 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -448,13 +578,16 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
end_time = 0 end_time = 0
video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000) video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
pre_state = state[0] pre_state = state[0]
ocr_h = None
while True: while True:
_, frame = video.read() _, frame = video.read()
if frame is None: if frame is None:
break break
cnt += 1 cnt += 1
cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000 cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
# 判断当前帧是否已超限制 # 判断当前帧是否已超限制
# end 主要用来判断是否越界
if cur_time > end: if cur_time > end:
if cur_time - end_time > 1: if cur_time - end_time > 1:
print('--------------------------------------------------') print('--------------------------------------------------')
...@@ -466,7 +599,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -466,7 +599,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# 判断当前是否有字幕需要被保存下来 # 判断当前是否有字幕需要被保存下来
if end_time < start_time: if end_time < start_time:
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, '']) # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, '']) add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''],ocr_h=ocr_h)
print(">>>>>>subtitle,ocr_h1:" + str(lastSubTitle) + ">>>" + str(ocr_h))
break break
# 每秒取4帧画面左右 # 每秒取4帧画面左右
# TODO 取帧算法优化 # TODO 取帧算法优化
...@@ -479,7 +613,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -479,7 +613,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
mainWindow.projectContext.nd_process = state[0] mainWindow.projectContext.nd_process = state[0]
mainWindow.projectContext.last_time = cur_time mainWindow.projectContext.last_time = cur_time
subTitle, conf = detect_subtitle(frame) subTitle, confidence, conf, cur_ocr_h = detect_subtitle(frame, lastSubTitle, last_confidence)
if subTitle == "err":
continue
if subTitle is not None: if subTitle is not None:
subTitle = normalize(subTitle) subTitle = normalize(subTitle)
...@@ -488,6 +625,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -488,6 +625,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# 第一次找到字幕 # 第一次找到字幕
if lastSubTitle is None and subTitle is not None: if lastSubTitle is None and subTitle is not None:
if cur_ocr_h != None:
ocr_h = cur_ocr_h
start_time = cur_time start_time = cur_time
# 字幕消失 # 字幕消失
...@@ -500,14 +639,17 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -500,14 +639,17 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
(res[-1][0] - res[-2][1]) * normal_speed) (res[-1][0] - res[-2][1]) * normal_speed)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens]) # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens]) # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens]) add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
print(start_time, end_time, lastSubTitle) print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, '']) # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, '']) add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
elif lastSubTitle is not None and subTitle is not None: elif lastSubTitle is not None and subTitle is not None:
# 两句话连在一起,但是两句话不一样 # 两句话连在一起,但是两句话不一样
if string_similar(lastSubTitle, subTitle) < 0.6: if string_similar(lastSubTitle, subTitle) < 0.6:
if cur_ocr_h != None:
ocr_h = cur_ocr_h
end_time = cur_time end_time = cur_time
res.append([start_time, end_time, lastSubTitle]) res.append([start_time, end_time, lastSubTitle])
if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1: if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
...@@ -516,24 +658,98 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she ...@@ -516,24 +658,98 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
(res[-1][0] - res[-2][1]) * normal_speed) (res[-1][0] - res[-2][1]) * normal_speed)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens]) # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens]) # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens]) add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
print(start_time, end_time, lastSubTitle) print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, '']) # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, '']) add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
print(">>>>>>subtitle,ocr_h3:" + str(lastSubTitle) + ">>>" + str(ocr_h))
start_time = end_time start_time = end_time
else: else:
lastSubTitle = subTitle if conf > lastConf else lastSubTitle lastSubTitle = subTitle if conf > lastConf else lastSubTitle
continue continue
# 当前字幕与上一段字幕不一样 # 当前字幕与上一段字幕不一样
if subTitle != "err":
lastSubTitle = subTitle lastSubTitle = subTitle
last_confidence = confidence
lastConf = conf lastConf = conf
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1")
def add_to_list(mainWindow: MainWindow, element_type: str, li: list): # print(ocr_h_map)
# mainWindow.refresh_tab_slot()
while(mainWindow.refresh_flag == True):
time.sleep(1)
mainWindow.detect_lock = True
try:
process_err_ocr(mainWindow)
finally:
mainWindow.detect_lock = False
# mainWindow.refresh_lock.acquire()
# try:
# process_err_ocr(mainWindow)
# finally:
# mainWindow.refresh_lock.release()
def process_err_ocr(mainWindow):
    """Delete subtitle rows whose OCR text height is a statistical outlier.

    Every element of ``mainWindow.projectContext.all_elements`` that has a
    non-empty ``subtitle`` and a known ``ocr_h`` contributes its ``ocr_h`` to
    the module-level ``ocr_h_map`` (element index -> height). Rows whose
    height differs from the mean by more than 2.7 standard deviations are
    removed through ``mainWindow.del_line_operation_slot``.

    Compared with the previous version:
      * ``ocr_h_map`` is rebuilt on every call, so indices left over from an
        earlier run can no longer point at unrelated rows;
      * elements whose ``ocr_h`` is ``None`` are skipped (``np.mean`` cannot
        average ``None``);
      * the function returns early when there is nothing to analyse instead
        of computing statistics on an empty list.

    Args:
        mainWindow: Main window object providing ``projectContext.all_elements``
            and ``del_line_operation_slot``.
    """
    # Drop stale entries from previous invocations before collecting again.
    ocr_h_map.clear()
    for idx, element in enumerate(mainWindow.projectContext.all_elements):
        if element.subtitle is None or element.subtitle == "":
            continue
        if element.ocr_h is None:
            continue
        ocr_h_map[idx] = element.ocr_h
    print(ocr_h_map)

    data = list(ocr_h_map.values())
    print(">>>>values")
    print(ocr_h_map.values())
    table_indexs = list(ocr_h_map.keys())
    print(table_indexs)
    if not data:
        # No measured subtitle heights: nothing can be an outlier.
        return

    # Mean and standard deviation of the collected heights.
    mean = np.mean(data)
    std_dev = np.std(data)
    # Anything further than 2.7 standard deviations from the mean is removed.
    threshold = 2.7 * std_dev
    print(mean)
    print(threshold)

    # Positions (within ``data``) of the outlying heights.
    rm_list = [pos for pos, value in enumerate(data) if abs(value - mean) > threshold]
    print(rm_list)
    print(len(mainWindow.projectContext.all_elements))

    for removed, pos in enumerate(rm_list):
        # Each earlier deletion attempt shifts the target by one, hence the
        # ``- removed`` offset. The ``+ 1`` passed as ``row`` is kept from the
        # original code (presumably rows are 1-based -- TODO confirm).
        element_idx = table_indexs[pos] - removed
        try:
            print(">>>>>>>>>will rm" + str(element_idx + 1) + ", subtitle:" + mainWindow.projectContext.all_elements[element_idx].subtitle)
            print(table_indexs[pos])
            mainWindow.del_line_operation_slot(row=element_idx + 1, show_msg_flag=False)
            time.sleep(0.5)
        except Exception as e:
            # Best effort: a failed deletion must not abort the remaining ones.
            print(">>>>>>>>>>>>>>>>>>>>>>del err")
            print(e)
def add_to_list(mainWindow: MainWindow, element_type: str, li: list, ocr_h : int = None):
# 默认使用配置文件中的语速 # 默认使用配置文件中的语速
speed = mainWindow.projectContext.speaker_speed speed = mainWindow.projectContext.speaker_speed
aside_head_time = float(reverse_time_to_seconds(mainWindow.aside_head_time)) if mainWindow.aside_head_time != None else float(0) aside_head_time = float(reverse_time_to_seconds(mainWindow.aside_head_time)) if mainWindow.aside_head_time != None else float(0)
st_time_sec, ed_time_sec, subtitle, suggest = li st_time_sec, ed_time_sec, subtitle, suggest = li
# global table_index
# if ocr_h != None and element_type == "字幕":
# index = table_index
# ocr_h_map[index] = ocr_h
# print(">>>>>>>>>>>>>>>>>>ocr H map:" + str(index) + ",subtitle:" + subtitle)
# print(ocr_h_map)
print(">>>>>>>>start time:") print(">>>>>>>>start time:")
print(aside_head_time) print(aside_head_time)
print(st_time_sec) print(st_time_sec)
...@@ -541,11 +757,12 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list): ...@@ -541,11 +757,12 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
print(">>>>>>need del") print(">>>>>>need del")
print(st_time_sec) print(st_time_sec)
if not mainWindow.add_head_aside: if not mainWindow.add_head_aside:
new_element = Element('0.00', "", "", "0/100", "",speed) new_element = Element('0.00', "", "", "0/100", "",speed,ocr_h)
mainWindow.projectContext.aside_list.append(new_element) mainWindow.projectContext.aside_list.append(new_element)
mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1]) mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1 mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1
mainWindow.add_head_aside = True mainWindow.add_head_aside = True
# table_index = table_index + 1
return return
st_time_sec, ed_time_sec = str(st_time_sec), str(ed_time_sec) st_time_sec, ed_time_sec = str(st_time_sec), str(ed_time_sec)
...@@ -553,7 +770,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list): ...@@ -553,7 +770,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
aside = "" aside = ""
i = len(mainWindow.projectContext.all_elements) i = len(mainWindow.projectContext.all_elements)
if element_type == "字幕": if element_type == "字幕":
new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed) new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed,ocr_h)
new_element.print_self() new_element.print_self()
if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "": if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
...@@ -561,9 +778,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list): ...@@ -561,9 +778,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# print(">>>>>>>>>>>remove short aside") # print(">>>>>>>>>>>remove short aside")
mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index]) mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index]) mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
# table_index = table_index - 1
mainWindow.last_aside_index = None mainWindow.last_aside_index = None
mainWindow.projectContext.subtitle_list.append(new_element) mainWindow.projectContext.subtitle_list.append(new_element)
mainWindow.projectContext.all_elements.append(mainWindow.projectContext.subtitle_list[-1]) mainWindow.projectContext.all_elements.append(mainWindow.projectContext.subtitle_list[-1])
# table_index = table_index + 1
else: else:
if i == 0: if i == 0:
st_time_sec = "0.01" st_time_sec = "0.01"
...@@ -574,7 +793,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list): ...@@ -574,7 +793,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# 因为暂时没有用到ed_time_sec,所以直接赋值空吧 # 因为暂时没有用到ed_time_sec,所以直接赋值空吧
ed_time_sec = "" ed_time_sec = ""
new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed) new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed, ocr_h)
new_element.print_self() new_element.print_self()
if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "": if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
...@@ -582,15 +801,18 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list): ...@@ -582,15 +801,18 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# print(">>>>>>>>>>>remove short aside") # print(">>>>>>>>>>>remove short aside")
mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index]) mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index]) mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
# table_index = table_index - 1
mainWindow.last_aside_index = None mainWindow.last_aside_index = None
new_element.suggest = "0/" + new_element.suggest new_element.suggest = "0/" + new_element.suggest
if (st_time_sec != None and st_time_sec != "" and aside_head_time > float(st_time_sec)): if (st_time_sec != None and st_time_sec != "" and aside_head_time > float(st_time_sec)):
return return
mainWindow.projectContext.aside_list.append(new_element) mainWindow.projectContext.aside_list.append(new_element)
mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1]) mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
# table_index = table_index + 1
mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1 mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1
# end_time 主要用来判断是否越界
def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow=None): def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow=None):
"""使用ocr检测视频获取字幕并输出旁白推荐 """使用ocr检测视频获取字幕并输出旁白推荐
...@@ -616,7 +838,11 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time ...@@ -616,7 +838,11 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
up_b, down_b = context.caption_boundings[0], context.caption_boundings[1] up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
else: else:
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟) # 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom) # up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
get_position(video_path, 0, mainWindow.ocr_ranges)
print(">>>>>positions:")
global ocr_positions
print(ocr_positions)
context.caption_boundings = [up_b, down_b] context.caption_boundings = [up_b, down_b]
context.detected = True context.detected = True
...@@ -627,8 +853,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time ...@@ -627,8 +853,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
# print("process the total video at time: ", datetime.datetime.now()) # print("process the total video at time: ", datetime.datetime.now())
process_video(video_path, start_time, end_time, book_name_xlsx, sheet_name_xlsx, state, mainWindow) process_video(video_path, start_time, end_time, book_name_xlsx, sheet_name_xlsx, state, mainWindow)
def check_have_ocr(img):
    """Report whether a BGR image contains any very bright pixel.

    The image is converted to grayscale and binarised with
    ``cv2.threshold`` (cut-off 230, maximum value 255, ``THRESH_BINARY``);
    the result is ``True`` when at least one pixel of the binarised image
    equals 255.

    Args:
        img: Image in BGR channel order.

    Returns:
        bool: ``True`` if any pixel survives the threshold, else ``False``.
    """
    brightness_cutoff = 230
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binarised = cv2.threshold(grayscale, brightness_cutoff, 255, cv2.THRESH_BINARY)[1]
    return bool(np.any(binarised == 255))
if __name__ == '__main__': if __name__ == '__main__':
# path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4" # path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
path = "C:/Users/Smile/Desktop/accessibility-movie/" path = "C:/Users/Smile/Desktop/accessibility-movie/"
# print("get_pos:", get_position(path, 0)) # print("get_pos:", get_position(path, 0))
# evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
# img = cv2.imread("./cap/cap879.png")
img = cv2.imread("./cap/cap812.png", cv2.IMREAD_GRAYSCALE)
# img = cv2.equalizeHist(img)
t = 230
_, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# img = resize_img(img)
detect_subtitle(img, None, None)
# img = cv2.equalizeHist(img)
# t = 120
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# # ret, binary_image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # binary_image = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imwrite(f'./binary2.png', img)
if np.any(img == 255):
print("111111")
else:
print("222222")
...@@ -13,7 +13,7 @@ import os ...@@ -13,7 +13,7 @@ import os
import cv2 import cv2
from PyQt5 import QtWidgets from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu, QDialog, QVBoxLayout
from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex
from PyQt5.QtMultimedia import * from PyQt5.QtMultimedia import *
from PyQt5.QtGui import QIcon from PyQt5.QtGui import QIcon
...@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx ...@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
from ffmpeg_util import adjust_audio_volume from ffmpeg_util import adjust_audio_volume
from PyQt5.QtCore import QThread; from PyQt5.QtCore import QThread;
import threading
# from emit_import_event import emit_import_event # from emit_import_event import emit_import_event
...@@ -71,12 +72,56 @@ class WorkerThread(QThread): ...@@ -71,12 +72,56 @@ class WorkerThread(QThread):
for t in self.main_window.threads: for t in self.main_window.threads:
t.start() t.start()
def refresh(self):
    """Emit the main window's ``import_process_sign`` with this thread's ``elements``."""
    signal = self.main_window.import_process_sign
    signal.emit(self.elements)
class ProcessErrThread(QThread):
    """QThread that launches ``narratage_detection.process_err`` on a daemon ``RunThread``."""

    def __init__(self, main_window):
        super().__init__()
        self.main_window = main_window

    def run(self):
        """Start the ``process_err`` worker and return without waiting for it."""
        # Imported at call time, exactly as the original does.
        from narratage_detection import process_err
        worker = RunThread(funcName=process_err,
                           args=(self.main_window, ),
                           name="process_err")
        worker.setDaemon(True)
        worker.start()
class AutoCloseDialog(QDialog):
    """Dialog showing a fixed "locating subtitle boundary" notice in a single label."""

    def __init__(self, parent = None):
        super().__init__(parent)
        self.setWindowFlags(Qt.CustomizeWindowHint | Qt.NoDropShadowWindowHint)
        box = QVBoxLayout()
        box.addWidget(QLabel("字幕边界定位中,请稍后!"))
        self.setLayout(box)
class LocalOcrThread(QThread):
    """QThread that starts the main window's ``auto_location_ocr`` on a daemon ``RunThread``."""

    def __init__(self, main_window,path):
        super().__init__()
        self.main_window = main_window
        self.path = path

    def run(self):
        """Reset the window's progress state and thread list, then start the worker."""
        window = self.main_window
        # Single-slot list shared with the window as ``state``; seeded with 0.1.
        progress = [None]
        window.state = progress
        progress[0] = 0.1
        window.threads = []
        worker = RunThread(funcName=window.auto_location_ocr,
                           args=(self.path, ),
                           name="auto_location_ocr")
        worker.setDaemon(True)
        window.threads.append(worker)
        window.all_threads.append(worker)
        for pending in window.threads:
            pending.start()

    def finish(self):
        """Emit the main window's ``location_ocr_sign``."""
        self.main_window.location_ocr_sign.emit()
class CustomDelegate(QtWidgets.QStyledItemDelegate): class CustomDelegate(QtWidgets.QStyledItemDelegate):
def paint(self, painter, option, index): def paint(self, painter, option, index):
...@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
renew_signal = pyqtSignal(str) renew_signal = pyqtSignal(str)
import_process_sign = pyqtSignal(list) import_process_sign = pyqtSignal(list)
refresh_sign = pyqtSignal() refresh_sign = pyqtSignal()
location_ocr_sign = pyqtSignal()
def __init__(self, project_path): def __init__(self, project_path):
super(MainWindow, self).__init__() super(MainWindow, self).__init__()
self.location_ocr_sign.connect(self.finish_location_ocr)
self.last_aside_index = None self.last_aside_index = None
self.setupUi(self) self.setupUi(self)
self.statusbar.showMessage("hello", 5000) self.statusbar.showMessage("hello", 5000)
...@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# 所有QTimer集中管理 # 所有QTimer集中管理
self.import_excel_timer = QTimer() self.import_excel_timer = QTimer()
self.import_excel_timer.timeout.connect(self.check_if_import_excel_over) self.import_excel_timer.timeout.connect(self.check_if_import_excel_over)
self.location_ocr_timer = QTimer()
self.location_ocr_timer.timeout.connect(self.check_if_location_ocr)
self.detect_timer = QTimer() self.detect_timer = QTimer()
self.detect_timer.timeout.connect(self.check_if_detect_over_slot) self.detect_timer.timeout.connect(self.check_if_detect_over_slot)
self.synthesis_timer = QTimer() self.synthesis_timer = QTimer()
...@@ -161,7 +210,14 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -161,7 +210,14 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.video_timer.start(1000) # todo 作为参数配置 self.video_timer.start(1000) # todo 作为参数配置
self.refresh_tab_timer = QTimer() self.refresh_tab_timer = QTimer()
self.refresh_tab_timer.timeout.connect(self.refresh_tab_slot) self.refresh_tab_timer.timeout.connect(self.refresh_tab_slot)
self.up_ocr_timer = QTimer()
self.down_ocr_timer = QTimer()
self.up_ocr_timer.timeout.connect(self.up_ocr_timer_func)
self.down_ocr_timer.timeout.connect(self.down_ocr_timer_func)
self.up_ocr_bottom_timer = QTimer()
self.down_ocr_bottom_timer = QTimer()
self.up_ocr_bottom_timer.timeout.connect(self.up_ocr_bottom_timer_func)
self.down_ocr_bottom_timer.timeout.connect(self.down_ocr_bottom_timer_func)
"""状态栏相关空间 """状态栏相关空间
""" """
...@@ -219,18 +275,28 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -219,18 +275,28 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.insert_aside_from_now_btn.clicked.connect( self.insert_aside_from_now_btn.clicked.connect(
self.insert_aside_from_now_slot) self.insert_aside_from_now_slot)
self.insert_aside_from_now_btn.setEnabled(False) self.insert_aside_from_now_btn.setEnabled(False)
self.up_ocr_btn.clicked.connect( self.up_ocr_btn.pressed.connect(
self.up_ocr) self.up_ocr)
self.down_ocr_btn.clicked.connect( self.up_ocr_btn.released.connect(
self.up_ocr_stop)
self.down_ocr_btn.pressed.connect(
self.down_ocr) self.down_ocr)
self.up_ocr_bottom_btn.clicked.connect( self.down_ocr_btn.released.connect(
self.down_ocr_stop)
self.up_ocr_bottom_btn.pressed.connect(
self.up_ocr_bottom) self.up_ocr_bottom)
self.down_ocr_bottom_btn.clicked.connect( self.up_ocr_bottom_btn.released.connect(
self.up_ocr_bottom_stop)
self.down_ocr_bottom_btn.pressed.connect(
self.down_ocr_bottom) self.down_ocr_bottom)
self.down_ocr_bottom_btn.released.connect(
self.down_ocr_bottom_stop)
self.confirm_ocr_btn.clicked.connect(
self.confirm_ocr)
self.confirm_head_aside_btn.clicked.connect( self.confirm_head_aside_btn.clicked.connect(
self.confirm_head_aside) self.confirm_head_aside)
self.detect_btn.clicked.connect( self.detect_btn.clicked.connect(
self.show_detect_dialog) self.show_confirmation_dialog)
"""视频预览相关信息 """视频预览相关信息
...@@ -398,6 +464,10 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -398,6 +464,10 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.sld_video.setFocus() self.sld_video.setFocus()
self.aside_head_time = None self.aside_head_time = None
self.add_head_aside = False self.add_head_aside = False
self.ocr_ranges = []
self.refresh_lock = threading.Lock()
self.detect_lock = False
self.refresh_flag = False
# 打印到log文件中 # 打印到log文件中
t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging") t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
...@@ -407,6 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -407,6 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
get_focus_thread.setDaemon(True) get_focus_thread.setDaemon(True)
get_focus_thread.start() get_focus_thread.start()
def finish_location_ocr(self):
    """Show the "subtitle locating finished, please verify and adjust" notice."""
    notice = "字幕定位结束,请检查是否准确,并调整正确"
    self.import_excel_dialog.show_with_msg(notice)
def show_confirmation_dialog(self):
    """Ask whether the OCR range has been calibrated; open the detect dialog on Yes."""
    box = QtWidgets.QMessageBox
    answer = box.question(self, u'警告', u'确认已经校准OCR范围(只包含中文,不含英文)?',
                          box.Yes | box.No)
    if answer != box.Yes:
        # Any other answer: only leave a trace in the log.
        print(">>>>>>show_confirmation_dialog")
        return
    self.show_detect_dialog()
def getFocus(self): def getFocus(self):
while(True): while(True):
...@@ -496,11 +576,20 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -496,11 +576,20 @@ class MainWindow(QMainWindow, Ui_MainWindow):
""" """
if self.rate == None: if self.rate == None:
self.prompt_dialog.show_with_msg("请选择字幕上边界范围") # self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
return h = self.widget.get_h()
video_h = self.wgt_video.height()
self.rate = float(h-10)/float(video_h)
if self.rate_bottom == None: if self.rate_bottom == None:
self.prompt_dialog.show_with_msg("请选择字幕下边界范围") # self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
h = self.widget_bottom.get_h()
video_h = self.wgt_video.height()
self.rate_bottom = float(h-6)/float(video_h)
if not self.check_ocr_rate():
self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
return return
if len(self.ocr_ranges) == 0:
self.ocr_ranges.append([self.rate, self.rate_bottom])
self.detect_dialog.init_self() self.detect_dialog.init_self()
self.detect_dialog.show() self.detect_dialog.show()
...@@ -601,9 +690,39 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -601,9 +690,39 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.action_operate.setEnabled(True) self.action_operate.setEnabled(True)
self.action_insert_aside_from_now.setEnabled(True) self.action_insert_aside_from_now.setEnabled(True)
self.insert_aside_from_now_btn.setEnabled(True) self.insert_aside_from_now_btn.setEnabled(True)
# self.import_excel_dialog.show_with_msg("正在自动定位字幕边界,请稍后!")
confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'是否需要自动定位字幕边界?',
QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
if confirm_box == QtWidgets.QMessageBox.Yes:
self.location_ocr_thread = LocalOcrThread(self, path)
self.location_ocr_thread.start()
self.location_ocr_timer.start(1000)
# self.auto_close_dialog = AutoCloseDialog()
# self.auto_close_dialog.exec_()
else:
print(">>>>>>>>>>>>>play_video")
def up_ocr(self):
self.widget.change_painter_flag(True)
def auto_location_ocr(self, path):
    """Estimate the subtitle band of the video and move both boundary markers to it.

    Args:
        path: object whose ``path()`` returns the video location as a string;
            the first character is dropped before use (presumably the leading
            "/" of a URL-style path — TODO confirm against the caller).
    """
    from detect_with_ocr import evaluate_position
    video_path = path.path()[1:]
    print(">>>>>>>>>>>>>>>>>>>>>v_path" + video_path)
    y1, y2 = evaluate_position(video_path, 0)
    video_h = self.wgt_video.height()
    # NOTE(review): LocalOcrThread runs this method on a RunThread worker, yet
    # it moves widgets directly; Qt expects widget access from the GUI thread
    # only — consider routing the moves through a signal.
    self.widget.setY(int(video_h * y1) + 6)
    time.sleep(1)
    self.widget_bottom.setY(int(video_h * y2) + 10)
    # y1/y2 are scaled by the video height above, so they look like fractions;
    # "%d" would truncate them to 0 in the log, hence "%s".
    print("y1:%s,y2:%s" % (y1, y2))
def check_ocr_rate(self):
    """Return False when ``self.rate`` is greater than ``self.rate_bottom``, else True."""
    return not (self.rate > self.rate_bottom)
def up_ocr_timer_func(self):
# self.widget.change_painter_flag(True)
h = self.widget.up(3) h = self.widget.up(3)
video_h = self.wgt_video.height() video_h = self.wgt_video.height()
self.rate = float(h-10)/float(video_h) self.rate = float(h-10)/float(video_h)
...@@ -612,9 +731,8 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -612,9 +731,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
print(self.wgt_video.height()) print(self.wgt_video.height())
print(">>>>>>>>>rate" + str(self.rate)) print(">>>>>>>>>rate" + str(self.rate))
def down_ocr_timer_func(self):
def down_ocr(self): # self.widget.change_painter_flag(True)
self.widget.change_painter_flag(True)
h = self.widget.down(3) h = self.widget.down(3)
video_h = self.wgt_video.height() video_h = self.wgt_video.height()
self.rate = float(h-10)/float(video_h) self.rate = float(h-10)/float(video_h)
...@@ -622,19 +740,70 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -622,19 +740,70 @@ class MainWindow(QMainWindow, Ui_MainWindow):
print(self.wgt_video.height()) print(self.wgt_video.height())
print(">>>>>>>>>rate" + str(self.rate)) print(">>>>>>>>>rate" + str(self.rate))
def up_ocr(self):
    """Mark the user as editing and start ``up_ocr_timer`` with a 50 ms interval."""
    self.user_editing_content = True
    interval_ms = 50
    self.up_ocr_timer.start(interval_ms)
def up_ocr_stop(self):
    """Clear the user-editing flag and stop ``up_ocr_timer``."""
    self.user_editing_content = False
    timer = self.up_ocr_timer
    timer.stop()
def down_ocr_stop(self):
    """Clear the user-editing flag and stop ``down_ocr_timer``."""
    self.user_editing_content = False
    timer = self.down_ocr_timer
    timer.stop()
def down_ocr(self):
    """Mark the user as editing and start ``down_ocr_timer`` with a 50 ms interval."""
    self.user_editing_content = True
    interval_ms = 50
    self.down_ocr_timer.start(interval_ms)
def up_ocr_bottom_timer_func(self):
    """Move the lower boundary marker up by 3 and recompute ``rate_bottom``."""
    marker = self.widget_bottom
    marker.change_painter_flag(True)
    new_y = marker.up(3)
    frame_h = self.wgt_video.height()
    # The marker position minus 6, expressed as a fraction of the video widget height.
    self.rate_bottom = float(new_y - 6) / float(frame_h)
def down_ocr_bottom_timer_func(self):
    """Move the lower boundary marker down by 3 and recompute ``rate_bottom``."""
    marker = self.widget_bottom
    marker.change_painter_flag(True)
    new_y = marker.down(3)
    frame_h = self.wgt_video.height()
    # The marker position minus 6, expressed as a fraction of the video widget height.
    self.rate_bottom = float(new_y - 6) / float(frame_h)
def up_ocr_bottom_stop(self):
    """Clear the user-editing flag and stop ``up_ocr_bottom_timer``."""
    self.user_editing_content = False
    timer = self.up_ocr_bottom_timer
    timer.stop()
def down_ocr_bottom_stop(self):
    """Clear the user-editing flag and stop ``down_ocr_bottom_timer``."""
    self.user_editing_content = False
    timer = self.down_ocr_bottom_timer
    timer.stop()
def up_ocr_bottom(self):
    """Mark the user as editing and start ``up_ocr_bottom_timer`` with a 50 ms interval."""
    self.user_editing_content = True
    interval_ms = 50
    self.up_ocr_bottom_timer.start(interval_ms)
def down_ocr_bottom(self):
    """Mark the user as editing and start ``down_ocr_bottom_timer`` with a 50 ms interval."""
    self.user_editing_content = True
    interval_ms = 50
    self.down_ocr_bottom_timer.start(interval_ms)
def refresh_on_import(self): def refresh_on_import(self):
print(">>>>>>>refresh in") print(">>>>>>>refresh in")
self.refresh_tab_slot() self.refresh_tab_slot()
...@@ -877,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -877,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
""" """
self.check_if_over("旁白导入") self.check_if_over("旁白导入")
def check_if_location_ocr(self):
self.check_if_over("字幕定位")
alive = True
for t in self.threads:
alive = alive and t.is_alive()
if not alive:
self.location_ocr_timer.stop()
# self.auto_close_dialog.close()
self.threads = []
# type = 检测 或 合成 或 导出 # type = 检测 或 合成 或 导出
def check_if_over(self, type: str): def check_if_over(self, type: str):
"""确认传入的待检测任务是否完成 """确认传入的待检测任务是否完成
...@@ -904,9 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -904,9 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
elif type == "检测": elif type == "检测":
self.detect_timer.stop() self.detect_timer.stop()
self.refresh_tab_timer.stop() self.refresh_tab_timer.stop()
# t = ProcessErrThread(self)
# t.start()
# from narratage_detection import process_err
# process_err(self)
elif type == "旁白导入": elif type == "旁白导入":
self.import_excel_timer.stop() self.import_excel_timer.stop()
# self.refresh_tab_timer.stop() # self.refresh_tab_timer.stop()
elif type == "字幕定位":
self.location_ocr_timer.stop()
else: else:
self.export_timer.stop() self.export_timer.stop()
...@@ -921,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -921,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.progressBar.setValue(100) self.progressBar.setValue(100)
self.progressLabel.setText(f"100%") self.progressLabel.setText(f"100%")
self.projectContext.nd_process = 1 self.projectContext.nd_process = 1
self.threads = []
def deal_synthesis_callback_slot(self, threads, state): def deal_synthesis_callback_slot(self, threads, state):
"""实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态 """实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态
...@@ -1305,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -1305,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
item = QTableWidgetItem(text) item = QTableWidgetItem(text)
item.setTextAlignment(Qt.AlignCenter) item.setTextAlignment(Qt.AlignCenter)
# 设置为不可编辑 # 设置为不可编辑
if self.checkIfTableItemCanChange(table, idx, j) == False: # if self.checkIfTableItemCanChange(table, idx, j) == False:
# item.setFlags(Qt.ItemIsEnabled) # item.setFlags(Qt.ItemIsEnabled)
print(1) # print(">>>>>>>>>setElememtToTable")
table.setItem(idx, j, item) table.setItem(idx, j, item)
# 只有Content页的字幕列和 Aside页的字幕列 可编辑 # 只有Content页的字幕列和 Aside页的字幕列 可编辑
...@@ -1776,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -1776,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
pre_item = self.all_tableWidget.item(row, col - 1) pre_item = self.all_tableWidget.item(row, col - 1)
suggest = pre_item.text() suggest = pre_item.text()
if suggest != None and suggest != "": print(">>>>>>>>suggest:" + suggest)
if col == constant.Content.AsideColumnNumber and suggest != None and suggest != "":
arrays = suggest.split("/") arrays = suggest.split("/")
if len(arrays) == 2: if len(arrays) == 2:
suggest = str(len(text)) + "/" + arrays[1] suggest = str(len(text)) + "/" + arrays[1]
...@@ -1807,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -1807,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# self.all_tableWidget.setItem( # self.all_tableWidget.setItem(
# int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text)) # int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
self.projectContext.refresh_speed(row, text) self.projectContext.refresh_speed(row, text)
elif col == constant.Content.SubtitleColumnNumber:
self.projectContext.refresh_subtitle(row, text)
# self.all_tableWidget_idx = int(row) # self.all_tableWidget_idx = int(row)
# self.set_table_to_window(False) # self.set_table_to_window(False)
...@@ -1942,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -1942,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
将表格内容更新至界面中,并保存当前工程内容 将表格内容更新至界面中,并保存当前工程内容
""" """
if not self.detect_lock:
self.refresh_flag = True
try:
self.set_table_to_window(need_refresh_all=False) self.set_table_to_window(need_refresh_all=False)
self.projectContext.save_project(False) self.projectContext.save_project(False)
finally:
self.refresh_flag = False
def refresh_all_tab_slot(self): def refresh_all_tab_slot(self):
"""刷新整个表格 """刷新整个表格
...@@ -2274,7 +2470,7 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -2274,7 +2470,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.prompt_dialog.show_with_msg("操作成功!!请查看变化") self.prompt_dialog.show_with_msg("操作成功!!请查看变化")
# 只有row起作用 # 只有row起作用
def del_line_operation_slot(self, row: int, start_time="0", end_time="0", subtitle="", suggest="", aside="", speed="", refresh_flag = True): def del_line_operation_slot(self, row: int, start_time="0", end_time="0", subtitle="", suggest="", aside="", speed="", refresh_flag = True, show_msg_flag = True):
"""删除一行 """删除一行
Args: Args:
...@@ -2313,6 +2509,7 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -2313,6 +2509,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.projectContext.all_elements.pop(int(row)-1) self.projectContext.all_elements.pop(int(row)-1)
if refresh_flag: if refresh_flag:
self.refresh_tab_slot() self.refresh_tab_slot()
if show_msg_flag:
self.prompt_dialog.show_with_msg("操作成功!!请查看变化") self.prompt_dialog.show_with_msg("操作成功!!请查看变化")
def pb_item_changed_by_double_clicked_slot(self, item): def pb_item_changed_by_double_clicked_slot(self, item):
...@@ -2418,4 +2615,17 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -2418,4 +2615,17 @@ class MainWindow(QMainWindow, Ui_MainWindow):
str(round(video_position/1000, 2))) str(round(video_position/1000, 2)))
self.import_excel_dialog.show_with_msg("定位成功:" + self.aside_head_time) self.import_excel_dialog.show_with_msg("定位成功:" + self.aside_head_time)
def confirm_ocr(self):
    """Record the current (upper, lower) subtitle boundary pair in ``ocr_ranges``.

    Shows a prompt and records nothing when either boundary is unset or when
    ``check_ocr_rate()`` rejects the pair; otherwise appends
    ``[self.rate, self.rate_bottom]`` and reports how many pairs exist.
    """
    if self.rate is None:
        self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
        return
    if self.rate_bottom is None:
        self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
        return
    if not self.check_ocr_rate():
        self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
        return
    self.ocr_ranges.append([self.rate, self.rate_bottom])
    self.prompt_dialog.show_with_msg(f"操作成功,如果电影存在多行字幕,请移动字幕上下边界,再次点击该按钮确认,目前已存在{len(self.ocr_ranges)}组字幕边界")
\ No newline at end of file
...@@ -18,6 +18,9 @@ class MyWidget(QWidget): ...@@ -18,6 +18,9 @@ class MyWidget(QWidget):
# def __init__(self, parent=None): # def __init__(self, parent=None):
# super(QWidget, self).__init__(parent) # super(QWidget, self).__init__(parent)
# self.painter_flag = True # self.painter_flag = True
def __init__(self, parent=None, color = Qt.red):
    """Create the marker widget.

    Args:
        parent: optional parent widget, forwarded to the base initializer.
        color: pen color stored on the instance as ``self.color``.
    """
    # super(QWidget, self) would start the MRO lookup *after* QWidget and so
    # bypass QWidget's own initializer; zero-argument super() starts from
    # this class's direct base instead.
    super().__init__(parent)
    self.color = color
def paintEvent(self, event): def paintEvent(self, event):
# print(">>>>>>>>into paint") # print(">>>>>>>>into paint")
...@@ -26,7 +29,7 @@ class MyWidget(QWidget): ...@@ -26,7 +29,7 @@ class MyWidget(QWidget):
lock.acquire() lock.acquire()
painter = QPainter(self) painter = QPainter(self)
painter.setRenderHint(QPainter.Antialiasing) # Optional: Enable anti-aliasing painter.setRenderHint(QPainter.Antialiasing) # Optional: Enable anti-aliasing
painter.setPen(QPen(Qt.red, 2, Qt.SolidLine)) painter.setPen(QPen(self.color, 2, Qt.SolidLine))
painter.drawLine(0, 1, 800, 1) painter.drawLine(0, 1, 800, 1)
painter.end() painter.end()
lock.release() lock.release()
...@@ -55,8 +58,17 @@ class MyWidget(QWidget): ...@@ -55,8 +58,17 @@ class MyWidget(QWidget):
# painter.setPen(QPen(Qt.red, 2, Qt.SolidLine)) # painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
# painter.drawLine(0, 1, 800, 1) # painter.drawLine(0, 1, 800, 1)
# painter.end() # painter.end()
print(">>>>>cur_y : " + str(self.y()))
return self.y() return self.y()
def setY(self, h):
    """Log the current y position, then move the widget to ``(0, h)``."""
    current_y = self.y()
    print(">>>>>cur_y2 : " + str(current_y))
    self.move(0, h)
def get_h(self):
    """Return the widget's current y position."""
    current_y = self.y()
    return current_y
def down(self, mov_len): def down(self, mov_len):
print(">>>>>>>>>>>down" + str(mov_len)) print(">>>>>>>>>>>down" + str(mov_len))
self.move(0,self.y() + mov_len) self.move(0,self.y() + mov_len)
...@@ -314,6 +326,8 @@ class Ui_MainWindow(object): ...@@ -314,6 +326,8 @@ class Ui_MainWindow(object):
self.horizontalLayout_7.setObjectName("horizontalLayout_7") self.horizontalLayout_7.setObjectName("horizontalLayout_7")
self.up_ocr_btn = QtWidgets.QPushButton(self.centralwidget) self.up_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
self.up_ocr_btn.setObjectName("up_ocr_btn") self.up_ocr_btn.setObjectName("up_ocr_btn")
# self.up_ocr_btn.setAutoRepeatDelay(False)
# self.up_ocr_btn.setAutoRepeat
self.horizontalLayout_7.addWidget(self.up_ocr_btn) self.horizontalLayout_7.addWidget(self.up_ocr_btn)
self.down_ocr_btn = QtWidgets.QPushButton(self.centralwidget) self.down_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
self.down_ocr_btn.setObjectName("down_ocr_btn") self.down_ocr_btn.setObjectName("down_ocr_btn")
...@@ -324,10 +338,14 @@ class Ui_MainWindow(object): ...@@ -324,10 +338,14 @@ class Ui_MainWindow(object):
self.down_ocr_bottom_btn = QtWidgets.QPushButton(self.centralwidget) self.down_ocr_bottom_btn = QtWidgets.QPushButton(self.centralwidget)
self.down_ocr_bottom_btn.setObjectName("down_ocr_bottom_btn") self.down_ocr_bottom_btn.setObjectName("down_ocr_bottom_btn")
self.horizontalLayout_7.addWidget(self.down_ocr_bottom_btn) self.horizontalLayout_7.addWidget(self.down_ocr_bottom_btn)
self.confirm_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
self.confirm_ocr_btn.setObjectName("confirm_ocr_btn")
self.horizontalLayout_7.addWidget(self.confirm_ocr_btn)
self.confirm_head_aside_btn = QtWidgets.QPushButton(self.centralwidget) self.confirm_head_aside_btn = QtWidgets.QPushButton(self.centralwidget)
self.confirm_head_aside_btn.setObjectName("confirm_head_aside_btn") self.confirm_head_aside_btn.setObjectName("confirm_head_aside_btn")
self.horizontalLayout_7.addWidget(self.confirm_head_aside_btn) self.horizontalLayout_7.addWidget(self.confirm_head_aside_btn)
self.horizontalLayout_8 = QtWidgets.QHBoxLayout() self.horizontalLayout_8 = QtWidgets.QHBoxLayout()
self.horizontalLayout_8.setObjectName("horizontalLayout_8") self.horizontalLayout_8.setObjectName("horizontalLayout_8")
self.detect_btn = QtWidgets.QPushButton(self.centralwidget) self.detect_btn = QtWidgets.QPushButton(self.centralwidget)
...@@ -523,7 +541,8 @@ class Ui_MainWindow(object): ...@@ -523,7 +541,8 @@ class Ui_MainWindow(object):
self.action_redo = QtWidgets.QAction(MainWindow) self.action_redo = QtWidgets.QAction(MainWindow)
# self.action_redo.setFont(font) # self.action_redo.setFont(font)
self.action_redo.setObjectName("action_redo") self.action_redo.setObjectName("action_redo")
self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_detect_dialog) # self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_detect_dialog)
self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_confirmation_dialog)
self.action_3.setEnabled(False) self.action_3.setEnabled(False)
self.action_4 = QtWidgets.QAction("旁白音频合成",self,triggered=self.show_assemble_dialog) self.action_4 = QtWidgets.QAction("旁白音频合成",self,triggered=self.show_assemble_dialog)
self.action_4.setEnabled(False) self.action_4.setEnabled(False)
...@@ -539,7 +558,6 @@ class Ui_MainWindow(object): ...@@ -539,7 +558,6 @@ class Ui_MainWindow(object):
self.action_9.setEnabled(True) self.action_9.setEnabled(True)
self.action_10 = QtWidgets.QAction("片头旁白定位",self,triggered=self.confirm_head_aside) self.action_10 = QtWidgets.QAction("片头旁白定位",self,triggered=self.confirm_head_aside)
self.action_10.setEnabled(True) self.action_10.setEnabled(True)
# self.action_3.setObjectName("action_3") # self.action_3.setObjectName("action_3")
# self.action_4 = QtWidgets.QAction(MainWindow) # self.action_4 = QtWidgets.QAction(MainWindow)
# self.action_4.setObjectName("action_4") # self.action_4.setObjectName("action_4")
...@@ -604,6 +622,7 @@ class Ui_MainWindow(object): ...@@ -604,6 +622,7 @@ class Ui_MainWindow(object):
self.up_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界上移")) self.up_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界上移"))
self.down_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界下移")) self.down_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界下移"))
self.confirm_head_aside_btn.setText(_translate("MainWindow", "片头旁白定位")) self.confirm_head_aside_btn.setText(_translate("MainWindow", "片头旁白定位"))
self.confirm_ocr_btn.setText(_translate("MainWindow", "字幕边界确认"))
self.detect_btn.setText(_translate("MainWindow", "旁白区间检测")) self.detect_btn.setText(_translate("MainWindow", "旁白区间检测"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白")) self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕")) self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕"))
......
...@@ -77,13 +77,14 @@ class OperateRecord: ...@@ -77,13 +77,14 @@ class OperateRecord:
# 每一行的具体信息,"起始时间", "终止时间", "字幕", '建议', '解说脚本' # 每一行的具体信息,"起始时间", "终止时间", "字幕", '建议', '解说脚本'
class Element: class Element:
def __init__(self, st_time_sec: str, ed_time_sec: str, subtitle, suggest, aside, speed = "1.00(4字/秒)"): def __init__(self, st_time_sec: str, ed_time_sec: str, subtitle, suggest, aside, speed = "1.00(4字/秒)",ocr_h = None):
self.st_time_sec = st_time_sec self.st_time_sec = st_time_sec
self.ed_time_sec = ed_time_sec self.ed_time_sec = ed_time_sec
self.subtitle = subtitle self.subtitle = subtitle
self.suggest = suggest self.suggest = suggest
self.aside = aside self.aside = aside
self.speed = speed self.speed = speed
self.ocr_h = ocr_h
# 判断当前元素是否是字幕 # 判断当前元素是否是字幕
def is_subtitle(self): def is_subtitle(self):
...@@ -263,6 +264,11 @@ class ProjectContext: ...@@ -263,6 +264,11 @@ class ProjectContext:
if not self.initial_ing: if not self.initial_ing:
save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name) save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)
def refresh_subtitle(self, row, subtitle: str):
    """Set the subtitle of row ``row`` and write the sheet back unless still initializing."""
    target = self.all_elements[int(row)]
    target.subtitle = subtitle
    if self.initial_ing:
        return
    save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)
def refresh_speed(self, row, speed: str)->None: def refresh_speed(self, row, speed: str)->None:
self.all_elements[int(row)].speed = speed self.all_elements[int(row)].speed = speed
if not self.initial_ing: if not self.initial_ing:
...@@ -307,7 +313,7 @@ class ProjectContext: ...@@ -307,7 +313,7 @@ class ProjectContext:
if d["终止时间"][i] is None: if d["终止时间"][i] is None:
# 如果是最后一条 # 如果是最后一条
if i == len(d["字幕"]) - 1: if i == len(d["字幕"]) - 1:
print(1) print(">>>>>>>>>load_excel_from_path")
# ed_time_sec = "360000" if self.duration == 0 else self.duration # todo 默认最大时长是100h # ed_time_sec = "360000" if self.duration == 0 else self.duration # todo 默认最大时长是100h
else: else:
ed_time_sec = "%.2f"%(float(d["起始时间"][i + 1]) - 0.01) ed_time_sec = "%.2f"%(float(d["起始时间"][i + 1]) - 0.01)
...@@ -428,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name): ...@@ -428,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
backup_path = os.path.dirname(new_excel_path) + "/tmp_"+str(time.time())+".xlsx" backup_path = os.path.dirname(new_excel_path) + "/tmp_"+str(time.time())+".xlsx"
# os.remove(new_excel_path) # os.remove(new_excel_path)
os.rename(new_excel_path, backup_path) os.rename(new_excel_path, backup_path)
# print(">>>>>>new_excel_path:" + new_excel_path)
# print(">>>>>>>>>>backup_path:" + backup_path)
try: try:
create_sheet(new_excel_path, "旁白插入位置建议", [header]) create_sheet(new_excel_path, "旁白插入位置建议", [header])
# for element in all_element: # for element in all_element:
......
...@@ -63,6 +63,13 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str, ...@@ -63,6 +63,13 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
from detect_with_ocr import detect_with_ocr from detect_with_ocr import detect_with_ocr
detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow) detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)
def process_err(mainWindow: MainWindow=None):
    """Run ``detect_with_ocr.process_err_ocr`` on the window; print, rather than raise, any Exception."""
    from detect_with_ocr import process_err_ocr
    try:
        process_err_ocr(mainWindow)
    except Exception as err:
        # Best effort: the failure is only reported on stdout.
        print("process_err err")
        print(err)
if __name__ == '__main__': if __name__ == '__main__':
# 定义参数 # 定义参数
......
import re import re
import sys
import csv import csv
import jieba
import argparse import argparse
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher from difflib import SequenceMatcher
title = ['起始时间(转换后)', '终止时间(转换后)', '字幕'] from tqdm import tqdm
# title = ['起始时间(转换后)', '终止时间(转换后)', '字幕']
title = ['起始时间', '终止时间', '字幕']
def init(): def init():
# 获取中文停用词列表 # 获取中文停用词列表
...@@ -22,13 +27,32 @@ def change_to_second(time_str): ...@@ -22,13 +27,32 @@ def change_to_second(time_str):
time_obj.second + time_obj.microsecond / 1000000 time_obj.second + time_obj.microsecond / 1000000
return seconds return seconds
# Segment a Chinese sentence, guarding against segmentations that are useless
# for comparison.
def words_segment(str):
    """Return the jieba tokens of ``str`` joined with ','.

    The input is returned unchanged when ``is_all_stopwords`` accepts the
    joined text, or when there are as many tokens as characters (the
    sentence fell apart into single characters).
    """
    # Segment once and reuse the result for both the join and the length check.
    tokens = list(jieba.cut(str))
    joined = ','.join(tokens)
    if is_all_stopwords(joined) or len(tokens) == len(str):
        return str
    return joined
# Optionally keep only the Chinese part of a bilingual (Chinese/English) subtitle.
def extract_info(str, has_english=False):
    """Return ``str`` as is, or only its CJK runs joined by spaces when ``has_english`` is true."""
    if has_english:
        # Each match is a maximal run of characters in U+4E00..U+9FFF.
        return ' '.join(re.findall(r'[\u4e00-\u9fff]+', str))
    return str
# Compute the similarity between two subtitle strings.
def calculate_similarity(str1, str2, method='cosine'):
    """Score how similar two subtitles are.

    ``method`` selects the measure: 'cosine' (TF-IDF cosine similarity of the
    segmented texts), 'distance' (negated ``String_edit_distance``), and
    anything else falls back to ``SequenceMatcher.ratio()``.
    """
    if method == 'cosine':
        segmented = [words_segment(str1), words_segment(str2)]
        tfidf_matrix = TfidfVectorizer(min_df=1).fit_transform(segmented)  # shape=[2, N]
        # Off-diagonal entry is the similarity between the two documents.
        return cosine_similarity(tfidf_matrix)[0][1]
    if method == 'distance':
        return -String_edit_distance(str1, str2)
    return SequenceMatcher(None, str1, str2).ratio()
...@@ -37,23 +61,62 @@ def calculate_time_difference(time1, time2): ...@@ -37,23 +61,62 @@ def calculate_time_difference(time1, time2):
return abs(time2 - time1) return abs(time2 - time1)
def calculate_weight(x, y):
    """Return the weight applied to a similarity score.

    ``x`` and ``y`` are accepted but currently ignored: the time coefficient
    is not taken into account for now, so every pair is weighted 1.0.

    The previously used formula was ``1 / (alpha * (x + y) + 1)`` with
    ``alpha = 0.11``, which gives a factor of about 0.9 for a 1 s difference.
    """
    constant_weight = 1.0
    return constant_weight
# Check whether a sentence is made up entirely of stop words.
def is_all_stopwords(sentence):
    """Return True when every character of ``sentence`` is in ``stop_words``.

    Spaces are removed first. The check is done character by character
    against the module-level ``stop_words`` collection; a sentence that is
    empty after removing spaces yields True.
    """
    for char in sentence.replace(' ', ''):
        if char not in stop_words:
            return False
    return True
# Normalised edit (Levenshtein) distance.
# NOTE(review): the original author flagged this routine as problematic
# without saying why; the recurrence itself is the standard one. The failure
# addressed here is the division by zero for two empty strings.
def String_edit_distance(str1, str2):
    """Return the Levenshtein distance of two strings divided by the longer length.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        A float in ``[0.0, 1.0]``: 0.0 for identical strings (including two
        empty strings), 1.0 when every position of the longer string must be
        edited.
    """
    n, m = len(str1), len(str2)
    longest = max(n, m)
    if longest == 0:
        # Two empty strings are identical; avoid dividing by zero.
        return 0.0

    # Only the previous row of the DP table is needed at any time.
    previous = list(range(m + 1))
    for i in range(1, n + 1):
        current = [i] + [0] * m
        left_char = str1[i - 1]
        for j in range(1, m + 1):
            if left_char == str2[j - 1]:
                current[j] = previous[j - 1]
            else:
                # substitution, insertion, deletion
                current[j] = min(previous[j - 1], current[j - 1], previous[j]) + 1
        previous = current
    return 1.0 * previous[m] / longest
### 如果其中有-符号,可能在用excel打开时自动添加=变成公式,读取的时候没问题 ### 如果其中有-符号,可能在用excel打开时自动添加=变成公式,读取的时候没问题
def read_srt_to_csv(path_srt, path_output): def read_srt_to_csv(path_srt, path_output):
try:
with open(path_srt, 'r', encoding='utf-8-sig') as f: with open(path_srt, 'r', encoding='utf-8-sig') as f:
srt_content = f.read() # str srt_content = f.read() # str
except UnicodeDecodeError:
print(f"编码错误,已经切换到utf-16编码")
try:
with open(path_srt, 'r', encoding='utf-16') as f:
srt_content = f.read() # str
except:
print(f"请选择utf-8或utf-16编码形式的srt文件")
sys.exit(1)
# 使用正则表达式匹配时间码和字幕内容 # 使用正则表达式匹配时间码和字幕内容
pattern = re.compile(r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.+?)(?=\n\d+\n|$)', re.DOTALL) pattern = re.compile(r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.+?)(?=\n\d+\n|$)', re.DOTALL)
matches = pattern.findall(srt_content) matches = pattern.findall(srt_content)
has_english = []
for i in range(5):
idx = np.random.randint(len(matches))
pattern = re.compile(r'[a-zA-Z]')
has_english.append(bool(pattern.search(matches[idx][3])))
has_english = all(has_english)
print('!'*20, has_english)
# 写入 csv 文件 # 写入 csv 文件
with open(path_output, 'w', newline='', encoding='utf-8') as f: with open(path_output, 'w', newline='', encoding='utf-8') as f:
csv_writer = csv.writer(f) csv_writer = csv.writer(f)
...@@ -61,7 +124,7 @@ def read_srt_to_csv(path_srt, path_output): ...@@ -61,7 +124,7 @@ def read_srt_to_csv(path_srt, path_output):
for _, start, end, subtitle in matches: # 都是str格式 for _, start, end, subtitle in matches: # 都是str格式
subtitle = re.sub(r'\{[^}]*\}', '', subtitle) # 将srt文件前的加粗等格式去掉 subtitle = re.sub(r'\{[^}]*\}', '', subtitle) # 将srt文件前的加粗等格式去掉
csv_writer.writerow([start, end, subtitle.strip()]) csv_writer.writerow([start, end, extract_info(subtitle.strip(), has_english)])
def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'): def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'):
data = pd.read_excel(path_xlsx) data = pd.read_excel(path_xlsx)
...@@ -70,20 +133,19 @@ def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'): ...@@ -70,20 +133,19 @@ def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'):
csv_writer.writerow(title) csv_writer.writerow(title)
for _, data1 in data.iterrows(): for _, data1 in data.iterrows():
start, end, subtitle = data1[1], data1[3], data1[4] # print(data1[1])
start, end, subtitle = data1[0], data1[1], data1[2]
if isinstance(subtitle, float) and np.isnan(subtitle): if isinstance(subtitle, float) and np.isnan(subtitle):
continue continue
# 与srt文件格式同步 # 与srt文件格式同步
start = start.replace('.', ',') start = start.replace('.', ',')
end = end.replace('.', ',') end = end.replace('.', ',')
# print(start, end, subtitle,)
# print(type(start), type(end), type(subtitle))
csv_writer.writerow([start, end, subtitle.strip()]) csv_writer.writerow([start, end, subtitle.strip()])
### 对于srt中的字幕计算相似性度。从ocr中找到时间戳满足<=time_t的字幕, ### 对于srt中的字幕计算相似性度。从ocr中找到时间戳满足<=time_t的字幕,
### 然后计算字幕间的相似度,取一个最大的。字幕从start和end都匹配一遍 ### 然后计算字幕间的相似度,取一个最大的。字幕从start和end都匹配一遍
# time_threshold设置阈值,用于判断时间差是否可接受 # time_threshold设置阈值,用于判断时间差是否可接受
def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'): def measure_score(path_srt, path_ocr, time_threshold=5.0, time_threshold_re=False, method='cosine'):
data_srt, data_ocr = [], [] data_srt, data_ocr = [], []
with open(path_srt, 'r', encoding='utf-8') as file: with open(path_srt, 'r', encoding='utf-8') as file:
csv_reader = csv.reader(file) csv_reader = csv.reader(file)
...@@ -103,22 +165,36 @@ def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'): ...@@ -103,22 +165,36 @@ def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'):
# 计算相似度 # 计算相似度
total_similarity = 0.0 total_similarity = 0.0
total_weight = 0.0 total_weight = 0.0
txt1 = []
for sub in data_srt: for sub in tqdm(data_srt, desc="Processing", ncols=100):
max_similarity = 0.0 max_similarity = 0.0 if method != 'distance' else -1.0
# 去除srt中的停用词 # 去除srt中的停用词
if is_all_stopwords(sub[2]): if is_all_stopwords(sub[2]):
continue continue
subb = ""
for sub1 in data_ocr: for sub1 in data_ocr:
x, y = abs(sub[0] - sub1[0]), abs(sub[1] - sub1[1]) x, y = abs(sub[0] - sub1[0]), abs(sub[1] - sub1[1])
if min(x, y) <= time_threshold: if time_threshold_re:
# print(sub[2], sub1[2]) time_threshold_tmp = time_threshold
score = calculate_similarity(sub[2], sub1[2], 'cosine') else :
time_threshold_tmp = (sub[1] - sub[0]) * 0.3 # 10s允许3s的误差
if min(x, y) <= time_threshold_tmp:
score = calculate_similarity(sub[2], sub1[2], method)
if max_similarity <= score * calculate_weight(x, y):
subb = sub1[2]
max_similarity = max(max_similarity, score * calculate_weight(x, y)) max_similarity = max(max_similarity, score * calculate_weight(x, y))
if max_similarity <= -0.5:
# print(max_similarity, sub[2], subb, sub[0])
txt1.append(' !!! '.join([str(max_similarity), sub[2], subb, str(sub[0])]))
total_similarity += max_similarity total_similarity += max_similarity
total_weight += 1 total_weight += 1
if method == 'distance':
total_similarity = total_weight + total_similarity
with open('movie_pro.txt', 'w', encoding='utf-8') as f:
for i in txt1:
f.write(i + '\n')
# print(total_similarity, total_similarity / len(data_srt), total_similarity / total_weight)
return total_similarity / len(data_srt), total_similarity / total_weight return total_similarity / len(data_srt), total_similarity / total_weight
if __name__ == '__main__': if __name__ == '__main__':
...@@ -128,13 +204,23 @@ if __name__ == '__main__': ...@@ -128,13 +204,23 @@ if __name__ == '__main__':
# 添加命令行参数 # 添加命令行参数
parser.add_argument("--path_srt", required=True, type=str, help="Path of srt file, format is srt") parser.add_argument("--path_srt", required=True, type=str, help="Path of srt file, format is srt")
parser.add_argument("--path_ocr", required=True, type=str, help="Path of ocr file, format is xlsx") parser.add_argument("--path_ocr", required=True, type=str, help="Path of ocr file, format is xlsx")
parser.add_argument("--method", type=str, default='cosine', help="Select evaluation method") parser.add_argument("--time_threshold", type=float, default=5.0, help="Allowable time frame")
parser.add_argument("--time_threshold", type=float,default=5.0, help="Allowable time frame")
parser.add_argument("--method", type=str, default='distance',choices=['cosine', 'distance', 'sequence']
, help="Select evaluation method")
parser.add_argument("--time_threshold_re", type=bool, default=True, help="Specify whether \
time threshold is required")
args = parser.parse_args() args = parser.parse_args()
output_file_srt = 'deal_srt.csv' output_file_srt = 'deal_srt.csv'
output_file_ocr = 'deal_ocr.csv' output_file_ocr = 'deal_ocr.csv'
read_srt_to_csv(args.path_srt, output_file_srt) read_srt_to_csv(args.path_srt, output_file_srt)
read_from_xlsx(args.path_ocr, output_file_ocr) read_from_xlsx(args.path_ocr, output_file_ocr)
score = measure_score(output_file_srt, output_file_ocr, args.time_threshold, args.method) score = measure_score(output_file_srt, output_file_ocr, args.time_threshold, \
print(f'该评估算法得分: {score[1]:.5f}') args.time_threshold_re, args.method)
\ No newline at end of file print(f'该评估算法得分: {100 * score[1]:.3f}')
# python ocr_metric.py --path_srt test/new/movie_1.srt --path_ocr ../测试/the-swan-v3/The.Swan-zimu.xlsx --time_threshold 10
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment