赵心治 / accessibility_movie_2 · Commits
Commit 2c4cd5c0 authored Nov 05, 2023 by smile2019
Merge remote-tracking branch 'refs/remotes/origin/feat_1' into feat_1
parents d3fcd34f 7296e8d4
Showing 8 changed files with 780 additions and 167 deletions (+780 −167)
.gitignore              +29  −2
constant.py             +2   −1
detect_with_ocr.py      +357 −102
main_window.py          +241 −31
main_window_ui.py       +22  −3
management.py           +10  −2
narratage_detection.py  +7   −0
ocr_metric.py           +112 −26
.gitignore
...
...
@@ -13,4 +13,31 @@ res/ffmpeg-4.3.1/bin/output.mp4
res/ffmpeg-4.3.1/bin/qiji_local.mp4
venv/
venv37/
shenming_test
\ No newline at end of file
shenming_test
cap.png
requirements3.8.txt
venv3.8-new/
webrtcvad-2.0.10-cp38-abi3-win_amd64.whl
xlsx-resource/
deal_ocr.csv
deal_srt.csv
new.srt
shenhai1.xlsx
shenhai2.xlsx
test,py
"深海短片2.xlsx"
"深海短片origin.xlsx"
11.py
222.py
cap/
cap1597.png
cap831.png
deal.py
deal_movie.py
movie_1.txt
movie_pro.txt
res/.paddleocr/2.3.0.1/ocr/paddleocr/
script1.py
test/
\ No newline at end of file
constant.py
...
...
@@ -12,10 +12,11 @@ import os
class Content:
    StartTimeColumn = 0
    SubtitleColumnNumber = 2
    AsideColumnNumber = 4
    SpeedColumnNumber = 5
    # ActivateColumns = [2, 3]
    ActivateColumns = [4, 5]
    ActivateColumns = [2, 4, 5]
    # ColumnCount = 3
    ObjectName = "all_tableWidget"
    # TimeFormatColumns = [0]
...
...
detect_with_ocr.py
...
...
@@ -30,6 +30,9 @@ from typing import Tuple, Union
from utils import reverse_time_to_seconds
from detect_with_asr import create_sheet, write_to_sheet
from main_window import MainWindow, Element
import time
import numpy as np
import copy
import math
# 字幕的上下边界
...
...
@@ -42,10 +45,18 @@ cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
cur_rec_model_dir = paddle_dir + "rec/ch/ch_PP-OCRv2_rec_infer"
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cls_model_dir=cur_cls_model_dir, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
# paddle_dir = "res/.paddleocr/2.3.0.1/ocr/paddleocr/"
# cur_det_model_dir = paddle_dir + "ch_PP-OCRv4_det_infer"
# cur_rec_model_dir = paddle_dir + "ch_PP-OCRv4_rec_infer"
# ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
# 正常语速为4字/秒
normal_speed = 4
table_index = 0
ocr_h_map = {}

def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]:
    print(">>>>>>video path:" + video_path)
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    start = int(start_time * fps)
...
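For orientation, the detect_subtitle() code further down indexes each OCR hit as rect[i][j]. A minimal sketch (illustrative only, not part of the commit) of the result layout it assumes — PaddleOCR 2.x returns a flat list of [box, (text, confidence)] per image, where box is four [x, y] corner points; newer releases wrap this in one more list — with made-up numbers:

# Illustration of the result shape detect_subtitle() expects (hypothetical values).
fake_result = [
    [[[120, 640], [520, 640], [520, 676], [120, 676]], ("示例字幕文本", 0.93)],
]
for rect, (txt, confidence) in fake_result:
    font_size = rect[2][1] - rect[0][1]   # box height, used as the glyph height
    mid = (rect[0][0] + rect[1][0]) / 2   # horizontal centre of the box
    print(txt, confidence, font_size, mid)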
@@ -183,7 +194,9 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
ocr_positions = []
def get_position(video_path: str, start_time: float, ocr_ranges) -> Tuple[float, float]:
    # return (885.0, 989.0)
    """根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
...
@@ -194,34 +207,36 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
    Returns:
        Tuple[float, float]: 字幕在整个画面中的上下边界位置
    """
    print(">>>>>>>>>>open")
    print("video_path:", video_path)
    video = cv2.VideoCapture(video_path)
    # print("video:", video)
    subtitle_position = {}
    fps = video.get(cv2.CAP_PROP_FPS)
    start = int(start_time * fps)
    cnt = 0
    txt_cnt = 0
    pre_txt = None
    video.set(cv2.CAP_PROP_POS_FRAMES, start)
    # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
    print(cv2.CAP_PROP_FRAME_HEIGHT)
    print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
    down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
    print(">>>>get posti")
    print(ocr_ranges)
    for i in range(len(ocr_ranges)):
        rate = ocr_ranges[i][0]
        rate_bottom = ocr_ranges[i][1]
        print(">>>>>>>>>>open")
        print("video_path:", video_path)
        video = cv2.VideoCapture(video_path)
        # print("video:", video)
        subtitle_position = {}
        fps = video.get(cv2.CAP_PROP_FPS)
        start = int(start_time * fps)
        cnt = 0
        txt_cnt = 0
        pre_txt = None
        video.set(cv2.CAP_PROP_POS_FRAMES, start)
        # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
        print(cv2.CAP_PROP_FRAME_HEIGHT)
        print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
        down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
        global ocr_positions
        print("add positions")
        print(up)
        ocr_positions.insert(0, [up, down])
    # down = up + 20
    # down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
    print(up)
    # print(down)
    up_rate, down_rate = evaluate_position(video_path, 0)
    up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * up_rate)
    down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * down_rate)
    return int(up), int(down)
    # TODO 现阶段是主窗体设定字幕的开始位置和结束位置,传入该函数。现在希望做成自动检测的?
    # print(up)
    # # print(down)
    # return int(up), int(down)
    # while True:
    #     _, img = video.read()
    #     # print("img:", img)
...
@@ -339,7 +354,41 @@ def normalize(text: str) -> str:
    return text

def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
def resize_img(img):
    resize_height = 152
    height, width = img.shape[:2]
    if resize_height > height:
        # 定义放大倍数
        scale_factor = float(resize_height / height)
        # 计算新的宽度和高度
        new_width = int(width * scale_factor)
        new_height = int(height * scale_factor)
        # 使用插值方法进行图像放大
        enlarged_image = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
        return enlarged_image
    else:
        # 定义缩小倍数
        scale_factor = float(height / resize_height)
        # 0.5表示缩小为原来的一半大小
        # 使用插值方法进行图像缩小
        smaller_image = cv2.resize(img, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
        return smaller_image

def extract_white_prior(img, threshold=200):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 设定阈值,将非白色部分二值化为黑色
    ret, binary_image = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    return binary_image

index = 0
t = 140

def detect_subtitle(org_img: np.ndarray, lastSubTitle, last_confidence) -> Tuple[Union[str, None], float]:
    """检测当前画面得到字幕信息
    Args:
...
@@ -348,71 +397,151 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
    Returns:
        Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
    """
    subTitle = ''
    ocr_res = ""
    # up_b = 276
    # down_b = 297
    height = down_b - up_b
    img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
    # 针对低帧率的视频做图像放大处理
    print(height)
    print(up_b)
    print(down_b)
    print(img.shape)
    if img.shape[1] < 1000:
        img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
    cv2.imwrite('./cap.png', img)
    res = ocr.ocr(img, cls=True)
    print('--------> res', res)
    sorted(res, key=lambda text: text[0][0][1])
    sorted(res, key=lambda text: text[0][0][0])
    if len(res) == 0:
        return None, 0
    possible_txt = []
    conf = 0
    print('res --------->', res)
    for x in res:
        # cv2.imshow("cut", img)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()
        rect, (txt, confidence) = x
        font_size = rect[2][1] - rect[0][1]
        mid = (rect[0][0] + rect[1][0]) / 2
        gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
        # log.append("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient,
        # font_size)) 置信度>0.7 & 斜率<0.1 & 字幕偏移量<=25 & 字幕中心在画面宽的0.4-0.6之间
        print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
        print("字体大小差距: {}", format(height - font_size))
        print("高度中心:{}".format((rect[0][1] + rect[1][1]) / 2 / img.shape[0]))
        conf_thred1 = 0.7
        conf_thred2 = 0.85
        # conf_thred1 = 0.1
        # conf_thred2 = 0.4
        # conf_thred1 = 0.5
        # conf_thred2 = 0.7
        if (rect[0][1] + rect[1][1]) / 2 / img.shape[0] > 0.5 or (rect[0][1] + rect[1][1]) / 2 / img.shape[0] <= 0.1:
    global ocr_positions
    # ocr_positions.append([676, 712])
    h = None
    global index
    for i in range(len(ocr_positions)):
        img = copy.deepcopy(org_img)
        up_b = ocr_positions[i][0]
        down_b = ocr_positions[i][1]
        height = down_b - up_b
        if len(ocr_positions) == 1:
            img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
        else:
            # cropped_img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
            cropped_img = img[int(up_b):int(down_b)]
            # cropped_img = resize_img(cropped_img)
            # x = float(150 / height)
            # img_h, img_w = cropped_img.shape[:2]
            # img_h = int(img_h * x)
            # img_w = int(img_w * x)
            # cropped_img = cv2.resize(cropped_img, (img_w, img_h))
            # 定义要添加的上下空白的高度
            padding_top = height * 0.7
            padding_bottom = height * 0.7
            # padding_top = 150
            # padding_bottom = 150
            # 计算新图像的高度
            new_height = cropped_img.shape[0] + padding_top + padding_bottom
            # 创建一个新的空白图像
            img = np.zeros((int(new_height), cropped_img.shape[1], 3), dtype=np.uint8)
            # 将裁剪后的图像放置在新图像中间
            start_y = int(padding_top)
            end_y = start_y + cropped_img.shape[0]
            img[start_y:end_y, :] = cropped_img
        # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
        # global index
        # cv2.imwrite(f'./cap/cap{index}.png', img)
        # img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        # # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
        # t = 230
        # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
        # cv2.imwrite(f'./test2.png', img)
        # index = index + 1
        # img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
        # 针对低帧率的视频做图像放大处理
        print(height)
        print(up_b)
        print(down_b)
        print(img.shape)
        if img.shape[1] < 1000:
            img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
        # img = extract_white_prior(img)
        cv2.imwrite(f'./cap/cap{index}.png', img)
        index = index + 1
        print(">>>>>>>>>>>>>>>>>>>>>>>>>>>new log" + str(index - 1))
        res = ocr.ocr(img, cls=True)
        print('--------> res', res)
        sorted(res, key=lambda text: text[0][0][1])
        sorted(res, key=lambda text: text[0][0][0])
        if len(res) == 0:
            continue
        # TODO 字幕去重算法改进
        if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
            subTitle += txt
            conf = max(conf, confidence)
            # possible_txt.append([txt, mid/img.shape[1]])
            possible_txt.append(txt)
        # 如果字幕在一行中分为两个(或以上)对话文本
        elif confidence > conf_thred2 and gradient < 0.1:
            if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
                # return None, 0, None
        possible_txt = []
        subTitle = ''
        conf = 0
        print('res --------->', res)
        res.sort(key=lambda rect: rect[0][0][0] + rect[0][1][0])
        # 按照中心点排序
        for x in res:
            # cv2.imshow("cut", img)
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
            rect, (txt, confidence) = x
            font_size = rect[2][1] - rect[0][1]
            mid = (rect[0][0] + rect[1][0]) / 2
            gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
            # log.append("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient,
            # font_size)) 置信度>0.7 & 斜率<0.1 & 字幕偏移量<=25 & 字幕中心在画面宽的0.4-0.6之间
            print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
            print("字体大小差距: {}", format(height - font_size))
            print("高度中心:{}".format((rect[0][1] + rect[1][1]) / 2 / img.shape[0]))
            # if confidence < 0.95:
            #     # global t
            #     print("小于0.95,重新检测,阈值为:" + str(t))
            #     t = t + 20
            #     return detect_subtitle(org_img, lastSubTitle, last_confidence)
            if h == None:
                h = font_size
            conf_thred1 = 0.7
            conf_thred2 = 0.85
            # conf_thred1 = 0.1
            # conf_thred2 = 0.4
            # conf_thred1 = 0.5
            # conf_thred2 = 0.7
            if (rect[0][1] + rect[1][1]) / 2 / img.shape[0] > 0.5 or (rect[0][1] + rect[1][1]) / 2 / img.shape[0] <= 0.1:
                continue
            # TODO 字幕去重算法改进
            if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
                subTitle += txt
                conf = max(conf, confidence)
                # possible_txt.append([txt, mid/img.shape[1]])
                possible_txt.append(txt)
                conf = max(conf, confidence)
    # sorted(possible_txt, key=lambda pos : pos[1])
    # print(possible_txt)
    if len(possible_txt) >= 2:
        # subTitle = ' '.join([x[0] for x in possible_txt])
        subTitle = ' '.join(possible_txt)
    print(subTitle, conf)
    if len(subTitle) > 0:
        return subTitle, conf
    return None, 0
            # 如果字幕在一行中分为两个(或以上)对话文本
            elif confidence > conf_thred2 and gradient < 0.1:
                if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
                    # possible_txt.append([txt, mid/img.shape[1]])
                    possible_txt.append(txt)
                    conf = max(conf, confidence)
        # sorted(possible_txt, key=lambda pos : pos[1])
        # print(possible_txt)
        if len(possible_txt) >= 2:
            # subTitle = ' '.join([x[0] for x in possible_txt])
            subTitle = ' '.join(possible_txt)
        print(subTitle, conf)
        if len(subTitle) > 0:
            ocr_res = ocr_res + subTitle
    if (len(ocr_res)) > 0:
        print(">>>>>>>>>>>>cur subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
        # if len(ocr_positions) == 1 and last_confidence != None and confidence != None and confidence < last_confidence:
        #     ocr_res = lastSubTitle
        #     confidence = last_confidence
        # print(">>>>>>>>>>>>res subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
        return ocr_res, confidence, conf, h
    if check_have_ocr(img):
        return "err", None, 0, None
    return None, None, 0, None

def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow = None):
...
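Distilled from the conditions above, the acceptance rule for an OCR box is: confidence above 0.7 with near-zero slope and a horizontal centre in the middle 0.4–0.6 of the frame, or confidence above 0.85 for the off-centre halves of a two-speaker line. A standalone sketch (not part of the commit, written against the same rect layout):

# Hypothetical helper mirroring conf_thred1/conf_thred2 in detect_subtitle().
def keep_subtitle_box(rect, confidence, frame_w, slope) -> bool:
    mid = (rect[0][0] + rect[1][0]) / 2 / frame_w
    if confidence > 0.7 and slope < 0.1 and 0.4 < mid < 0.6:
        return True   # single centred subtitle line
    if confidence > 0.85 and slope < 0.1 and (0.3 < mid < 0.4 or 0.6 < mid < 0.7):
        return True   # one half of a line split between two speakers
    return False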
@@ -440,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    lastSubTitle = None
    last_confidence = None
    lastConf = 0
    # res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
    res = []
...
@@ -448,13 +578,16 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
    end_time = 0
    video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
    pre_state = state[0]
    ocr_h = None
    while True:
        _, frame = video.read()
        if frame is None:
            break
        cnt += 1
        cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
        # 判断当前帧是否已超限制
        # end 主要用来判断是否越界
        if cur_time > end:
            if cur_time - end_time > 1:
                print('--------------------------------------------------')
...
@@ -466,7 +599,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
            # 判断当前是否有字幕需要被保存下来
            if end_time < start_time:
                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''], ocr_h=ocr_h)
                print(">>>>>>subtitle,ocr_h1:" + str(lastSubTitle) + ">>>" + str(ocr_h))
            break
        # 每秒取4帧画面左右
        # TODO 取帧算法优化
...
@@ -479,7 +613,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
        mainWindow.projectContext.nd_process = state[0]
        mainWindow.projectContext.last_time = cur_time
        subTitle, conf = detect_subtitle(frame)
        subTitle, confidence, conf, cur_ocr_h = detect_subtitle(frame, lastSubTitle, last_confidence)
        if subTitle == "err":
            continue
        if subTitle is not None:
            subTitle = normalize(subTitle)
...
@@ -488,6 +625,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
        # 第一次找到字幕
        if lastSubTitle is None and subTitle is not None:
            if cur_ocr_h != None:
                ocr_h = cur_ocr_h
            start_time = cur_time
        # 字幕消失
...
@@ -500,14 +639,17 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                                                 (res[-1][0] - res[-2][1]) * normal_speed)
                # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens], ocr_h)
            print(start_time, end_time, lastSubTitle)
            # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
            add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
            add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''], ocr_h)
            print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
        elif lastSubTitle is not None and subTitle is not None:
            # 两句话连在一起,但是两句话不一样
            if string_similar(lastSubTitle, subTitle) < 0.6:
                if cur_ocr_h != None:
                    ocr_h = cur_ocr_h
                end_time = cur_time
                res.append([start_time, end_time, lastSubTitle])
                if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
...
@@ -516,24 +658,98 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                                                     (res[-1][0] - res[-2][1]) * normal_speed)
                    # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                    # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                    add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                    add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens], ocr_h)
                print(start_time, end_time, lastSubTitle)
                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''], ocr_h)
                print(">>>>>>subtitle,ocr_h3:" + str(lastSubTitle) + ">>>" + str(ocr_h))
                start_time = end_time
            else:
                lastSubTitle = subTitle if conf > lastConf else lastSubTitle
                continue
            # 当前字幕与上一段字幕不一样
            lastSubTitle = subTitle
            lastConf = conf
            if subTitle != "err":
                lastSubTitle = subTitle
                last_confidence = confidence
                lastConf = conf
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1")
    # print(ocr_h_map)
    # mainWindow.refresh_tab_slot()
    while (mainWindow.refresh_flag == True):
        time.sleep(1)
    mainWindow.detect_lock = True
    try:
        process_err_ocr(mainWindow)
    finally:
        mainWindow.detect_lock = False
    # mainWindow.refresh_lock.acquire()
    # try:
    #     process_err_ocr(mainWindow)
    # finally:
    #     mainWindow.refresh_lock.release()

def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
def process_err_ocr(mainWindow):
    # if 1==1:
    #     return
    for i in range(len(mainWindow.projectContext.all_elements)):
        if mainWindow.projectContext.all_elements[i].subtitle != None and mainWindow.projectContext.all_elements[i].subtitle != "":
            ocr_h_map[i] = mainWindow.projectContext.all_elements[i].ocr_h
    print(ocr_h_map)
    data = list(ocr_h_map.values())
    print(">>>>values")
    print(ocr_h_map.values())
    table_indexs = list(ocr_h_map.keys())
    print(table_indexs)
    # 计算均值和标准差
    mean = np.mean(data)
    std_dev = np.std(data)
    # 定义阈值(例如,可以选择 2 倍标准差作为阈值)
    threshold = 2.7 * std_dev
    print(mean)
    print(threshold)
    rm_list = []
    for i, x in enumerate(data):
        if abs(x - mean) > threshold:
            rm_list.append(i)
    print(rm_list)
    # while(mainWindow.refresh_tab_timer.isActive()):
    #     time.sleep(1)
    print(len(mainWindow.projectContext.all_elements))
    for i in range(len(rm_list)):
        try:
            print(">>>>>>>>>will rm" + str(table_indexs[rm_list[i]] + 1 - i) + ", subtitle:" + mainWindow.projectContext.all_elements[table_indexs[rm_list[i]] - i].subtitle)
            print(table_indexs[rm_list[i]])
            mainWindow.del_line_operation_slot(row=table_indexs[rm_list[i]] + 1 - i, show_msg_flag=False)
            time.sleep(0.5)
        except Exception as e:
            print(">>>>>>>>>>>>>>>>>>>>>>del err")
            print(e)
    # mainWindow.refresh_all_tab_slot()
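process_err_ocr() treats a recorded OCR text height as spurious when it deviates from the mean by more than 2.7 standard deviations, and deletes the corresponding table rows. A minimal standalone sketch of that rule on made-up heights (illustrative, not project code):

# Hypothetical ocr_h values; the outlier test matches the 2.7 * std threshold above.
import numpy as np

heights = np.array([36, 35, 37, 36, 35, 36, 37, 35, 36, 120])
mean, std_dev = heights.mean(), heights.std()
outliers = [i for i, h in enumerate(heights) if abs(h - mean) > 2.7 * std_dev]
print(outliers)   # -> [9], the row flagged for deletion from the subtitle table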
def add_to_list(mainWindow: MainWindow, element_type: str, li: list, ocr_h: int = None):
    # 默认使用配置文件中的语速
    speed = mainWindow.projectContext.speaker_speed
    aside_head_time = float(reverse_time_to_seconds(mainWindow.aside_head_time)) if mainWindow.aside_head_time != None else float(0)
    st_time_sec, ed_time_sec, subtitle, suggest = li
    # global table_index
    # if ocr_h != None and element_type == "字幕":
    #     index = table_index
    #     ocr_h_map[index] = ocr_h
    #     print(">>>>>>>>>>>>>>>>>>ocr H map:" + str(index) + ",subtitle:" + subtitle)
    #     print(ocr_h_map)
    print(">>>>>>>>start time:")
    print(aside_head_time)
    print(st_time_sec)
...
@@ -541,11 +757,12 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
        print(">>>>>>need del")
        print(st_time_sec)
        if not mainWindow.add_head_aside:
            new_element = Element('0.00', "", "", "0/100", "", speed)
            new_element = Element('0.00', "", "", "0/100", "", speed, ocr_h)
            mainWindow.projectContext.aside_list.append(new_element)
            mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
            mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1
            mainWindow.add_head_aside = True
            # table_index = table_index + 1
        return
    st_time_sec, ed_time_sec = str(st_time_sec), str(ed_time_sec)
...
@@ -553,7 +770,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
    aside = ""
    i = len(mainWindow.projectContext.all_elements)
    if element_type == "字幕":
        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed)
        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed, ocr_h)
        new_element.print_self()
        if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
            mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
...
@@ -561,9 +778,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
            # print(">>>>>>>>>>>remove short aside")
            mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
            mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
            # table_index = table_index - 1
            mainWindow.last_aside_index = None
        mainWindow.projectContext.subtitle_list.append(new_element)
        mainWindow.projectContext.all_elements.append(mainWindow.projectContext.subtitle_list[-1])
        # table_index = table_index + 1
    else:
        if i == 0:
            st_time_sec = "0.01"
...
@@ -574,7 +793,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
        # 因为暂时没有用到ed_time_sec,所以直接赋值空吧
        ed_time_sec = ""
        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed)
        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed, ocr_h)
        new_element.print_self()
        if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
            mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
...
@@ -582,15 +801,18 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
            # print(">>>>>>>>>>>remove short aside")
            mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
            mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
            # table_index = table_index - 1
            mainWindow.last_aside_index = None
        new_element.suggest = "0/" + new_element.suggest
        if (st_time_sec != None and st_time_sec != "" and aside_head_time > float(st_time_sec)):
            return
        mainWindow.projectContext.aside_list.append(new_element)
        mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
        # table_index = table_index + 1
        mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1

# end_time 主要用来判断是否越界
def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow = None):
    """使用ocr检测视频获取字幕并输出旁白推荐
...
@@ -616,7 +838,11 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
        up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
    else:
        # 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
        up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
        # up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
        get_position(video_path, 0, mainWindow.ocr_ranges)
        print(">>>>>positions:")
        global ocr_positions
        print(ocr_positions)
        context.caption_boundings = [up_b, down_b]
        context.detected = True
...
@@ -627,8 +853,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
    # print("process the total video at time: ", datetime.datetime.now())
    process_video(video_path, start_time, end_time, book_name_xlsx, sheet_name_xlsx, state, mainWindow)

def check_have_ocr(img):
    new_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    t = 230
    _, new_img = cv2.threshold(new_img, t, 255, cv2.THRESH_BINARY)
    if np.any(new_img == 255):
        return True
    else:
        return False

if __name__ == '__main__':
    # path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
    path = "C:/Users/Smile/Desktop/accessibility-movie/"
    # print("get_pos:", get_position(path, 0))
    # evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
    # img = cv2.imread("./cap/cap879.png")
    img = cv2.imread("./cap/cap812.png", cv2.IMREAD_GRAYSCALE)
    # img = cv2.equalizeHist(img)
    t = 230
    _, img = cv2.threshold(img, t, 255, cv2.THRESH_BINARY)
    # img = resize_img(img)
    detect_subtitle(img, None, None)
    # img = cv2.equalizeHist(img)
    # t = 120
    # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
    # # ret, binary_image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # # binary_image = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
    cv2.imwrite(f'./binary2.png', img)
    if np.any(img == 255):
        print("111111")
    else:
        print("222222")
main_window.py
...
...
@@ -13,7 +13,7 @@ import os
import cv2
from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu, QDialog, QVBoxLayout
from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex
from PyQt5.QtMultimedia import *
from PyQt5.QtGui import QIcon
...
@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
from ffmpeg_util import adjust_audio_volume
from PyQt5.QtCore import QThread;
import threading
# from emit_import_event import emit_import_event
...
@@ -71,13 +72,57 @@ class WorkerThread(QThread):
        for t in self.main_window.threads:
            t.start()

    def refresh(self):
        self.main_window.import_process_sign.emit(self.elements)

class ProcessErrThread(QThread):
    def __init__(self, main_window):
        super().__init__()
        self.main_window = main_window

    def run(self):
        from narratage_detection import process_err
        t = RunThread(funcName=process_err, args=(self.main_window,), name="process_err")
        t.setDaemon(True)
        t.start()

class AutoCloseDialog(QDialog):
    def __init__(self, parent=None):
        super(AutoCloseDialog, self).__init__(parent)
        self.setWindowFlags(Qt.CustomizeWindowHint | Qt.NoDropShadowWindowHint)
        layout = QVBoxLayout()
        label = QLabel("字幕边界定位中,请稍后!")
        layout.addWidget(label)
        self.setLayout(layout)

class LocalOcrThread(QThread):
    def __init__(self, main_window, path):
        super().__init__()
        self.main_window = main_window
        self.path = path

    def run(self):
        # 在后台执行耗时操作
        state = [None]
        self.main_window.state = state
        state[0] = 0.1
        self.main_window.threads = []
        t = RunThread(funcName=self.main_window.auto_location_ocr, args=(self.path,), name="auto_location_ocr")
        t.setDaemon(True)
        self.main_window.threads.append(t)
        self.main_window.all_threads.append(t)
        for t in self.main_window.threads:
            t.start()

    def finish(self):
        self.main_window.location_ocr_sign.emit()

class CustomDelegate(QtWidgets.QStyledItemDelegate):
    def paint(self, painter, option, index):
        # Customize the painting behavior for the specific column
...
@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
    renew_signal = pyqtSignal(str)
    import_process_sign = pyqtSignal(list)
    refresh_sign = pyqtSignal()
    location_ocr_sign = pyqtSignal()

    def __init__(self, project_path):
        super(MainWindow, self).__init__()
        self.location_ocr_sign.connect(self.finish_location_ocr)
        self.last_aside_index = None
        self.setupUi(self)
        self.statusbar.showMessage("hello", 5000)
...
@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        # 所有QTimer集中管理
        self.import_excel_timer = QTimer()
        self.import_excel_timer.timeout.connect(self.check_if_import_excel_over)
        self.location_ocr_timer = QTimer()
        self.location_ocr_timer.timeout.connect(self.check_if_location_ocr)
        self.detect_timer = QTimer()
        self.detect_timer.timeout.connect(self.check_if_detect_over_slot)
        self.synthesis_timer = QTimer()
...
@@ -161,7 +210,14 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.video_timer.start(1000)
        # todo 作为参数配置
        self.refresh_tab_timer = QTimer()
        self.refresh_tab_timer.timeout.connect(self.refresh_tab_slot)
        self.up_ocr_timer = QTimer()
        self.down_ocr_timer = QTimer()
        self.up_ocr_timer.timeout.connect(self.up_ocr_timer_func)
        self.down_ocr_timer.timeout.connect(self.down_ocr_timer_func)
        self.up_ocr_bottom_timer = QTimer()
        self.down_ocr_bottom_timer = QTimer()
        self.up_ocr_bottom_timer.timeout.connect(self.up_ocr_bottom_timer_func)
        self.down_ocr_bottom_timer.timeout.connect(self.down_ocr_bottom_timer_func)
        """状态栏相关空间
        """
...
@@ -219,18 +275,28 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.insert_aside_from_now_btn.clicked.connect(self.insert_aside_from_now_slot)
        self.insert_aside_from_now_btn.setEnabled(False)
        self.up_ocr_btn.clicked.connect(self.up_ocr)
        self.up_ocr_btn.pressed.connect(self.up_ocr)
        self.up_ocr_btn.released.connect(self.up_ocr_stop)
        self.down_ocr_btn.clicked.connect(self.down_ocr)
        self.down_ocr_btn.pressed.connect(self.down_ocr)
        self.down_ocr_btn.released.connect(self.down_ocr_stop)
        self.up_ocr_bottom_btn.clicked.connect(self.up_ocr_bottom)
        self.up_ocr_bottom_btn.pressed.connect(self.up_ocr_bottom)
        self.up_ocr_bottom_btn.released.connect(self.up_ocr_bottom_stop)
        self.down_ocr_bottom_btn.clicked.connect(self.down_ocr_bottom)
        self.down_ocr_bottom_btn.pressed.connect(self.down_ocr_bottom)
        self.down_ocr_bottom_btn.released.connect(self.down_ocr_bottom_stop)
        self.confirm_ocr_btn.clicked.connect(self.confirm_ocr)
        self.confirm_head_aside_btn.clicked.connect(self.confirm_head_aside)
        self.detect_btn.clicked.connect(self.show_detect_dialog)
        self.detect_btn.clicked.connect(self.show_confirmation_dialog)
        """视频预览相关信息
...
@@ -398,6 +464,10 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.sld_video.setFocus()
        self.aside_head_time = None
        self.add_head_aside = False
        self.ocr_ranges = []
        self.refresh_lock = threading.Lock()
        self.detect_lock = False
        self.refresh_flag = False
        # 打印到log文件中
        t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
...
@@ -407,6 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        get_focus_thread.setDaemon(True)
        get_focus_thread.start()

    def finish_location_ocr(self):
        self.import_excel_dialog.show_with_msg("字幕定位结束,请检查是否准确,并调整正确")

    def show_confirmation_dialog(self):
        confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'确认已经校准OCR范围(只包含中文,不含英文)?', QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
        if confirm_box == QtWidgets.QMessageBox.Yes:
            self.show_detect_dialog()
        else:
            print(">>>>>>show_confirmation_dialog")

    def getFocus(self):
        while (True):
...
@@ -496,11 +576,20 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
        if self.rate == None:
            self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
            return
            # self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
            h = self.widget.get_h()
            video_h = self.wgt_video.height()
            self.rate = float(h - 10) / float(video_h)
        if self.rate_bottom == None:
            self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
            # self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
            h = self.widget_bottom.get_h()
            video_h = self.wgt_video.height()
            self.rate_bottom = float(h - 6) / float(video_h)
        if not self.check_ocr_rate():
            self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
            return
        if len(self.ocr_ranges) == 0:
            self.ocr_ranges.append([self.rate, self.rate_bottom])
        self.detect_dialog.init_self()
        self.detect_dialog.show()
...
@@ -601,9 +690,39 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.action_operate.setEnabled(True)
        self.action_insert_aside_from_now.setEnabled(True)
        self.insert_aside_from_now_btn.setEnabled(True)
        # self.import_excel_dialog.show_with_msg("正在自动定位字幕边界,请稍后!")
        confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'是否需要自动定位字幕边界?', QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
        if confirm_box == QtWidgets.QMessageBox.Yes:
            self.location_ocr_thread = LocalOcrThread(self, path)
            self.location_ocr_thread.start()
            self.location_ocr_timer.start(1000)
            # self.auto_close_dialog = AutoCloseDialog()
            # self.auto_close_dialog.exec_()
        else:
            print(">>>>>>>>>>>>>play_video")

    def up_ocr(self):
        self.widget.change_painter_flag(True)

    def auto_location_ocr(self, path):
        from detect_with_ocr import evaluate_position
        print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
        y1, y2 = evaluate_position(path.path()[1:], 0)
        video_h = self.wgt_video.height()
        self.widget.setY(int(video_h * y1) + 6)
        time.sleep(1)
        self.widget_bottom.setY(int(video_h * y2) + 10)
        print("y1:%d,y2:%d" % (y1, y2))

    def check_ocr_rate(self):
        if self.rate > self.rate_bottom:
            return False
        else:
            return True

    def up_ocr_timer_func(self):
        # self.widget.change_painter_flag(True)
        h = self.widget.up(3)
        video_h = self.wgt_video.height()
        self.rate = float(h - 10) / float(video_h)
...
@@ -612,9 +731,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        print(self.wgt_video.height())
        print(">>>>>>>>>rate" + str(self.rate))

    def down_ocr(self):
        self.widget.change_painter_flag(True)

    def down_ocr_timer_func(self):
        # self.widget.change_painter_flag(True)
        h = self.widget.down(3)
        video_h = self.wgt_video.height()
        self.rate = float(h - 10) / float(video_h)
...
@@ -622,19 +740,70 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        print(self.wgt_video.height())
        print(">>>>>>>>>rate" + str(self.rate))

    def up_ocr_bottom(self):
    def up_ocr(self):
        self.user_editing_content = True
        self.up_ocr_timer.start(50)
        # self.widget.change_painter_flag(True)
        # h = self.widget.up(3)
        # video_h = self.wgt_video.height()
        # self.rate = float(h-10)/float(video_h)
        # print(">>>>>video_h: "+str(video_h))
        # print(">>>>>up h:" + str(h))
        # print(self.wgt_video.height())
        # print(">>>>>>>>>rate" + str(self.rate))

    def up_ocr_stop(self):
        self.user_editing_content = False
        self.up_ocr_timer.stop()

    def down_ocr_stop(self):
        self.user_editing_content = False
        self.down_ocr_timer.stop()

    def down_ocr(self):
        self.user_editing_content = True
        self.down_ocr_timer.start(50)
        # self.widget.change_painter_flag(True)
        # h = self.widget.down(3)
        # video_h = self.wgt_video.height()
        # self.rate = float(h-10)/float(video_h)
        # print(">>>>>down h:" + str(h))
        # print(self.wgt_video.height())
        # print(">>>>>>>>>rate" + str(self.rate))

    def up_ocr_bottom_timer_func(self):
        self.widget_bottom.change_painter_flag(True)
        h = self.widget_bottom.up(3)
        video_h = self.wgt_video.height()
        self.rate_bottom = float(h - 6) / float(video_h)

    def down_ocr_bottom(self):
    def down_ocr_bottom_timer_func(self):
        self.widget_bottom.change_painter_flag(True)
        h = self.widget_bottom.down(3)
        video_h = self.wgt_video.height()
        self.rate_bottom = float(h - 6) / float(video_h)

    def up_ocr_bottom_stop(self):
        self.user_editing_content = False
        self.up_ocr_bottom_timer.stop()

    def down_ocr_bottom_stop(self):
        self.user_editing_content = False
        self.down_ocr_bottom_timer.stop()

    def up_ocr_bottom(self):
        self.user_editing_content = True
        self.up_ocr_bottom_timer.start(50)
        # self.widget_bottom.change_painter_flag(True)
        # h = self.widget_bottom.up(3)
        # video_h = self.wgt_video.height()
        # self.rate_bottom = float(h-6)/float(video_h)

    def down_ocr_bottom(self):
        self.user_editing_content = True
        self.down_ocr_bottom_timer.start(50)
        # self.widget_bottom.change_painter_flag(True)
        # h = self.widget_bottom.down(3)
        # video_h = self.wgt_video.height()
        # self.rate_bottom = float(h-6)/float(video_h)

    def refresh_on_import(self):
        print(">>>>>>>refresh in")
        self.refresh_tab_slot()
...
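The commit replaces the single clicked handlers on the boundary buttons with a pressed/released pair driving a 50 ms QTimer, so holding a button keeps nudging the subtitle boundary in 3-pixel steps. A minimal, self-contained sketch of that press-and-hold pattern (button and variable names are illustrative, not the project's widgets):

import sys
from PyQt5.QtCore import QTimer
from PyQt5.QtWidgets import QApplication, QPushButton

app = QApplication(sys.argv)
offset = 0                         # stands in for the subtitle-boundary position

timer = QTimer()
timer.setInterval(50)              # fires every 50 ms while the button is held

def nudge():
    global offset
    offset += 3                    # same step size as the widget.up(3)/down(3) calls
    print("boundary offset:", offset)

timer.timeout.connect(nudge)

btn = QPushButton("move boundary down")
btn.pressed.connect(timer.start)   # start repeating when the button goes down
btn.released.connect(timer.stop)   # stop as soon as it is released
btn.show()
sys.exit(app.exec_())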
@@ -877,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
        self.check_if_over("旁白导入")

    def check_if_location_ocr(self):
        self.check_if_over("字幕定位")
        alive = True
        for t in self.threads:
            alive = alive and t.is_alive()
        if not alive:
            self.location_ocr_timer.stop()
            # self.auto_close_dialog.close()
            self.threads = []

    # type = 检测 或 合成 或 导出
    def check_if_over(self, type: str):
        """确认传入的待检测任务是否完成
...
@@ -904,9 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        elif type == "检测":
            self.detect_timer.stop()
            self.refresh_tab_timer.stop()
            # t = ProcessErrThread(self)
            # t.start()
            # from narratage_detection import process_err
            # process_err(self)
        elif type == "旁白导入":
            self.import_excel_timer.stop()
            # self.refresh_tab_timer.stop()
        elif type == "字幕定位":
            self.location_ocr_timer.stop()
        else:
            self.export_timer.stop()
...
@@ -921,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
            self.progressBar.setValue(100)
            self.progressLabel.setText(f"100%")
            self.projectContext.nd_process = 1
            self.threads = []

    def deal_synthesis_callback_slot(self, threads, state):
        """实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态
...
@@ -1305,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
                item = QTableWidgetItem(text)
                item.setTextAlignment(Qt.AlignCenter)
                # 设置为不可编辑
                if self.checkIfTableItemCanChange(table, idx, j) == False:
                # if self.checkIfTableItemCanChange(table, idx, j) == False:
                    # item.setFlags(Qt.ItemIsEnabled)
                    print(1)
                    # print(">>>>>>>>>setElememtToTable")
                table.setItem(idx, j, item)
                # 只有Content页的字幕列和 Aside页的字幕列 可编辑
...
@@ -1776,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        pre_item = self.all_tableWidget.item(row, col - 1)
        suggest = pre_item.text()
        if suggest != None and suggest != "":
            print(">>>>>>>>suggest:" + suggest)
        if col == constant.Content.AsideColumnNumber and suggest != None and suggest != "":
            arrays = suggest.split("/")
            if len(arrays) == 2:
                suggest = str(len(text)) + "/" + arrays[1]
...
@@ -1807,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
            # self.all_tableWidget.setItem(
            #     int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
            self.projectContext.refresh_speed(row, text)
        elif col == constant.Content.SubtitleColumnNumber:
            self.projectContext.refresh_subtitle(row, text)
        # self.all_tableWidget_idx = int(row)
        # self.set_table_to_window(False)
...
@@ -1942,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        将表格内容更新至界面中,并保存当前工程内容
        """
        self.set_table_to_window(need_refresh_all=False)
        self.projectContext.save_project(False)
        if not self.detect_lock:
            self.refresh_flag = True
            try:
                self.set_table_to_window(need_refresh_all=False)
                self.projectContext.save_project(False)
            finally:
                self.refresh_flag = False

    def refresh_all_tab_slot(self):
        """刷新整个表格
...
@@ -2274,7 +2470,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.prompt_dialog.show_with_msg("操作成功!!请查看变化")

    # 只有row起作用
    def del_line_operation_slot(self, row: int, start_time="0", end_time="0", subtitle="", suggest="", aside="", speed="", refresh_flag=True):
    def del_line_operation_slot(self, row: int, start_time="0", end_time="0", subtitle="", suggest="", aside="", speed="", refresh_flag=True, show_msg_flag=True):
        """删除一行
        Args:
...
@@ -2313,7 +2509,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.projectContext.all_elements.pop(int(row) - 1)
        if refresh_flag:
            self.refresh_tab_slot()
        self.prompt_dialog.show_with_msg("操作成功!!请查看变化")
        if show_msg_flag:
            self.prompt_dialog.show_with_msg("操作成功!!请查看变化")

    def pb_item_changed_by_double_clicked_slot(self, item):
        """双击后修改旁白文本
...
@@ -2418,4 +2615,17 @@ class MainWindow(QMainWindow, Ui_MainWindow):
                                              str(round(video_position / 1000, 2)))
        self.import_excel_dialog.show_with_msg("定位成功:" + self.aside_head_time)
\ No newline at end of file

    def confirm_ocr(self):
        if self.rate == None:
            self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
            return
        if self.rate_bottom == None:
            self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
            return
        if not self.check_ocr_rate():
            self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
            return
        self.ocr_ranges.append([self.rate, self.rate_bottom])
        self.prompt_dialog.show_with_msg(f"操作成功,如果电影存在多行字幕,请移动字幕上下边界,再次点击该按钮确认,目前已存在{len(self.ocr_ranges)}组字幕边界")
\ No newline at end of file
main_window_ui.py
...
...
@@ -18,6 +18,9 @@ class MyWidget(QWidget):
    # def __init__(self, parent=None):
    #     super(QWidget, self).__init__(parent)
    #     self.painter_flag = True
    def __init__(self, parent=None, color=Qt.red):
        super(QWidget, self).__init__(parent)
        self.color = color

    def paintEvent(self, event):
        # print(">>>>>>>>into paint")
...
@@ -26,7 +29,7 @@ class MyWidget(QWidget):
        lock.acquire()
        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing)  # Optional: Enable anti-aliasing
        painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
        painter.setPen(QPen(self.color, 2, Qt.SolidLine))
        painter.drawLine(0, 1, 800, 1)
        painter.end()
        lock.release()
...
@@ -55,8 +58,17 @@ class MyWidget(QWidget):
        # painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
        # painter.drawLine(0, 1, 800, 1)
        # painter.end()
        print(">>>>>cur_y : " + str(self.y()))
        return self.y()

    def setY(self, h):
        print(">>>>>cur_y2 : " + str(self.y()))
        self.move(0, h)

    def get_h(self):
        return self.y()

    def down(self, mov_len):
        print(">>>>>>>>>>>down" + str(mov_len))
        self.move(0, self.y() + mov_len)
...
@@ -314,6 +326,8 @@ class Ui_MainWindow(object):
        self.horizontalLayout_7.setObjectName("horizontalLayout_7")
        self.up_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
        self.up_ocr_btn.setObjectName("up_ocr_btn")
        # self.up_ocr_btn.setAutoRepeatDelay(False)
        # self.up_ocr_btn.setAutoRepeat
        self.horizontalLayout_7.addWidget(self.up_ocr_btn)
        self.down_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
        self.down_ocr_btn.setObjectName("down_ocr_btn")
...
@@ -324,9 +338,13 @@ class Ui_MainWindow(object):
        self.down_ocr_bottom_btn = QtWidgets.QPushButton(self.centralwidget)
        self.down_ocr_bottom_btn.setObjectName("down_ocr_bottom_btn")
        self.horizontalLayout_7.addWidget(self.down_ocr_bottom_btn)
        self.confirm_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
        self.confirm_ocr_btn.setObjectName("confirm_ocr_btn")
        self.horizontalLayout_7.addWidget(self.confirm_ocr_btn)
        self.confirm_head_aside_btn = QtWidgets.QPushButton(self.centralwidget)
        self.confirm_head_aside_btn.setObjectName("confirm_head_aside_btn")
        self.horizontalLayout_7.addWidget(self.confirm_head_aside_btn)
        self.horizontalLayout_8 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_8.setObjectName("horizontalLayout_8")
...
@@ -523,7 +541,8 @@ class Ui_MainWindow(object):
        self.action_redo = QtWidgets.QAction(MainWindow)
        # self.action_redo.setFont(font)
        self.action_redo.setObjectName("action_redo")
        self.action_3 = QtWidgets.QAction("旁白区间检测", self, triggered=self.show_detect_dialog)
        # self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_detect_dialog)
        self.action_3 = QtWidgets.QAction("旁白区间检测", self, triggered=self.show_confirmation_dialog)
        self.action_3.setEnabled(False)
        self.action_4 = QtWidgets.QAction("旁白音频合成", self, triggered=self.show_assemble_dialog)
        self.action_4.setEnabled(False)
...
@@ -539,7 +558,6 @@ class Ui_MainWindow(object):
        self.action_9.setEnabled(True)
        self.action_10 = QtWidgets.QAction("片头旁白定位", self, triggered=self.confirm_head_aside)
        self.action_10.setEnabled(True)
        # self.action_3.setObjectName("action_3")
        # self.action_4 = QtWidgets.QAction(MainWindow)
        # self.action_4.setObjectName("action_4")
...
@@ -604,6 +622,7 @@ class Ui_MainWindow(object):
        self.up_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界上移"))
        self.down_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界下移"))
        self.confirm_head_aside_btn.setText(_translate("MainWindow", "片头旁白定位"))
        self.confirm_ocr_btn.setText(_translate("MainWindow", "字幕边界确认"))
        self.detect_btn.setText(_translate("MainWindow", "旁白区间检测"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕"))
...
management.py
...
...
@@ -77,13 +77,14 @@ class OperateRecord:
# 每一行的具体信息,"起始时间", "终止时间", "字幕", '建议', '解说脚本'
class Element:
    def __init__(self, st_time_sec: str, ed_time_sec: str, subtitle, suggest, aside, speed="1.00(4字/秒)"):
    def __init__(self, st_time_sec: str, ed_time_sec: str, subtitle, suggest, aside, speed="1.00(4字/秒)", ocr_h=None):
        self.st_time_sec = st_time_sec
        self.ed_time_sec = ed_time_sec
        self.subtitle = subtitle
        self.suggest = suggest
        self.aside = aside
        self.speed = speed
        self.ocr_h = ocr_h

    # 判断当前元素是否是字幕
    def is_subtitle(self):
...
@@ -263,6 +264,11 @@ class ProjectContext:
        if not self.initial_ing:
            save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)

    def refresh_subtitle(self, row, subtitle: str):
        self.all_elements[int(row)].subtitle = subtitle
        if not self.initial_ing:
            save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)

    def refresh_speed(self, row, speed: str) -> None:
        self.all_elements[int(row)].speed = speed
        if not self.initial_ing:
...
@@ -307,7 +313,7 @@ class ProjectContext:
            if d["终止时间"][i] is None:
                # 如果是最后一条
                if i == len(d["字幕"]) - 1:
                    print(1)
                    print(">>>>>>>>>load_excel_from_path")
                    # ed_time_sec = "360000" if self.duration == 0 else self.duration  # todo 默认最大时长是100h
                else:
                    ed_time_sec = "%.2f" % (float(d["起始时间"][i + 1]) - 0.01)
...
@@ -428,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
    backup_path = os.path.dirname(new_excel_path) + "/tmp_" + str(time.time()) + ".xlsx"
    # os.remove(new_excel_path)
    os.rename(new_excel_path, backup_path)
    # print(">>>>>>new_excel_path:" + new_excel_path)
    # print(">>>>>>>>>>backup_path:" + backup_path)
    try:
        create_sheet(new_excel_path, "旁白插入位置建议", [header])
        # for element in all_element:
...
narratage_detection.py
...
...
@@ -63,6 +63,13 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
        from detect_with_ocr import detect_with_ocr
        detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)

def process_err(mainWindow: MainWindow = None):
    from detect_with_ocr import process_err_ocr
    try:
        process_err_ocr(mainWindow)
    except Exception as e:
        print("process_err err")
        print(e)

if __name__ == '__main__':
    # 定义参数
...
ocr_metric.py
import re
import sys
import csv
import jieba
import argparse
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher
title = ['起始时间(转换后)', '终止时间(转换后)', '字幕']
from tqdm import tqdm
# title = ['起始时间(转换后)', '终止时间(转换后)', '字幕']
title = ['起始时间', '终止时间', '字幕']

def init():
    # 获取中文停用词列表
...
@@ -22,13 +27,32 @@ def change_to_second(time_str):
              time_obj.second + time_obj.microsecond / 1000000
    return seconds

# 将中文句子划分,并且防止划分全部为停用词
def words_segment(str):
    tmp = ','.join(jieba.cut(str))
    # 将分割的句子差分成单词,也不进行划分
    if is_all_stopwords(tmp) or len(list(jieba.cut(str))) == len(str):
        return str
    return tmp

# 判断是否从中英文字幕中提取中文
def extract_info(str, has_english=False):
    if not has_english:
        return str
    chinese_text = re.findall(r'[\u4e00-\u9fff]+', str)
    return ' '.join(chinese_text)

# 计算字幕的相似度
def calculate_similarity(str1, str2, method='cosine'):
    if method == 'cosine':
        tfidf_vectorizer = TfidfVectorizer()
        tfidf_matrix = tfidf_vectorizer.fit_transform([str1, str2])
        str1, str2 = words_segment(str1), words_segment(str2)
        tfidf_vectorizer = TfidfVectorizer(min_df=1)
        tfidf_matrix = tfidf_vectorizer.fit_transform([str1, str2])
        # shape=[2, N]
        # print(np.array(tfidf_matrix.toarray()).shape, type(tfidf_matrix), tfidf_matrix.toarray())
        similarity_matrix = cosine_similarity(tfidf_matrix)
        return similarity_matrix[0][1]
    elif method == 'distance':
        return -String_edit_distance(str1, str2)
    else:
        return SequenceMatcher(None, str1, str2).ratio()
...
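A quick standalone illustration of the cosine branch above (invented subtitle strings, not from the repository): segment both strings with jieba as words_segment() does, vectorize with TfidfVectorizer, and read the off-diagonal cosine similarity.

import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

a, b = "今天天气真好", "今天的天气很好"
a_seg, b_seg = ','.join(jieba.cut(a)), ','.join(jieba.cut(b))  # same segmentation as words_segment()
m = TfidfVectorizer(min_df=1).fit_transform([a_seg, b_seg])
print(cosine_similarity(m)[0][1])  # value in [0, 1]; higher means more similar subtitles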
@@ -37,23 +61,62 @@ def calculate_time_difference(time1, time2):
    return abs(time2 - time1)

def calculate_weight(x, y):
    # weight = e^(-alpha * time_diff)
    # 相差1s的系数为0.9
    alpha = 0.11
    return 1 / (alpha * (x + y) + 1)
    # # weight = e^(-alpha * time_diff)
    # # 相差1s的系数为0.9
    # alpha = 0.11
    # return 1 / (alpha * (x + y) + 1)
    return 1.0  # 目前不考虑时间系数

# 检查句子中的每个单词是否都是停用词
def is_all_stopwords(sentence):
    sentence = sentence.replace(' ', '')
    return all(word in stop_words for word in sentence)

# 编辑距离算法 有问题!!!!!!
def String_edit_distance(str1, str2):
    n, m = len(str1), len(str2)
    dp = [[0 for _ in range(m + 1)] for _ in range(n + 1)]
    for i in range(n + 1):
        dp[i][0] = i
    for i in range(m + 1):
        dp[0][i] = i
    dp[0][0] = 0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            if str1[i - 1] == str2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = min(dp[i - 1][j - 1], min(dp[i][j - 1], dp[i - 1][j])) + 1
    # print(dp[n][m], n, m)
    return 1.0 * dp[n][m] / max(n, m)

### 如果其中有-符号,可能在用excel打开时自动添加=变成公式,读取的时候没问题
def read_srt_to_csv(path_srt, path_output):
    with open(path_srt, 'r', encoding='utf-8-sig') as f:
        srt_content = f.read()  # str
    try:
        with open(path_srt, 'r', encoding='utf-8-sig') as f:
            srt_content = f.read()  # str
    except UnicodeDecodeError:
        print(f"编码错误,已经切换到utf-16编码")
        try:
            with open(path_srt, 'r', encoding='utf-16') as f:
                srt_content = f.read()  # str
        except:
            print(f"请选择utf-8或utf-16编码形式的srt文件")
            sys.exit(1)
    # 使用正则表达式匹配时间码和字幕内容
    pattern = re.compile(r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.+?)(?=\n\d+\n|$)', re.DOTALL)
    matches = pattern.findall(srt_content)
    has_english = []
    for i in range(5):
        idx = np.random.randint(len(matches))
        pattern = re.compile(r'[a-zA-Z]')
        has_english.append(bool(pattern.search(matches[idx][3])))
    has_english = all(has_english)
    print('!' * 20, has_english)
    # 写入 csv 文件
    with open(path_output, 'w', newline='', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
...
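For reference, a tiny standalone run of the same SRT-cue regex used in read_srt_to_csv(), on an invented two-cue snippet (not from the repository):

import re

srt = "1\n00:00:01,000 --> 00:00:02,500\n你好\n\n2\n00:00:03,000 --> 00:00:04,000\n再见"
pattern = re.compile(r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.+?)(?=\n\d+\n|$)', re.DOTALL)
for idx, start, end, text in pattern.findall(srt):
    print(idx, start, end, text.strip())  # e.g. "1 00:00:01,000 00:00:02,500 你好"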
@@ -61,7 +124,7 @@ def read_srt_to_csv(path_srt, path_output):
        for _, start, end, subtitle in matches:
            # 都是str格式
            subtitle = re.sub(r'\{[^}]*\}', '', subtitle)  # 将srt文件前的加粗等格式去掉
            csv_writer.writerow([start, end, subtitle.strip()])
            csv_writer.writerow([start, end, extract_info(subtitle.strip(), has_english)])

def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'):
    data = pd.read_excel(path_xlsx)
...
@@ -70,20 +133,19 @@ def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'):
        csv_writer.writerow(title)
        for _, data1 in data.iterrows():
            start, end, subtitle = data1[1], data1[3], data1[4]
            # print(data1[1])
            start, end, subtitle = data1[0], data1[1], data1[2]
            if isinstance(subtitle, float) and np.isnan(subtitle):
                continue
            # 与srt文件格式同步
            start = start.replace('.', ',')
            end = end.replace('.', ',')
            # print(start, end, subtitle,)
            # print(type(start), type(end), type(subtitle))
            csv_writer.writerow([start, end, subtitle.strip()])

### 对于srt中的字幕计算相似性度。从ocr中找到时间戳满足<=time_t的字幕,
### 然后计算字幕间的相似度,取一个最大的。字幕从start和end都匹配一遍
# time_threshold设置阈值,用于判断时间差是否可接受
def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'):
def measure_score(path_srt, path_ocr, time_threshold=5.0, time_threshold_re=False, method='cosine'):
    data_srt, data_ocr = [], []
    with open(path_srt, 'r', encoding='utf-8') as file:
        csv_reader = csv.reader(file)
...
@@ -103,22 +165,36 @@ def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'):
    # 计算相似度
    total_similarity = 0.0
    total_weight = 0.0
    for sub in data_srt:
        max_similarity = 0.0
    txt1 = []
    for sub in tqdm(data_srt, desc="Processing", ncols=100):
        max_similarity = 0.0 if method != 'distance' else -1.0
        # 去除srt中的停用词
        if is_all_stopwords(sub[2]):
            continue
        subb = ""
        for sub1 in data_ocr:
            x, y = abs(sub[0] - sub1[0]), abs(sub[1] - sub1[1])
            if min(x, y) <= time_threshold:
                # print(sub[2], sub1[2])
                score = calculate_similarity(sub[2], sub1[2], 'cosine')
            if time_threshold_re:
                time_threshold_tmp = time_threshold
            else:
                time_threshold_tmp = (sub[1] - sub[0]) * 0.3  # 10s允许3s的误差
            if min(x, y) <= time_threshold_tmp:
                score = calculate_similarity(sub[2], sub1[2], method)
                if max_similarity <= score * calculate_weight(x, y):
                    subb = sub1[2]
                max_similarity = max(max_similarity, score * calculate_weight(x, y))
        if max_similarity <= -0.5:
            # print(max_similarity, sub[2], subb, sub[0])
            txt1.append(' !!! '.join([str(max_similarity), sub[2], subb, str(sub[0])]))
        total_similarity += max_similarity
        total_weight += 1
    if method == 'distance':
        total_similarity = total_weight + total_similarity
    # print(total_similarity, total_similarity / len(data_srt), total_similarity / total_weight)
    with open('movie_pro.txt', 'w', encoding='utf-8') as f:
        for i in txt1:
            f.write(i + '\n')
    return total_similarity / len(data_srt), total_similarity / total_weight

if __name__ == '__main__':
...
@@ -128,13 +204,23 @@ if __name__ == '__main__':
    # 添加命令行参数
    parser.add_argument("--path_srt", required=True, type=str, help="Path of srt file, format is srt")
    parser.add_argument("--path_ocr", required=True, type=str, help="Path of ocr file, format is xlsx")
    parser.add_argument("--method", type=str, default='cosine', help="Select evaluation method")
    parser.add_argument("--time_threshold", type=float, default=5.0, help="Allowable time frame")
    parser.add_argument("--time_threshold", type=float, default=5.0, help="Allowable time frame")
    parser.add_argument("--method", type=str, default='distance', choices=['cosine', 'distance', 'sequence'], help="Select evaluation method")
    parser.add_argument("--time_threshold_re", type=bool, default=True, help="Specify whether \
                        time threshold is required")
    args = parser.parse_args()
    output_file_srt = 'deal_srt.csv'
    output_file_ocr = 'deal_ocr.csv'
    read_srt_to_csv(args.path_srt, output_file_srt)
    read_from_xlsx(args.path_ocr, output_file_ocr)
    score = measure_score(output_file_srt, output_file_ocr, args.time_threshold, args.method)
    print(f'该评估算法得分: {score[1]:.5f}')
\ No newline at end of file
    score = measure_score(output_file_srt, output_file_ocr, args.time_threshold, \
                          args.time_threshold_re, args.method)
    print(f'该评估算法得分: {100 * score[1]:.3f}')
    # python ocr_metric.py --path_srt test/new/movie_1.srt --path_ocr ../测试/the-swan-v3/The.Swan-zimu.xlsx --time_threshold 10
\ No newline at end of file