Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_2
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
赵心治
accessibility_movie_2
Commits
2c4cd5c0
Commit
2c4cd5c0
authored
Nov 05, 2023
by
smile2019
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'refs/remotes/origin/feat_1' into feat_1
parents
d3fcd34f
7296e8d4
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
698 additions
and
83 deletions
+698
-83
.gitignore
.gitignore
+28
-0
constant.py
constant.py
+2
-1
detect_with_ocr.py
detect_with_ocr.py
+284
-29
main_window.py
main_window.py
+235
-24
main_window_ui.py
main_window_ui.py
+22
-3
management.py
management.py
+10
-2
narratage_detection.py
narratage_detection.py
+7
-0
ocr_metric.py
ocr_metric.py
+110
-24
No files found.
.gitignore
View file @
2c4cd5c0
...
@@ -14,3 +14,30 @@ res/ffmpeg-4.3.1/bin/qiji_local.mp4
...
@@ -14,3 +14,30 @@ res/ffmpeg-4.3.1/bin/qiji_local.mp4
venv/
venv/
venv37/
venv37/
shenming_test
shenming_test
cap.png
requirements3.8.txt
venv3.8-new/
webrtcvad-2.0.10-cp38-abi3-win_amd64.whl
xlsx-resource/
deal_ocr.csv
deal_srt.csv
new.srt
shenhai1.xlsx
shenhai2.xlsx
test,py
"\346\267\261\346\265\267\347\237\255\347\211\2072.xlsx"
"\346\267\261\346\265\267\347\237\255\347\211\207origin.xlsx"
11.py
222.py
cap/
cap1597.png
cap831.png
deal.py
deal_movie.py
movie_1.txt
movie_pro.txt
res/.paddleocr/2.3.0.1/ocr/paddleocr/
script1.py
test/
\ No newline at end of file
constant.py
View file @
2c4cd5c0
...
@@ -12,10 +12,11 @@ import os
...
@@ -12,10 +12,11 @@ import os
class
Content
:
class
Content
:
StartTimeColumn
=
0
StartTimeColumn
=
0
SubtitleColumnNumber
=
2
AsideColumnNumber
=
4
AsideColumnNumber
=
4
SpeedColumnNumber
=
5
SpeedColumnNumber
=
5
# ActivateColumns = [2, 3]
# ActivateColumns = [2, 3]
ActivateColumns
=
[
4
,
5
]
ActivateColumns
=
[
2
,
4
,
5
]
# ColumnCount = 3
# ColumnCount = 3
ObjectName
=
"all_tableWidget"
ObjectName
=
"all_tableWidget"
# TimeFormatColumns = [0]
# TimeFormatColumns = [0]
...
...
detect_with_ocr.py
View file @
2c4cd5c0
...
@@ -30,6 +30,9 @@ from typing import Tuple, Union
...
@@ -30,6 +30,9 @@ from typing import Tuple, Union
from
utils
import
reverse_time_to_seconds
from
utils
import
reverse_time_to_seconds
from
detect_with_asr
import
create_sheet
,
write_to_sheet
from
detect_with_asr
import
create_sheet
,
write_to_sheet
from
main_window
import
MainWindow
,
Element
from
main_window
import
MainWindow
,
Element
import
time
import
numpy
as
np
import
copy
import
math
import
math
# 字幕的上下边界
# 字幕的上下边界
...
@@ -42,10 +45,18 @@ cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
...
@@ -42,10 +45,18 @@ cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
cur_rec_model_dir
=
paddle_dir
+
"rec/ch/ch_PP-OCRv2_rec_infer"
cur_rec_model_dir
=
paddle_dir
+
"rec/ch/ch_PP-OCRv2_rec_infer"
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
"ch"
,
show_log
=
False
,
use_gpu
=
False
,
cls_model_dir
=
cur_cls_model_dir
,
det_model_dir
=
cur_det_model_dir
,
rec_model_dir
=
cur_rec_model_dir
)
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
"ch"
,
show_log
=
False
,
use_gpu
=
False
,
cls_model_dir
=
cur_cls_model_dir
,
det_model_dir
=
cur_det_model_dir
,
rec_model_dir
=
cur_rec_model_dir
)
# paddle_dir = "res/.paddleocr/2.3.0.1/ocr/paddleocr/"
# cur_det_model_dir = paddle_dir + "ch_PP-OCRv4_det_infer"
# cur_rec_model_dir = paddle_dir + "ch_PP-OCRv4_rec_infer"
# ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
# 正常语速为4字/秒
# 正常语速为4字/秒
normal_speed
=
4
normal_speed
=
4
# Module-level state shared by the OCR helpers in this file.
# table_index: in the visible part of this file it is only mentioned in commented-out code.
# ocr_h_map: filled by process_err_ocr, keyed by an element's position in all_elements,
#            with that element's ocr_h attribute as the value.
table_index
=
0
ocr_h_map
=
{}
def
evaluate_position
(
video_path
:
str
,
start_time
:
float
)
->
Tuple
[
float
,
float
]:
def
evaluate_position
(
video_path
:
str
,
start_time
:
float
)
->
Tuple
[
float
,
float
]:
print
(
">>>>>>video path:"
+
video_path
)
video
=
cv2
.
VideoCapture
(
video_path
)
video
=
cv2
.
VideoCapture
(
video_path
)
fps
=
video
.
get
(
cv2
.
CAP_PROP_FPS
)
fps
=
video
.
get
(
cv2
.
CAP_PROP_FPS
)
start
=
int
(
start_time
*
fps
)
start
=
int
(
start_time
*
fps
)
...
@@ -183,7 +194,9 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
...
@@ -183,7 +194,9 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
def
get_position
(
video_path
:
str
,
start_time
:
float
,
rate
:
float
,
rate_bottom
:
float
)
->
Tuple
[
float
,
float
]:
ocr_positions
=
[]
def
get_position
(
video_path
:
str
,
start_time
:
float
,
ocr_ranges
)
->
Tuple
[
float
,
float
]:
# return (885.0, 989.0)
# return (885.0, 989.0)
"""根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
"""根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
...
@@ -194,6 +207,11 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
...
@@ -194,6 +207,11 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
Returns:
Returns:
Tuple[float, float]: 字幕在整个画面中的上下边界位置
Tuple[float, float]: 字幕在整个画面中的上下边界位置
"""
"""
print
(
">>>>get posti"
)
print
(
ocr_ranges
)
for
i
in
range
(
len
(
ocr_ranges
)):
rate
=
ocr_ranges
[
i
][
0
]
rate_bottom
=
ocr_ranges
[
i
][
1
]
print
(
">>>>>>>>>>open"
)
print
(
">>>>>>>>>>open"
)
print
(
"video_path:"
,
video_path
)
print
(
"video_path:"
,
video_path
)
video
=
cv2
.
VideoCapture
(
video_path
)
video
=
cv2
.
VideoCapture
(
video_path
)
...
@@ -210,18 +228,15 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
...
@@ -210,18 +228,15 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
print
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
))
print
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
))
up
=
int
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
*
(
rate
))
up
=
int
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
*
(
rate
))
down
=
int
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
*
(
rate_bottom
))
down
=
int
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
*
(
rate_bottom
))
global
ocr_positions
print
(
"add positions"
)
print
(
up
)
ocr_positions
.
insert
(
0
,[
up
,
down
])
# down = up + 20
# down = up + 20
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
print
(
up
)
# print(up)
# print(down)
# # print(down)
# return int(up), int(down)
up_rate
,
down_rate
=
evaluate_position
(
video_path
,
0
)
up
=
int
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
*
up_rate
)
down
=
int
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
*
down_rate
)
return
int
(
up
),
int
(
down
)
# TODO 现阶段是主窗体设定字幕的开始位置和结束位置,传入该函数。现在希望做成自动检测的?
# while True:
# while True:
# _, img = video.read()
# _, img = video.read()
# # print("img:", img)
# # print("img:", img)
...
@@ -339,7 +354,41 @@ def normalize(text: str) -> str:
...
@@ -339,7 +354,41 @@ def normalize(text: str) -> str:
return
text
return
text
def
detect_subtitle
(
img
:
np
.
ndarray
)
->
Tuple
[
Union
[
str
,
None
],
float
]:
def resize_img(img, resize_height=152):
    """Scale an image so that its height becomes roughly ``resize_height``.

    The aspect ratio is preserved. Images shorter than the target are
    enlarged; images at or above the target are reduced.

    Args:
        img: Image array whose first two ``shape`` entries are
            (height, width).
        resize_height: Target height in pixels. Defaults to 152, the value
            that used to be hard-coded.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    height, width = img.shape[:2]
    if resize_height > height:
        # Enlarge: compute the explicit output size from the scale factor.
        scale_factor = float(resize_height / height)
        new_width = int(width * scale_factor)
        new_height = int(height * scale_factor)
        return cv2.resize(img, (new_width, new_height),
                          interpolation=cv2.INTER_LINEAR)

    # Shrink: the factor must be target / current (<= 1). The previous code
    # used height / resize_height, which is >= 1 and therefore made tall
    # images even larger instead of reducing them.
    scale_factor = float(resize_height / height)
    return cv2.resize(img, None, fx=scale_factor, fy=scale_factor,
                      interpolation=cv2.INTER_LINEAR)
def extract_white_prior(img, threshold=200):
    """Return a binary mask that keeps only the bright (near-white) pixels.

    The image is converted from BGR to grayscale; every pixel brighter than
    ``threshold`` becomes 255 and everything else becomes 0.

    Args:
        img: BGR image array.
        threshold: Grayscale cut-off above which a pixel counts as white.

    Returns:
        The thresholded single-channel image.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (used_threshold, image); only the image matters.
    return cv2.threshold(grayscale, threshold, 255, cv2.THRESH_BINARY)[1]
# index: running counter that detect_subtitle uses to number the debug crops it
#        writes to ./cap/cap{index}.png (incremented after every write).
# t: in the visible part of this file it is only mentioned in commented-out
#    thresholding code — presumably a binarisation threshold; TODO confirm.
index
=
0
t
=
140
def
detect_subtitle
(
org_img
:
np
.
ndarray
,
lastSubTitle
,
last_confidence
)
->
Tuple
[
Union
[
str
,
None
],
float
]:
"""检测当前画面得到字幕信息
"""检测当前画面得到字幕信息
Args:
Args:
...
@@ -348,12 +397,65 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
...
@@ -348,12 +397,65 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
Returns:
Returns:
Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
"""
"""
subTitle
=
''
ocr_res
=
""
# up_b = 276
# up_b = 276
# down_b = 297
# down_b = 297
global
ocr_positions
# ocr_positions.append([676, 712])
h
=
None
global
index
for
i
in
range
(
len
(
ocr_positions
)):
img
=
copy
.
deepcopy
(
org_img
)
up_b
=
ocr_positions
[
i
][
0
]
down_b
=
ocr_positions
[
i
][
1
]
height
=
down_b
-
up_b
height
=
down_b
-
up_b
if
len
(
ocr_positions
)
==
1
:
img
=
img
[
int
(
up_b
-
height
*
0.7
):
int
(
down_b
+
height
*
0.7
)]
img
=
img
[
int
(
up_b
-
height
*
0.7
):
int
(
down_b
+
height
*
0.7
)]
else
:
# cropped_img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
cropped_img
=
img
[
int
(
up_b
):
int
(
down_b
)]
# cropped_img = resize_img(cropped_img)
# x = float(150 / height)
# img_h, img_w = cropped_img.shape[:2]
# img_h = int(img_h * x)
# img_w = int(img_w * x)
# cropped_img = cv2.resize(cropped_img, (img_w, img_h))
# 定义要添加的上下空白的高度
padding_top
=
height
*
0.7
padding_bottom
=
height
*
0.7
# padding_top = 150
# padding_bottom = 150
# 计算新图像的高度
new_height
=
cropped_img
.
shape
[
0
]
+
padding_top
+
padding_bottom
# 创建一个新的空白图像
img
=
np
.
zeros
((
int
(
new_height
),
cropped_img
.
shape
[
1
],
3
),
dtype
=
np
.
uint8
)
# 将裁剪后的图像放置在新图像中间
start_y
=
int
(
padding_top
)
end_y
=
start_y
+
cropped_img
.
shape
[
0
]
img
[
start_y
:
end_y
,
:]
=
cropped_img
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
# t = 230
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# cv2.imwrite(f'./test2.png', img)
# index = index + 1
# img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
# 针对低帧率的视频做图像放大处理
# 针对低帧率的视频做图像放大处理
print
(
height
)
print
(
height
)
print
(
up_b
)
print
(
up_b
)
...
@@ -361,16 +463,26 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
...
@@ -361,16 +463,26 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
print
(
img
.
shape
)
print
(
img
.
shape
)
if
img
.
shape
[
1
]
<
1000
:
if
img
.
shape
[
1
]
<
1000
:
img
=
cv2
.
resize
(
img
,
(
int
(
img
.
shape
[
1
]
*
1.5
),
int
(
img
.
shape
[
0
]
*
1.5
)))
img
=
cv2
.
resize
(
img
,
(
int
(
img
.
shape
[
1
]
*
1.5
),
int
(
img
.
shape
[
0
]
*
1.5
)))
cv2
.
imwrite
(
'./cap.png'
,
img
)
# img = extract_white_prior(img)
cv2
.
imwrite
(
f
'./cap/cap{index}.png'
,
img
)
index
=
index
+
1
print
(
">>>>>>>>>>>>>>>>>>>>>>>>>>>new log"
+
str
(
index
-
1
))
res
=
ocr
.
ocr
(
img
,
cls
=
True
)
res
=
ocr
.
ocr
(
img
,
cls
=
True
)
print
(
'--------> res'
,
res
)
print
(
'--------> res'
,
res
)
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
1
])
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
1
])
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
0
])
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
0
])
if
len
(
res
)
==
0
:
if
len
(
res
)
==
0
:
return
None
,
0
continue
# return None, 0, None
possible_txt
=
[]
possible_txt
=
[]
subTitle
=
''
conf
=
0
conf
=
0
print
(
'res --------->'
,
res
)
print
(
'res --------->'
,
res
)
res
.
sort
(
key
=
lambda
rect
:
rect
[
0
][
0
][
0
]
+
rect
[
0
][
1
][
0
])
# 按照中心点排序
for
x
in
res
:
for
x
in
res
:
# cv2.imshow("cut", img)
# cv2.imshow("cut", img)
# cv2.waitKey(0)
# cv2.waitKey(0)
...
@@ -384,6 +496,13 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
...
@@ -384,6 +496,13 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
print
(
"文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}"
.
format
(
txt
,
confidence
,
mid
/
img
.
shape
[
1
],
gradient
,
font_size
))
print
(
"文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}"
.
format
(
txt
,
confidence
,
mid
/
img
.
shape
[
1
],
gradient
,
font_size
))
print
(
"字体大小差距: {}"
,
format
(
height
-
font_size
))
print
(
"字体大小差距: {}"
,
format
(
height
-
font_size
))
print
(
"高度中心:{}"
.
format
((
rect
[
0
][
1
]
+
rect
[
1
][
1
])
/
2
/
img
.
shape
[
0
]))
print
(
"高度中心:{}"
.
format
((
rect
[
0
][
1
]
+
rect
[
1
][
1
])
/
2
/
img
.
shape
[
0
]))
# if confidence < 0.95:
# # global t
# print("小于0.95,重新检测,阈值为:" + str(t))
# t = t + 20
# return detect_subtitle(org_img, lastSubTitle, last_confidence)
if
h
==
None
:
h
=
font_size
conf_thred1
=
0.7
conf_thred1
=
0.7
conf_thred2
=
0.85
conf_thred2
=
0.85
# conf_thred1 = 0.1
# conf_thred1 = 0.1
...
@@ -411,8 +530,18 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
...
@@ -411,8 +530,18 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
subTitle
=
' '
.
join
(
possible_txt
)
subTitle
=
' '
.
join
(
possible_txt
)
print
(
subTitle
,
conf
)
print
(
subTitle
,
conf
)
if
len
(
subTitle
)
>
0
:
if
len
(
subTitle
)
>
0
:
return
subTitle
,
conf
ocr_res
=
ocr_res
+
subTitle
return
None
,
0
if
(
len
(
ocr_res
))
>
0
:
print
(
">>>>>>>>>>>>cur subtitle:"
+
ocr_res
+
",confidence: "
+
str
(
confidence
)
+
",last_confidence: "
+
str
(
last_confidence
))
# if len(ocr_positions) == 1 and last_confidence != None and confidence != None and confidence < last_confidence:
# ocr_res = lastSubTitle
# confidence = last_confidence
# print(">>>>>>>>>>>>res subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
return
ocr_res
,
confidence
,
conf
,
h
if
check_have_ocr
(
img
):
return
"err"
,
None
,
0
,
None
return
None
,
None
,
0
,
None
def
process_video
(
video_path
:
str
,
begin
:
float
,
end
:
float
,
book_path
:
str
,
sheet_name
:
str
,
state
=
None
,
mainWindow
:
MainWindow
=
None
):
def
process_video
(
video_path
:
str
,
begin
:
float
,
end
:
float
,
book_path
:
str
,
sheet_name
:
str
,
state
=
None
,
mainWindow
:
MainWindow
=
None
):
...
@@ -440,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -440,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
video
=
cv2
.
VideoCapture
(
video_path
)
video
=
cv2
.
VideoCapture
(
video_path
)
fps
=
video
.
get
(
cv2
.
CAP_PROP_FPS
)
fps
=
video
.
get
(
cv2
.
CAP_PROP_FPS
)
lastSubTitle
=
None
lastSubTitle
=
None
last_confidence
=
None
lastConf
=
0
lastConf
=
0
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
res
=
[]
res
=
[]
...
@@ -448,13 +578,16 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -448,13 +578,16 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
end_time
=
0
end_time
=
0
video
.
set
(
cv2
.
CAP_PROP_POS_MSEC
,
begin
*
1000
)
video
.
set
(
cv2
.
CAP_PROP_POS_MSEC
,
begin
*
1000
)
pre_state
=
state
[
0
]
pre_state
=
state
[
0
]
ocr_h
=
None
while
True
:
while
True
:
_
,
frame
=
video
.
read
()
_
,
frame
=
video
.
read
()
if
frame
is
None
:
if
frame
is
None
:
break
break
cnt
+=
1
cnt
+=
1
cur_time
=
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
cur_time
=
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
# 判断当前帧是否已超限制
# 判断当前帧是否已超限制
# end 主要用来判断是否越界
if
cur_time
>
end
:
if
cur_time
>
end
:
if
cur_time
-
end_time
>
1
:
if
cur_time
-
end_time
>
1
:
print
(
'--------------------------------------------------'
)
print
(
'--------------------------------------------------'
)
...
@@ -466,7 +599,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -466,7 +599,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# 判断当前是否有字幕需要被保存下来
# 判断当前是否有字幕需要被保存下来
if
end_time
<
start_time
:
if
end_time
<
start_time
:
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end
,
3
),
lastSubTitle
,
''
])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end
,
3
),
lastSubTitle
,
''
],
ocr_h
=
ocr_h
)
print
(
">>>>>>subtitle,ocr_h1:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
break
break
# 每秒取4帧画面左右
# 每秒取4帧画面左右
# TODO 取帧算法优化
# TODO 取帧算法优化
...
@@ -479,7 +613,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -479,7 +613,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
mainWindow
.
projectContext
.
nd_process
=
state
[
0
]
mainWindow
.
projectContext
.
nd_process
=
state
[
0
]
mainWindow
.
projectContext
.
last_time
=
cur_time
mainWindow
.
projectContext
.
last_time
=
cur_time
subTitle
,
conf
=
detect_subtitle
(
frame
)
subTitle
,
confidence
,
conf
,
cur_ocr_h
=
detect_subtitle
(
frame
,
lastSubTitle
,
last_confidence
)
if
subTitle
==
"err"
:
continue
if
subTitle
is
not
None
:
if
subTitle
is
not
None
:
subTitle
=
normalize
(
subTitle
)
subTitle
=
normalize
(
subTitle
)
...
@@ -488,6 +625,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -488,6 +625,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# 第一次找到字幕
# 第一次找到字幕
if
lastSubTitle
is
None
and
subTitle
is
not
None
:
if
lastSubTitle
is
None
and
subTitle
is
not
None
:
if
cur_ocr_h
!=
None
:
ocr_h
=
cur_ocr_h
start_time
=
cur_time
start_time
=
cur_time
# 字幕消失
# 字幕消失
...
@@ -500,14 +639,17 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -500,14 +639,17 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
(
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
*
normal_speed
)
(
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
*
normal_speed
)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list
(
mainWindow
,
"旁白"
,
[
''
,
''
,
''
,
'
%
d'
%
recommend_lens
])
add_to_list
(
mainWindow
,
"旁白"
,
[
''
,
''
,
''
,
'
%
d'
%
recommend_lens
]
,
ocr_h
)
print
(
start_time
,
end_time
,
lastSubTitle
)
print
(
start_time
,
end_time
,
lastSubTitle
)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
],
ocr_h
)
print
(
">>>>>>subtitle,ocr_h2:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
elif
lastSubTitle
is
not
None
and
subTitle
is
not
None
:
elif
lastSubTitle
is
not
None
and
subTitle
is
not
None
:
# 两句话连在一起,但是两句话不一样
# 两句话连在一起,但是两句话不一样
if
string_similar
(
lastSubTitle
,
subTitle
)
<
0.6
:
if
string_similar
(
lastSubTitle
,
subTitle
)
<
0.6
:
if
cur_ocr_h
!=
None
:
ocr_h
=
cur_ocr_h
end_time
=
cur_time
end_time
=
cur_time
res
.
append
([
start_time
,
end_time
,
lastSubTitle
])
res
.
append
([
start_time
,
end_time
,
lastSubTitle
])
if
(
len
(
res
)
==
1
and
res
[
-
1
][
0
]
-
last_time
>=
1
)
or
(
len
(
res
)
>
1
and
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
>=
1
:
if
(
len
(
res
)
==
1
and
res
[
-
1
][
0
]
-
last_time
>=
1
)
or
(
len
(
res
)
>
1
and
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
>=
1
:
...
@@ -516,24 +658,98 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -516,24 +658,98 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
(
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
*
normal_speed
)
(
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
*
normal_speed
)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list
(
mainWindow
,
"旁白"
,
[
''
,
''
,
''
,
'
%
d'
%
recommend_lens
])
add_to_list
(
mainWindow
,
"旁白"
,
[
''
,
''
,
''
,
'
%
d'
%
recommend_lens
]
,
ocr_h
)
print
(
start_time
,
end_time
,
lastSubTitle
)
print
(
start_time
,
end_time
,
lastSubTitle
)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
],
ocr_h
)
print
(
">>>>>>subtitle,ocr_h3:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
start_time
=
end_time
start_time
=
end_time
else
:
else
:
lastSubTitle
=
subTitle
if
conf
>
lastConf
else
lastSubTitle
lastSubTitle
=
subTitle
if
conf
>
lastConf
else
lastSubTitle
continue
continue
# 当前字幕与上一段字幕不一样
# 当前字幕与上一段字幕不一样
if
subTitle
!=
"err"
:
lastSubTitle
=
subTitle
lastSubTitle
=
subTitle
last_confidence
=
confidence
lastConf
=
conf
lastConf
=
conf
print
(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1"
)
def
add_to_list
(
mainWindow
:
MainWindow
,
element_type
:
str
,
li
:
list
):
# print(ocr_h_map)
# mainWindow.refresh_tab_slot()
while
(
mainWindow
.
refresh_flag
==
True
):
time
.
sleep
(
1
)
mainWindow
.
detect_lock
=
True
try
:
process_err_ocr
(
mainWindow
)
finally
:
mainWindow
.
detect_lock
=
False
# mainWindow.refresh_lock.acquire()
# try:
# process_err_ocr(mainWindow)
# finally:
# mainWindow.refresh_lock.release()
def process_err_ocr(mainWindow):
    """Delete subtitle rows whose OCR text height is a statistical outlier.

    Collects the ``ocr_h`` of every element that has a non-empty subtitle,
    computes the mean and standard deviation of those heights, and asks the
    main window to delete each row whose height differs from the mean by
    more than 2.7 standard deviations.

    Args:
        mainWindow: Object exposing ``projectContext.all_elements`` and
            ``del_line_operation_slot(row=..., show_msg_flag=...)``.
    """
    # Rebuild the shared map from scratch. Keeping entries from an earlier
    # call would leave indices that no longer match all_elements once rows
    # have been deleted.
    ocr_h_map.clear()
    for position, element in enumerate(mainWindow.projectContext.all_elements):
        if element.subtitle is None or element.subtitle == "":
            continue
        # Elements without a measured height cannot take part in the
        # statistics; a None here would make np.mean raise.
        if element.ocr_h is None:
            continue
        ocr_h_map[position] = element.ocr_h
    print(ocr_h_map)

    # Nothing measurable: avoid NaN statistics and runtime warnings.
    if not ocr_h_map:
        return

    data = list(ocr_h_map.values())
    print(">>>>values")
    print(ocr_h_map.values())
    table_indexs = list(ocr_h_map.keys())
    print(table_indexs)

    # Mean and standard deviation of the collected heights.
    mean = np.mean(data)
    std_dev = np.std(data)
    # Outlier threshold expressed as a multiple of the standard deviation.
    threshold = 2.7 * std_dev
    print(mean)
    print(threshold)

    rm_list = [pos for pos, value in enumerate(data)
               if abs(value - mean) > threshold]
    print(rm_list)
    print(len(mainWindow.projectContext.all_elements))

    for i, pos in enumerate(rm_list):
        element_index = table_indexs[pos]
        try:
            # "- i" compensates for the rows already removed by earlier
            # iterations. NOTE(review): the deleted row is element_index + 1,
            # which presumably reflects a table row offset — confirm against
            # del_line_operation_slot.
            print(">>>>>>>>>will rm" + str(element_index + 1 - i) + ", subtitle:" +
                  mainWindow.projectContext.all_elements[element_index - i].subtitle)
            print(element_index)
            mainWindow.del_line_operation_slot(row=element_index + 1 - i,
                                               show_msg_flag=False)
            time.sleep(0.5)
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(">>>>>>>>>>>>>>>>>>>>>>del err")
            print(e)
def
add_to_list
(
mainWindow
:
MainWindow
,
element_type
:
str
,
li
:
list
,
ocr_h
:
int
=
None
):
# 默认使用配置文件中的语速
# 默认使用配置文件中的语速
speed
=
mainWindow
.
projectContext
.
speaker_speed
speed
=
mainWindow
.
projectContext
.
speaker_speed
aside_head_time
=
float
(
reverse_time_to_seconds
(
mainWindow
.
aside_head_time
))
if
mainWindow
.
aside_head_time
!=
None
else
float
(
0
)
aside_head_time
=
float
(
reverse_time_to_seconds
(
mainWindow
.
aside_head_time
))
if
mainWindow
.
aside_head_time
!=
None
else
float
(
0
)
st_time_sec
,
ed_time_sec
,
subtitle
,
suggest
=
li
st_time_sec
,
ed_time_sec
,
subtitle
,
suggest
=
li
# global table_index
# if ocr_h != None and element_type == "字幕":
# index = table_index
# ocr_h_map[index] = ocr_h
# print(">>>>>>>>>>>>>>>>>>ocr H map:" + str(index) + ",subtitle:" + subtitle)
# print(ocr_h_map)
print
(
">>>>>>>>start time:"
)
print
(
">>>>>>>>start time:"
)
print
(
aside_head_time
)
print
(
aside_head_time
)
print
(
st_time_sec
)
print
(
st_time_sec
)
...
@@ -541,11 +757,12 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
...
@@ -541,11 +757,12 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
print
(
">>>>>>need del"
)
print
(
">>>>>>need del"
)
print
(
st_time_sec
)
print
(
st_time_sec
)
if
not
mainWindow
.
add_head_aside
:
if
not
mainWindow
.
add_head_aside
:
new_element
=
Element
(
'0.00'
,
""
,
""
,
"0/100"
,
""
,
speed
)
new_element
=
Element
(
'0.00'
,
""
,
""
,
"0/100"
,
""
,
speed
,
ocr_h
)
mainWindow
.
projectContext
.
aside_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
aside_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
aside_list
[
-
1
])
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
aside_list
[
-
1
])
mainWindow
.
last_aside_index
=
len
(
mainWindow
.
projectContext
.
all_elements
)
-
1
mainWindow
.
last_aside_index
=
len
(
mainWindow
.
projectContext
.
all_elements
)
-
1
mainWindow
.
add_head_aside
=
True
mainWindow
.
add_head_aside
=
True
# table_index = table_index + 1
return
return
st_time_sec
,
ed_time_sec
=
str
(
st_time_sec
),
str
(
ed_time_sec
)
st_time_sec
,
ed_time_sec
=
str
(
st_time_sec
),
str
(
ed_time_sec
)
...
@@ -553,7 +770,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
...
@@ -553,7 +770,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
aside
=
""
aside
=
""
i
=
len
(
mainWindow
.
projectContext
.
all_elements
)
i
=
len
(
mainWindow
.
projectContext
.
all_elements
)
if
element_type
==
"字幕"
:
if
element_type
==
"字幕"
:
new_element
=
Element
(
st_time_sec
,
ed_time_sec
,
subtitle
,
suggest
,
aside
,
speed
)
new_element
=
Element
(
st_time_sec
,
ed_time_sec
,
subtitle
,
suggest
,
aside
,
speed
,
ocr_h
)
new_element
.
print_self
()
new_element
.
print_self
()
if
mainWindow
.
last_aside_index
!=
None
and
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
==
""
and
new_element
.
ed_time_sec
!=
""
:
if
mainWindow
.
last_aside_index
!=
None
and
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
==
""
and
new_element
.
ed_time_sec
!=
""
:
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
=
new_element
.
st_time_sec
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
=
new_element
.
st_time_sec
...
@@ -561,9 +778,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
...
@@ -561,9 +778,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# print(">>>>>>>>>>>remove short aside")
# print(">>>>>>>>>>>remove short aside")
mainWindow
.
projectContext
.
aside_list
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
projectContext
.
aside_list
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
projectContext
.
all_elements
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
projectContext
.
all_elements
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
# table_index = table_index - 1
mainWindow
.
last_aside_index
=
None
mainWindow
.
last_aside_index
=
None
mainWindow
.
projectContext
.
subtitle_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
subtitle_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
subtitle_list
[
-
1
])
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
subtitle_list
[
-
1
])
# table_index = table_index + 1
else
:
else
:
if
i
==
0
:
if
i
==
0
:
st_time_sec
=
"0.01"
st_time_sec
=
"0.01"
...
@@ -574,7 +793,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
...
@@ -574,7 +793,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# 因为暂时没有用到ed_time_sec,所以直接赋值空吧
# 因为暂时没有用到ed_time_sec,所以直接赋值空吧
ed_time_sec
=
""
ed_time_sec
=
""
new_element
=
Element
(
st_time_sec
,
ed_time_sec
,
subtitle
,
suggest
,
aside
,
speed
)
new_element
=
Element
(
st_time_sec
,
ed_time_sec
,
subtitle
,
suggest
,
aside
,
speed
,
ocr_h
)
new_element
.
print_self
()
new_element
.
print_self
()
if
mainWindow
.
last_aside_index
!=
None
and
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
==
""
and
new_element
.
ed_time_sec
!=
""
:
if
mainWindow
.
last_aside_index
!=
None
and
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
==
""
and
new_element
.
ed_time_sec
!=
""
:
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
=
new_element
.
st_time_sec
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
=
new_element
.
st_time_sec
...
@@ -582,15 +801,18 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
...
@@ -582,15 +801,18 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# print(">>>>>>>>>>>remove short aside")
# print(">>>>>>>>>>>remove short aside")
mainWindow
.
projectContext
.
aside_list
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
projectContext
.
aside_list
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
projectContext
.
all_elements
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
projectContext
.
all_elements
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
# table_index = table_index - 1
mainWindow
.
last_aside_index
=
None
mainWindow
.
last_aside_index
=
None
new_element
.
suggest
=
"0/"
+
new_element
.
suggest
new_element
.
suggest
=
"0/"
+
new_element
.
suggest
if
(
st_time_sec
!=
None
and
st_time_sec
!=
""
and
aside_head_time
>
float
(
st_time_sec
)):
if
(
st_time_sec
!=
None
and
st_time_sec
!=
""
and
aside_head_time
>
float
(
st_time_sec
)):
return
return
mainWindow
.
projectContext
.
aside_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
aside_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
aside_list
[
-
1
])
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
aside_list
[
-
1
])
# table_index = table_index + 1
mainWindow
.
last_aside_index
=
len
(
mainWindow
.
projectContext
.
all_elements
)
-
1
mainWindow
.
last_aside_index
=
len
(
mainWindow
.
projectContext
.
all_elements
)
-
1
# end_time 主要用来判断是否越界
def
detect_with_ocr
(
video_path
:
str
,
book_path
:
str
,
start_time
:
float
,
end_time
:
float
,
state
=
None
,
mainWindow
:
MainWindow
=
None
):
def
detect_with_ocr
(
video_path
:
str
,
book_path
:
str
,
start_time
:
float
,
end_time
:
float
,
state
=
None
,
mainWindow
:
MainWindow
=
None
):
"""使用ocr检测视频获取字幕并输出旁白推荐
"""使用ocr检测视频获取字幕并输出旁白推荐
...
@@ -616,7 +838,11 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
...
@@ -616,7 +838,11 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
up_b
,
down_b
=
context
.
caption_boundings
[
0
],
context
.
caption_boundings
[
1
]
up_b
,
down_b
=
context
.
caption_boundings
[
0
],
context
.
caption_boundings
[
1
]
else
:
else
:
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
up_b
,
down_b
=
get_position
(
video_path
,
0
,
mainWindow
.
rate
,
mainWindow
.
rate_bottom
)
# up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
get_position
(
video_path
,
0
,
mainWindow
.
ocr_ranges
)
print
(
">>>>>positions:"
)
global
ocr_positions
print
(
ocr_positions
)
context
.
caption_boundings
=
[
up_b
,
down_b
]
context
.
caption_boundings
=
[
up_b
,
down_b
]
context
.
detected
=
True
context
.
detected
=
True
...
@@ -627,8 +853,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
...
@@ -627,8 +853,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
# print("process the total video at time: ", datetime.datetime.now())
# print("process the total video at time: ", datetime.datetime.now())
process_video
(
video_path
,
start_time
,
end_time
,
book_name_xlsx
,
sheet_name_xlsx
,
state
,
mainWindow
)
process_video
(
video_path
,
start_time
,
end_time
,
book_name_xlsx
,
sheet_name_xlsx
,
state
,
mainWindow
)
def check_have_ocr(img):
    """Report whether a frame contains any pixel brighter than the threshold.

    The image is converted from BGR to grayscale and binarised with a fixed
    threshold of 230; the answer is True when at least one pixel of the
    binary image equals 255.

    Args:
        img: Image accepted by ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        bool: True if any pixel survives the threshold, otherwise False.
    """
    threshold = 230
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    return bool(np.any(binary == 255))
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
# path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
# path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
path
=
"C:/Users/Smile/Desktop/accessibility-movie/"
path
=
"C:/Users/Smile/Desktop/accessibility-movie/"
# print("get_pos:", get_position(path, 0))
# print("get_pos:", get_position(path, 0))
# evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
# img = cv2.imread("./cap/cap879.png")
img
=
cv2
.
imread
(
"./cap/cap812.png"
,
cv2
.
IMREAD_GRAYSCALE
)
# img = cv2.equalizeHist(img)
t
=
230
_
,
img
=
cv2
.
threshold
(
img
,
t
,
255
,
cv2
.
THRESH_BINARY
)
# img = resize_img(img)
detect_subtitle
(
img
,
None
,
None
)
# img = cv2.equalizeHist(img)
# t = 120
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# # ret, binary_image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # binary_image = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
cv2
.
imwrite
(
f
'./binary2.png'
,
img
)
if
np
.
any
(
img
==
255
):
print
(
"111111"
)
else
:
print
(
"222222"
)
main_window.py
View file @
2c4cd5c0
...
@@ -13,7 +13,7 @@ import os
...
@@ -13,7 +13,7 @@ import os
import
cv2
import
cv2
from
PyQt5
import
QtWidgets
from
PyQt5
import
QtWidgets
from
PyQt5.QtWidgets
import
QMainWindow
,
QFileDialog
,
QTableWidget
,
QTableWidgetItem
,
QAbstractItemView
,
QProgressBar
,
QLabel
,
QApplication
,
QPushButton
,
QMenu
from
PyQt5.QtWidgets
import
QMainWindow
,
QFileDialog
,
QTableWidget
,
QTableWidgetItem
,
QAbstractItemView
,
QProgressBar
,
QLabel
,
QApplication
,
QPushButton
,
QMenu
,
QDialog
,
QVBoxLayout
from
PyQt5.QtCore
import
QUrl
,
Qt
,
QTimer
,
QRect
,
pyqtSignal
,
QPersistentModelIndex
from
PyQt5.QtCore
import
QUrl
,
Qt
,
QTimer
,
QRect
,
pyqtSignal
,
QPersistentModelIndex
from
PyQt5.QtMultimedia
import
*
from
PyQt5.QtMultimedia
import
*
from
PyQt5.QtGui
import
QIcon
from
PyQt5.QtGui
import
QIcon
...
@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
...
@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
from
ffmpeg_util
import
adjust_audio_volume
from
ffmpeg_util
import
adjust_audio_volume
from
PyQt5.QtCore
import
QThread
;
from
PyQt5.QtCore
import
QThread
;
import
threading
# from emit_import_event import emit_import_event
# from emit_import_event import emit_import_event
...
@@ -71,12 +72,56 @@ class WorkerThread(QThread):
...
@@ -71,12 +72,56 @@ class WorkerThread(QThread):
for
t
in
self
.
main_window
.
threads
:
for
t
in
self
.
main_window
.
threads
:
t
.
start
()
t
.
start
()
def refresh(self):
    """Emit the main window's ``import_process_sign`` with ``self.elements``."""
    self.main_window.import_process_sign.emit(self.elements)
class ProcessErrThread(QThread):
    """Qt thread whose ``run`` launches ``process_err`` in a ``RunThread``."""

    def __init__(self, main_window):
        super().__init__()
        # Passed to process_err as its only positional argument.
        self.main_window = main_window

    def run(self):
        """Start a daemon worker named "process_err" and return."""
        # Imported here rather than at module level, as in the original code.
        from narratage_detection import process_err

        worker = RunThread(
            funcName=process_err,
            args=(self.main_window,),
            name="process_err",
        )
        worker.setDaemon(True)
        worker.start()
class AutoCloseDialog(QDialog):
    """Dialog that shows a single fixed "locating subtitle bounds" label.

    NOTE(review): nothing in this class closes the dialog; any automatic
    closing must be driven by the caller.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowFlags(Qt.CustomizeWindowHint | Qt.NoDropShadowWindowHint)

        message = QLabel("字幕边界定位中,请稍后!")
        box = QVBoxLayout()
        box.addWidget(message)
        self.setLayout(box)
def
refresh
(
self
):
class
LocalOcrThread
(
QThread
):
self
.
main_window
.
import_process_sign
.
emit
(
self
.
elements
)
def __init__(self, main_window, path):
    """Keep the main window and the path that ``run`` forwards to it.

    Args:
        main_window: Object providing ``auto_location_ocr``, ``threads``,
            ``all_threads`` and ``state`` (all used by ``run``).
        path: Value passed unchanged to ``main_window.auto_location_ocr``.
    """
    super().__init__()
    self.main_window = main_window
    self.path = path
def run(self):
    """Launch subtitle auto-location in a daemon ``RunThread``.

    Resets ``main_window.state`` and ``main_window.threads``, registers the
    new worker in both ``threads`` and ``all_threads``, then starts every
    thread held in ``threads``.
    """
    window = self.main_window

    # Run the time-consuming work in the background.
    # Single-element state list shared through the main window; ends as [0.1].
    shared_state = [None]
    window.state = shared_state
    shared_state[0] = 0.1

    window.threads = []
    worker = RunThread(
        funcName=window.auto_location_ocr,
        args=(self.path,),
        name="auto_location_ocr",
    )
    worker.setDaemon(True)
    window.threads.append(worker)
    window.all_threads.append(worker)

    for pending in window.threads:
        pending.start()
def finish(self):
    """Emit the main window's ``location_ocr_sign`` signal."""
    self.main_window.location_ocr_sign.emit()
class
CustomDelegate
(
QtWidgets
.
QStyledItemDelegate
):
class
CustomDelegate
(
QtWidgets
.
QStyledItemDelegate
):
def
paint
(
self
,
painter
,
option
,
index
):
def
paint
(
self
,
painter
,
option
,
index
):
...
@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
renew_signal
=
pyqtSignal
(
str
)
renew_signal
=
pyqtSignal
(
str
)
import_process_sign
=
pyqtSignal
(
list
)
import_process_sign
=
pyqtSignal
(
list
)
refresh_sign
=
pyqtSignal
()
refresh_sign
=
pyqtSignal
()
location_ocr_sign
=
pyqtSignal
()
def
__init__
(
self
,
project_path
):
def
__init__
(
self
,
project_path
):
super
(
MainWindow
,
self
)
.
__init__
()
super
(
MainWindow
,
self
)
.
__init__
()
self
.
location_ocr_sign
.
connect
(
self
.
finish_location_ocr
)
self
.
last_aside_index
=
None
self
.
last_aside_index
=
None
self
.
setupUi
(
self
)
self
.
setupUi
(
self
)
self
.
statusbar
.
showMessage
(
"hello"
,
5000
)
self
.
statusbar
.
showMessage
(
"hello"
,
5000
)
...
@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# 所有QTimer集中管理
# 所有QTimer集中管理
self
.
import_excel_timer
=
QTimer
()
self
.
import_excel_timer
=
QTimer
()
self
.
import_excel_timer
.
timeout
.
connect
(
self
.
check_if_import_excel_over
)
self
.
import_excel_timer
.
timeout
.
connect
(
self
.
check_if_import_excel_over
)
self
.
location_ocr_timer
=
QTimer
()
self
.
location_ocr_timer
.
timeout
.
connect
(
self
.
check_if_location_ocr
)
self
.
detect_timer
=
QTimer
()
self
.
detect_timer
=
QTimer
()
self
.
detect_timer
.
timeout
.
connect
(
self
.
check_if_detect_over_slot
)
self
.
detect_timer
.
timeout
.
connect
(
self
.
check_if_detect_over_slot
)
self
.
synthesis_timer
=
QTimer
()
self
.
synthesis_timer
=
QTimer
()
...
@@ -161,7 +210,14 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -161,7 +210,14 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
video_timer
.
start
(
1000
)
# todo 作为参数配置
self
.
video_timer
.
start
(
1000
)
# todo 作为参数配置
self
.
refresh_tab_timer
=
QTimer
()
self
.
refresh_tab_timer
=
QTimer
()
self
.
refresh_tab_timer
.
timeout
.
connect
(
self
.
refresh_tab_slot
)
self
.
refresh_tab_timer
.
timeout
.
connect
(
self
.
refresh_tab_slot
)
self
.
up_ocr_timer
=
QTimer
()
self
.
down_ocr_timer
=
QTimer
()
self
.
up_ocr_timer
.
timeout
.
connect
(
self
.
up_ocr_timer_func
)
self
.
down_ocr_timer
.
timeout
.
connect
(
self
.
down_ocr_timer_func
)
self
.
up_ocr_bottom_timer
=
QTimer
()
self
.
down_ocr_bottom_timer
=
QTimer
()
self
.
up_ocr_bottom_timer
.
timeout
.
connect
(
self
.
up_ocr_bottom_timer_func
)
self
.
down_ocr_bottom_timer
.
timeout
.
connect
(
self
.
down_ocr_bottom_timer_func
)
"""状态栏相关空间
"""状态栏相关空间
"""
"""
...
@@ -219,18 +275,28 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -219,18 +275,28 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
insert_aside_from_now_btn
.
clicked
.
connect
(
self
.
insert_aside_from_now_btn
.
clicked
.
connect
(
self
.
insert_aside_from_now_slot
)
self
.
insert_aside_from_now_slot
)
self
.
insert_aside_from_now_btn
.
setEnabled
(
False
)
self
.
insert_aside_from_now_btn
.
setEnabled
(
False
)
self
.
up_ocr_btn
.
click
ed
.
connect
(
self
.
up_ocr_btn
.
press
ed
.
connect
(
self
.
up_ocr
)
self
.
up_ocr
)
self
.
down_ocr_btn
.
clicked
.
connect
(
self
.
up_ocr_btn
.
released
.
connect
(
self
.
up_ocr_stop
)
self
.
down_ocr_btn
.
pressed
.
connect
(
self
.
down_ocr
)
self
.
down_ocr
)
self
.
up_ocr_bottom_btn
.
clicked
.
connect
(
self
.
down_ocr_btn
.
released
.
connect
(
self
.
down_ocr_stop
)
self
.
up_ocr_bottom_btn
.
pressed
.
connect
(
self
.
up_ocr_bottom
)
self
.
up_ocr_bottom
)
self
.
down_ocr_bottom_btn
.
clicked
.
connect
(
self
.
up_ocr_bottom_btn
.
released
.
connect
(
self
.
up_ocr_bottom_stop
)
self
.
down_ocr_bottom_btn
.
pressed
.
connect
(
self
.
down_ocr_bottom
)
self
.
down_ocr_bottom
)
self
.
down_ocr_bottom_btn
.
released
.
connect
(
self
.
down_ocr_bottom_stop
)
self
.
confirm_ocr_btn
.
clicked
.
connect
(
self
.
confirm_ocr
)
self
.
confirm_head_aside_btn
.
clicked
.
connect
(
self
.
confirm_head_aside_btn
.
clicked
.
connect
(
self
.
confirm_head_aside
)
self
.
confirm_head_aside
)
self
.
detect_btn
.
clicked
.
connect
(
self
.
detect_btn
.
clicked
.
connect
(
self
.
show_
detect
_dialog
)
self
.
show_
confirmation
_dialog
)
"""视频预览相关信息
"""视频预览相关信息
...
@@ -398,6 +464,10 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -398,6 +464,10 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
sld_video
.
setFocus
()
self
.
sld_video
.
setFocus
()
self
.
aside_head_time
=
None
self
.
aside_head_time
=
None
self
.
add_head_aside
=
False
self
.
add_head_aside
=
False
self
.
ocr_ranges
=
[]
self
.
refresh_lock
=
threading
.
Lock
()
self
.
detect_lock
=
False
self
.
refresh_flag
=
False
# 打印到log文件中
# 打印到log文件中
t
=
RunThread
(
funcName
=
make_print_to_file
,
args
=
os
.
path
.
join
(
os
.
getcwd
(),
'log'
),
name
=
"logging"
)
t
=
RunThread
(
funcName
=
make_print_to_file
,
args
=
os
.
path
.
join
(
os
.
getcwd
(),
'log'
),
name
=
"logging"
)
...
@@ -407,6 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -407,6 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
get_focus_thread
.
setDaemon
(
True
)
get_focus_thread
.
setDaemon
(
True
)
get_focus_thread
.
start
()
get_focus_thread
.
start
()
def finish_location_ocr(self):
    """Tell the user that subtitle location finished and should be checked.

    Connected to ``location_ocr_sign`` in ``__init__``.
    """
    self.import_excel_dialog.show_with_msg("字幕定位结束,请检查是否准确,并调整正确")
def show_confirmation_dialog(self):
    """Ask the user to confirm the OCR range before opening the detect dialog.

    A Yes answer calls ``show_detect_dialog``; any other answer only prints
    a debug marker.
    """
    buttons = QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No
    answer = QtWidgets.QMessageBox.question(
        self, u'警告', u'确认已经校准OCR范围(只包含中文,不含英文)?', buttons)
    if answer != QtWidgets.QMessageBox.Yes:
        print(">>>>>>show_confirmation_dialog")
        return
    self.show_detect_dialog()
def
getFocus
(
self
):
def
getFocus
(
self
):
while
(
True
):
while
(
True
):
...
@@ -496,11 +576,20 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -496,11 +576,20 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
"""
if
self
.
rate
==
None
:
if
self
.
rate
==
None
:
self
.
prompt_dialog
.
show_with_msg
(
"请选择字幕上边界范围"
)
# self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
return
h
=
self
.
widget
.
get_h
()
video_h
=
self
.
wgt_video
.
height
()
self
.
rate
=
float
(
h
-
10
)
/
float
(
video_h
)
if
self
.
rate_bottom
==
None
:
if
self
.
rate_bottom
==
None
:
self
.
prompt_dialog
.
show_with_msg
(
"请选择字幕下边界范围"
)
# self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
h
=
self
.
widget_bottom
.
get_h
()
video_h
=
self
.
wgt_video
.
height
()
self
.
rate_bottom
=
float
(
h
-
6
)
/
float
(
video_h
)
if
not
self
.
check_ocr_rate
():
self
.
prompt_dialog
.
show_with_msg
(
"字幕上边界不能低于下边界"
)
return
return
if
len
(
self
.
ocr_ranges
)
==
0
:
self
.
ocr_ranges
.
append
([
self
.
rate
,
self
.
rate_bottom
])
self
.
detect_dialog
.
init_self
()
self
.
detect_dialog
.
init_self
()
self
.
detect_dialog
.
show
()
self
.
detect_dialog
.
show
()
...
@@ -601,9 +690,39 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -601,9 +690,39 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
action_operate
.
setEnabled
(
True
)
self
.
action_operate
.
setEnabled
(
True
)
self
.
action_insert_aside_from_now
.
setEnabled
(
True
)
self
.
action_insert_aside_from_now
.
setEnabled
(
True
)
self
.
insert_aside_from_now_btn
.
setEnabled
(
True
)
self
.
insert_aside_from_now_btn
.
setEnabled
(
True
)
# self.import_excel_dialog.show_with_msg("正在自动定位字幕边界,请稍后!")
confirm_box
=
QtWidgets
.
QMessageBox
.
question
(
self
,
u'警告'
,
u'是否需要自动定位字幕边界?'
,
QtWidgets
.
QMessageBox
.
Yes
|
QtWidgets
.
QMessageBox
.
No
)
if
confirm_box
==
QtWidgets
.
QMessageBox
.
Yes
:
self
.
location_ocr_thread
=
LocalOcrThread
(
self
,
path
)
self
.
location_ocr_thread
.
start
()
self
.
location_ocr_timer
.
start
(
1000
)
# self.auto_close_dialog = AutoCloseDialog()
# self.auto_close_dialog.exec_()
else
:
print
(
">>>>>>>>>>>>>play_video"
)
def
up_ocr
(
self
):
self
.
widget
.
change_painter_flag
(
True
)
def auto_location_ocr(self, path):
    """Estimate the subtitle band of a video and move both boundary lines to it.

    Args:
        path: Object whose ``path()`` returns the video location preceded by
            one character that is stripped before use (presumably a ``QUrl``
            whose path starts with "/" -- TODO confirm against the caller).
    """
    from detect_with_ocr import evaluate_position

    # Resolve the file path once; it is both logged and passed on.
    video_path = path.path()[1:]
    print(">>>>>>>>>>>>>>>>>>>>>v_path" + video_path)
    y1, y2 = evaluate_position(video_path, 0)
    video_h = self.wgt_video.height()
    # y1/y2 are multiplied by the widget height, i.e. used as fractions of it.
    self.widget.setY(int(video_h * y1) + 6)
    time.sleep(1)
    self.widget_bottom.setY(int(video_h * y2) + 10)
    # NOTE(review): this method is started through RunThread by
    # LocalOcrThread.run, so the setY calls above may run off the GUI
    # thread -- confirm this is safe.
    # "%d" would truncate fractional positions to 0; "%s" logs the real values.
    print("y1:%s,y2:%s" % (y1, y2))
def check_ocr_rate(self):
    """Return True unless the upper boundary ratio exceeds the lower one."""
    return not self.rate > self.rate_bottom
def
up_ocr_timer_func
(
self
):
# self.widget.change_painter_flag(True)
h
=
self
.
widget
.
up
(
3
)
h
=
self
.
widget
.
up
(
3
)
video_h
=
self
.
wgt_video
.
height
()
video_h
=
self
.
wgt_video
.
height
()
self
.
rate
=
float
(
h
-
10
)
/
float
(
video_h
)
self
.
rate
=
float
(
h
-
10
)
/
float
(
video_h
)
...
@@ -612,9 +731,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -612,9 +731,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
print
(
self
.
wgt_video
.
height
())
print
(
self
.
wgt_video
.
height
())
print
(
">>>>>>>>>rate"
+
str
(
self
.
rate
))
print
(
">>>>>>>>>rate"
+
str
(
self
.
rate
))
def
down_ocr_timer_func
(
self
):
def
down_ocr
(
self
):
# self.widget.change_painter_flag(True)
self
.
widget
.
change_painter_flag
(
True
)
h
=
self
.
widget
.
down
(
3
)
h
=
self
.
widget
.
down
(
3
)
video_h
=
self
.
wgt_video
.
height
()
video_h
=
self
.
wgt_video
.
height
()
self
.
rate
=
float
(
h
-
10
)
/
float
(
video_h
)
self
.
rate
=
float
(
h
-
10
)
/
float
(
video_h
)
...
@@ -622,19 +740,70 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -622,19 +740,70 @@ class MainWindow(QMainWindow, Ui_MainWindow):
print
(
self
.
wgt_video
.
height
())
print
(
self
.
wgt_video
.
height
())
print
(
">>>>>>>>>rate"
+
str
(
self
.
rate
))
print
(
">>>>>>>>>rate"
+
str
(
self
.
rate
))
def
up_ocr_bottom
(
self
):
def up_ocr(self):
    """Press handler of ``up_ocr_btn``: flag editing and start the repeat timer."""
    self.user_editing_content = True
    # The move itself happens in up_ocr_timer_func on every 50 ms timeout.
    self.up_ocr_timer.start(50)
def up_ocr_stop(self):
    """Release handler of ``up_ocr_btn``: clear the editing flag and stop the timer."""
    self.user_editing_content = False
    self.up_ocr_timer.stop()
def down_ocr_stop(self):
    """Release handler of ``down_ocr_btn``: clear the editing flag and stop the timer."""
    self.user_editing_content = False
    self.down_ocr_timer.stop()
def down_ocr(self):
    """Press handler of ``down_ocr_btn``: flag editing and start the repeat timer."""
    self.user_editing_content = True
    # The move itself happens in down_ocr_timer_func on every 50 ms timeout.
    self.down_ocr_timer.start(50)
def
up_ocr_bottom_timer_func
(
self
):
self
.
widget_bottom
.
change_painter_flag
(
True
)
self
.
widget_bottom
.
change_painter_flag
(
True
)
h
=
self
.
widget_bottom
.
up
(
3
)
h
=
self
.
widget_bottom
.
up
(
3
)
video_h
=
self
.
wgt_video
.
height
()
video_h
=
self
.
wgt_video
.
height
()
self
.
rate_bottom
=
float
(
h
-
6
)
/
float
(
video_h
)
self
.
rate_bottom
=
float
(
h
-
6
)
/
float
(
video_h
)
def
down_ocr_bottom_timer_func
(
self
):
def
down_ocr_bottom
(
self
):
self
.
widget_bottom
.
change_painter_flag
(
True
)
self
.
widget_bottom
.
change_painter_flag
(
True
)
h
=
self
.
widget_bottom
.
down
(
3
)
h
=
self
.
widget_bottom
.
down
(
3
)
video_h
=
self
.
wgt_video
.
height
()
video_h
=
self
.
wgt_video
.
height
()
self
.
rate_bottom
=
float
(
h
-
6
)
/
float
(
video_h
)
self
.
rate_bottom
=
float
(
h
-
6
)
/
float
(
video_h
)
def up_ocr_bottom_stop(self):
    """Release handler of ``up_ocr_bottom_btn``: clear the flag and stop the timer."""
    self.user_editing_content = False
    self.up_ocr_bottom_timer.stop()
def down_ocr_bottom_stop(self):
    """Release handler of ``down_ocr_bottom_btn``: clear the flag and stop the timer."""
    self.user_editing_content = False
    self.down_ocr_bottom_timer.stop()
def up_ocr_bottom(self):
    """Press handler of ``up_ocr_bottom_btn``: flag editing and start the repeat timer."""
    self.user_editing_content = True
    # The move itself happens in up_ocr_bottom_timer_func on every 50 ms timeout.
    self.up_ocr_bottom_timer.start(50)
def down_ocr_bottom(self):
    """Press handler of ``down_ocr_bottom_btn``: flag editing and start the repeat timer."""
    self.user_editing_content = True
    # The move itself happens in down_ocr_bottom_timer_func on every 50 ms timeout.
    self.down_ocr_bottom_timer.start(50)
def
refresh_on_import
(
self
):
def
refresh_on_import
(
self
):
print
(
">>>>>>>refresh in"
)
print
(
">>>>>>>refresh in"
)
self
.
refresh_tab_slot
()
self
.
refresh_tab_slot
()
...
@@ -877,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -877,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
"""
self
.
check_if_over
(
"旁白导入"
)
self
.
check_if_over
(
"旁白导入"
)
def
check_if_location_ocr
(
self
):
self
.
check_if_over
(
"字幕定位"
)
alive
=
True
for
t
in
self
.
threads
:
alive
=
alive
and
t
.
is_alive
()
if
not
alive
:
self
.
location_ocr_timer
.
stop
()
# self.auto_close_dialog.close()
self
.
threads
=
[]
# type = 检测 或 合成 或 导出
# type = 检测 或 合成 或 导出
def
check_if_over
(
self
,
type
:
str
):
def
check_if_over
(
self
,
type
:
str
):
"""确认传入的待检测任务是否完成
"""确认传入的待检测任务是否完成
...
@@ -904,9 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -904,9 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
elif
type
==
"检测"
:
elif
type
==
"检测"
:
self
.
detect_timer
.
stop
()
self
.
detect_timer
.
stop
()
self
.
refresh_tab_timer
.
stop
()
self
.
refresh_tab_timer
.
stop
()
# t = ProcessErrThread(self)
# t.start()
# from narratage_detection import process_err
# process_err(self)
elif
type
==
"旁白导入"
:
elif
type
==
"旁白导入"
:
self
.
import_excel_timer
.
stop
()
self
.
import_excel_timer
.
stop
()
# self.refresh_tab_timer.stop()
# self.refresh_tab_timer.stop()
elif
type
==
"字幕定位"
:
self
.
location_ocr_timer
.
stop
()
else
:
else
:
self
.
export_timer
.
stop
()
self
.
export_timer
.
stop
()
...
@@ -921,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -921,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
progressBar
.
setValue
(
100
)
self
.
progressBar
.
setValue
(
100
)
self
.
progressLabel
.
setText
(
f
"100
%
"
)
self
.
progressLabel
.
setText
(
f
"100
%
"
)
self
.
projectContext
.
nd_process
=
1
self
.
projectContext
.
nd_process
=
1
self
.
threads
=
[]
def
deal_synthesis_callback_slot
(
self
,
threads
,
state
):
def
deal_synthesis_callback_slot
(
self
,
threads
,
state
):
"""实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态
"""实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态
...
@@ -1305,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -1305,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
item
=
QTableWidgetItem
(
text
)
item
=
QTableWidgetItem
(
text
)
item
.
setTextAlignment
(
Qt
.
AlignCenter
)
item
.
setTextAlignment
(
Qt
.
AlignCenter
)
# 设置为不可编辑
# 设置为不可编辑
if
self
.
checkIfTableItemCanChange
(
table
,
idx
,
j
)
==
False
:
#
if self.checkIfTableItemCanChange(table, idx, j) == False:
# item.setFlags(Qt.ItemIsEnabled)
# item.setFlags(Qt.ItemIsEnabled)
print
(
1
)
# print(">>>>>>>>>setElememtToTable"
)
table
.
setItem
(
idx
,
j
,
item
)
table
.
setItem
(
idx
,
j
,
item
)
# 只有Content页的字幕列和 Aside页的字幕列 可编辑
# 只有Content页的字幕列和 Aside页的字幕列 可编辑
...
@@ -1776,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -1776,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
pre_item
=
self
.
all_tableWidget
.
item
(
row
,
col
-
1
)
pre_item
=
self
.
all_tableWidget
.
item
(
row
,
col
-
1
)
suggest
=
pre_item
.
text
()
suggest
=
pre_item
.
text
()
if
suggest
!=
None
and
suggest
!=
""
:
print
(
">>>>>>>>suggest:"
+
suggest
)
if
col
==
constant
.
Content
.
AsideColumnNumber
and
suggest
!=
None
and
suggest
!=
""
:
arrays
=
suggest
.
split
(
"/"
)
arrays
=
suggest
.
split
(
"/"
)
if
len
(
arrays
)
==
2
:
if
len
(
arrays
)
==
2
:
suggest
=
str
(
len
(
text
))
+
"/"
+
arrays
[
1
]
suggest
=
str
(
len
(
text
))
+
"/"
+
arrays
[
1
]
...
@@ -1807,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -1807,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# self.all_tableWidget.setItem(
# self.all_tableWidget.setItem(
# int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
# int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
self
.
projectContext
.
refresh_speed
(
row
,
text
)
self
.
projectContext
.
refresh_speed
(
row
,
text
)
elif
col
==
constant
.
Content
.
SubtitleColumnNumber
:
self
.
projectContext
.
refresh_subtitle
(
row
,
text
)
# self.all_tableWidget_idx = int(row)
# self.all_tableWidget_idx = int(row)
# self.set_table_to_window(False)
# self.set_table_to_window(False)
...
@@ -1942,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -1942,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
将表格内容更新至界面中,并保存当前工程内容
将表格内容更新至界面中,并保存当前工程内容
"""
"""
if
not
self
.
detect_lock
:
self
.
refresh_flag
=
True
try
:
self
.
set_table_to_window
(
need_refresh_all
=
False
)
self
.
set_table_to_window
(
need_refresh_all
=
False
)
self
.
projectContext
.
save_project
(
False
)
self
.
projectContext
.
save_project
(
False
)
finally
:
self
.
refresh_flag
=
False
def
refresh_all_tab_slot
(
self
):
def
refresh_all_tab_slot
(
self
):
"""刷新整个表格
"""刷新整个表格
...
@@ -2274,7 +2470,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -2274,7 +2470,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
prompt_dialog
.
show_with_msg
(
"操作成功!!请查看变化"
)
self
.
prompt_dialog
.
show_with_msg
(
"操作成功!!请查看变化"
)
# 只有row起作用
# 只有row起作用
def
del_line_operation_slot
(
self
,
row
:
int
,
start_time
=
"0"
,
end_time
=
"0"
,
subtitle
=
""
,
suggest
=
""
,
aside
=
""
,
speed
=
""
,
refresh_flag
=
True
):
def
del_line_operation_slot
(
self
,
row
:
int
,
start_time
=
"0"
,
end_time
=
"0"
,
subtitle
=
""
,
suggest
=
""
,
aside
=
""
,
speed
=
""
,
refresh_flag
=
True
,
show_msg_flag
=
True
):
"""删除一行
"""删除一行
Args:
Args:
...
@@ -2313,6 +2509,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -2313,6 +2509,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
projectContext
.
all_elements
.
pop
(
int
(
row
)
-
1
)
self
.
projectContext
.
all_elements
.
pop
(
int
(
row
)
-
1
)
if
refresh_flag
:
if
refresh_flag
:
self
.
refresh_tab_slot
()
self
.
refresh_tab_slot
()
if
show_msg_flag
:
self
.
prompt_dialog
.
show_with_msg
(
"操作成功!!请查看变化"
)
self
.
prompt_dialog
.
show_with_msg
(
"操作成功!!请查看变化"
)
def
pb_item_changed_by_double_clicked_slot
(
self
,
item
):
def
pb_item_changed_by_double_clicked_slot
(
self
,
item
):
...
@@ -2418,4 +2615,17 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -2418,4 +2615,17 @@ class MainWindow(QMainWindow, Ui_MainWindow):
str
(
round
(
video_position
/
1000
,
2
)))
str
(
round
(
video_position
/
1000
,
2
)))
self
.
import_excel_dialog
.
show_with_msg
(
"定位成功:"
+
self
.
aside_head_time
)
self
.
import_excel_dialog
.
show_with_msg
(
"定位成功:"
+
self
.
aside_head_time
)
def confirm_ocr(self):
    """Record the current subtitle boundary pair in ``ocr_ranges``.

    Shows a prompt and returns without recording when either boundary ratio
    is unset or when ``check_ocr_rate`` rejects the pair. Otherwise appends
    ``[rate, rate_bottom]`` and reports how many pairs are now stored.
    """
    # Identity checks: "== None" would dispatch to the operand's __eq__.
    if self.rate is None:
        self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
        return
    if self.rate_bottom is None:
        self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
        return
    if not self.check_ocr_rate():
        self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
        return

    self.ocr_ranges.append([self.rate, self.rate_bottom])
    self.prompt_dialog.show_with_msg(f"操作成功,如果电影存在多行字幕,请移动字幕上下边界,再次点击该按钮确认,目前已存在{len(self.ocr_ranges)}组字幕边界")
\ No newline at end of file
main_window_ui.py
View file @
2c4cd5c0
...
@@ -18,6 +18,9 @@ class MyWidget(QWidget):
...
@@ -18,6 +18,9 @@ class MyWidget(QWidget):
# def __init__(self, parent=None):
# def __init__(self, parent=None):
# super(QWidget, self).__init__(parent)
# super(QWidget, self).__init__(parent)
# self.painter_flag = True
# self.painter_flag = True
def __init__(self, parent=None, color=Qt.red):
    """Create the widget and remember the pen colour used by ``paintEvent``.

    Args:
        parent: Optional parent widget.
        color: Colour of the drawn line; defaults to ``Qt.red``.
    """
    # super(QWidget, self) starts the MRO lookup *after* QWidget and so
    # bypasses the direct base class; zero-argument super() resolves to
    # QWidget as intended.
    super().__init__(parent)
    self.color = color
def
paintEvent
(
self
,
event
):
def
paintEvent
(
self
,
event
):
# print(">>>>>>>>into paint")
# print(">>>>>>>>into paint")
...
@@ -26,7 +29,7 @@ class MyWidget(QWidget):
...
@@ -26,7 +29,7 @@ class MyWidget(QWidget):
lock
.
acquire
()
lock
.
acquire
()
painter
=
QPainter
(
self
)
painter
=
QPainter
(
self
)
painter
.
setRenderHint
(
QPainter
.
Antialiasing
)
# Optional: Enable anti-aliasing
painter
.
setRenderHint
(
QPainter
.
Antialiasing
)
# Optional: Enable anti-aliasing
painter
.
setPen
(
QPen
(
Qt
.
red
,
2
,
Qt
.
SolidLine
))
painter
.
setPen
(
QPen
(
self
.
color
,
2
,
Qt
.
SolidLine
))
painter
.
drawLine
(
0
,
1
,
800
,
1
)
painter
.
drawLine
(
0
,
1
,
800
,
1
)
painter
.
end
()
painter
.
end
()
lock
.
release
()
lock
.
release
()
...
@@ -55,8 +58,17 @@ class MyWidget(QWidget):
...
@@ -55,8 +58,17 @@ class MyWidget(QWidget):
# painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
# painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
# painter.drawLine(0, 1, 800, 1)
# painter.drawLine(0, 1, 800, 1)
# painter.end()
# painter.end()
print
(
">>>>>cur_y : "
+
str
(
self
.
y
()))
return
self
.
y
()
return
self
.
y
()
def setY(self, h):
    """Log the current y, then move the widget to ``(0, h)``."""
    print(f">>>>>cur_y2 : {self.y()}")
    self.move(0, h)
def get_h(self):
    """Return the widget's current y coordinate (``self.y()``)."""
    return self.y()
def
down
(
self
,
mov_len
):
def
down
(
self
,
mov_len
):
print
(
">>>>>>>>>>>down"
+
str
(
mov_len
))
print
(
">>>>>>>>>>>down"
+
str
(
mov_len
))
self
.
move
(
0
,
self
.
y
()
+
mov_len
)
self
.
move
(
0
,
self
.
y
()
+
mov_len
)
...
@@ -314,6 +326,8 @@ class Ui_MainWindow(object):
...
@@ -314,6 +326,8 @@ class Ui_MainWindow(object):
self
.
horizontalLayout_7
.
setObjectName
(
"horizontalLayout_7"
)
self
.
horizontalLayout_7
.
setObjectName
(
"horizontalLayout_7"
)
self
.
up_ocr_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
up_ocr_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
up_ocr_btn
.
setObjectName
(
"up_ocr_btn"
)
self
.
up_ocr_btn
.
setObjectName
(
"up_ocr_btn"
)
# self.up_ocr_btn.setAutoRepeatDelay(False)
# self.up_ocr_btn.setAutoRepeat
self
.
horizontalLayout_7
.
addWidget
(
self
.
up_ocr_btn
)
self
.
horizontalLayout_7
.
addWidget
(
self
.
up_ocr_btn
)
self
.
down_ocr_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
down_ocr_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
down_ocr_btn
.
setObjectName
(
"down_ocr_btn"
)
self
.
down_ocr_btn
.
setObjectName
(
"down_ocr_btn"
)
...
@@ -324,10 +338,14 @@ class Ui_MainWindow(object):
...
@@ -324,10 +338,14 @@ class Ui_MainWindow(object):
self
.
down_ocr_bottom_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
down_ocr_bottom_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
down_ocr_bottom_btn
.
setObjectName
(
"down_ocr_bottom_btn"
)
self
.
down_ocr_bottom_btn
.
setObjectName
(
"down_ocr_bottom_btn"
)
self
.
horizontalLayout_7
.
addWidget
(
self
.
down_ocr_bottom_btn
)
self
.
horizontalLayout_7
.
addWidget
(
self
.
down_ocr_bottom_btn
)
self
.
confirm_ocr_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
confirm_ocr_btn
.
setObjectName
(
"confirm_ocr_btn"
)
self
.
horizontalLayout_7
.
addWidget
(
self
.
confirm_ocr_btn
)
self
.
confirm_head_aside_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
confirm_head_aside_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
confirm_head_aside_btn
.
setObjectName
(
"confirm_head_aside_btn"
)
self
.
confirm_head_aside_btn
.
setObjectName
(
"confirm_head_aside_btn"
)
self
.
horizontalLayout_7
.
addWidget
(
self
.
confirm_head_aside_btn
)
self
.
horizontalLayout_7
.
addWidget
(
self
.
confirm_head_aside_btn
)
self
.
horizontalLayout_8
=
QtWidgets
.
QHBoxLayout
()
self
.
horizontalLayout_8
=
QtWidgets
.
QHBoxLayout
()
self
.
horizontalLayout_8
.
setObjectName
(
"horizontalLayout_8"
)
self
.
horizontalLayout_8
.
setObjectName
(
"horizontalLayout_8"
)
self
.
detect_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
self
.
detect_btn
=
QtWidgets
.
QPushButton
(
self
.
centralwidget
)
...
@@ -523,7 +541,8 @@ class Ui_MainWindow(object):
...
@@ -523,7 +541,8 @@ class Ui_MainWindow(object):
self
.
action_redo
=
QtWidgets
.
QAction
(
MainWindow
)
self
.
action_redo
=
QtWidgets
.
QAction
(
MainWindow
)
# self.action_redo.setFont(font)
# self.action_redo.setFont(font)
self
.
action_redo
.
setObjectName
(
"action_redo"
)
self
.
action_redo
.
setObjectName
(
"action_redo"
)
self
.
action_3
=
QtWidgets
.
QAction
(
"旁白区间检测"
,
self
,
triggered
=
self
.
show_detect_dialog
)
# self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_detect_dialog)
self
.
action_3
=
QtWidgets
.
QAction
(
"旁白区间检测"
,
self
,
triggered
=
self
.
show_confirmation_dialog
)
self
.
action_3
.
setEnabled
(
False
)
self
.
action_3
.
setEnabled
(
False
)
self
.
action_4
=
QtWidgets
.
QAction
(
"旁白音频合成"
,
self
,
triggered
=
self
.
show_assemble_dialog
)
self
.
action_4
=
QtWidgets
.
QAction
(
"旁白音频合成"
,
self
,
triggered
=
self
.
show_assemble_dialog
)
self
.
action_4
.
setEnabled
(
False
)
self
.
action_4
.
setEnabled
(
False
)
...
@@ -539,7 +558,6 @@ class Ui_MainWindow(object):
...
@@ -539,7 +558,6 @@ class Ui_MainWindow(object):
self
.
action_9
.
setEnabled
(
True
)
self
.
action_9
.
setEnabled
(
True
)
self
.
action_10
=
QtWidgets
.
QAction
(
"片头旁白定位"
,
self
,
triggered
=
self
.
confirm_head_aside
)
self
.
action_10
=
QtWidgets
.
QAction
(
"片头旁白定位"
,
self
,
triggered
=
self
.
confirm_head_aside
)
self
.
action_10
.
setEnabled
(
True
)
self
.
action_10
.
setEnabled
(
True
)
# self.action_3.setObjectName("action_3")
# self.action_3.setObjectName("action_3")
# self.action_4 = QtWidgets.QAction(MainWindow)
# self.action_4 = QtWidgets.QAction(MainWindow)
# self.action_4.setObjectName("action_4")
# self.action_4.setObjectName("action_4")
...
@@ -604,6 +622,7 @@ class Ui_MainWindow(object):
...
@@ -604,6 +622,7 @@ class Ui_MainWindow(object):
self
.
up_ocr_bottom_btn
.
setText
(
_translate
(
"MainWindow"
,
"字幕下边界上移"
))
self
.
up_ocr_bottom_btn
.
setText
(
_translate
(
"MainWindow"
,
"字幕下边界上移"
))
self
.
down_ocr_bottom_btn
.
setText
(
_translate
(
"MainWindow"
,
"字幕下边界下移"
))
self
.
down_ocr_bottom_btn
.
setText
(
_translate
(
"MainWindow"
,
"字幕下边界下移"
))
self
.
confirm_head_aside_btn
.
setText
(
_translate
(
"MainWindow"
,
"片头旁白定位"
))
self
.
confirm_head_aside_btn
.
setText
(
_translate
(
"MainWindow"
,
"片头旁白定位"
))
self
.
confirm_ocr_btn
.
setText
(
_translate
(
"MainWindow"
,
"字幕边界确认"
))
self
.
detect_btn
.
setText
(
_translate
(
"MainWindow"
,
"旁白区间检测"
))
self
.
detect_btn
.
setText
(
_translate
(
"MainWindow"
,
"旁白区间检测"
))
self
.
tabWidget
.
setTabText
(
self
.
tabWidget
.
indexOf
(
self
.
all_tab
),
_translate
(
"MainWindow"
,
"字幕旁白"
))
self
.
tabWidget
.
setTabText
(
self
.
tabWidget
.
indexOf
(
self
.
all_tab
),
_translate
(
"MainWindow"
,
"字幕旁白"
))
self
.
tabWidget
.
setTabText
(
self
.
tabWidget
.
indexOf
(
self
.
zm_tab
),
_translate
(
"MainWindow"
,
"字幕"
))
self
.
tabWidget
.
setTabText
(
self
.
tabWidget
.
indexOf
(
self
.
zm_tab
),
_translate
(
"MainWindow"
,
"字幕"
))
...
...
management.py
View file @
2c4cd5c0
...
@@ -77,13 +77,14 @@ class OperateRecord:
...
@@ -77,13 +77,14 @@ class OperateRecord:
# 每一行的具体信息,"起始时间", "终止时间", "字幕", '建议', '解说脚本'
# 每一行的具体信息,"起始时间", "终止时间", "字幕", '建议', '解说脚本'
class
Element
:
class
Element
:
def
__init__
(
self
,
st_time_sec
:
str
,
ed_time_sec
:
str
,
subtitle
,
suggest
,
aside
,
speed
=
"1.00(4字/秒)"
):
def
__init__
(
self
,
st_time_sec
:
str
,
ed_time_sec
:
str
,
subtitle
,
suggest
,
aside
,
speed
=
"1.00(4字/秒)"
,
ocr_h
=
None
):
self
.
st_time_sec
=
st_time_sec
self
.
st_time_sec
=
st_time_sec
self
.
ed_time_sec
=
ed_time_sec
self
.
ed_time_sec
=
ed_time_sec
self
.
subtitle
=
subtitle
self
.
subtitle
=
subtitle
self
.
suggest
=
suggest
self
.
suggest
=
suggest
self
.
aside
=
aside
self
.
aside
=
aside
self
.
speed
=
speed
self
.
speed
=
speed
self
.
ocr_h
=
ocr_h
# 判断当前元素是否是字幕
# 判断当前元素是否是字幕
def
is_subtitle
(
self
):
def
is_subtitle
(
self
):
...
@@ -263,6 +264,11 @@ class ProjectContext:
...
@@ -263,6 +264,11 @@ class ProjectContext:
if
not
self
.
initial_ing
:
if
not
self
.
initial_ing
:
save_excel_to_path
(
self
.
all_elements
,
self
.
excel_path
,
self
.
write_header
,
self
.
excel_sheet_name
)
save_excel_to_path
(
self
.
all_elements
,
self
.
excel_path
,
self
.
write_header
,
self
.
excel_sheet_name
)
def refresh_subtitle(self, row, subtitle: str):
    """Replace the subtitle text of one row and persist the change.

    Args:
        row: Index of the element to update; converted with ``int()``
            before indexing ``self.all_elements``.
        subtitle: New subtitle text stored on that element.

    The workbook is rewritten through ``save_excel_to_path`` unless
    ``self.initial_ing`` is set.
    """
    target = self.all_elements[int(row)]
    target.subtitle = subtitle
    # Skip persisting while the initial_ing flag is set.
    if self.initial_ing:
        return
    save_excel_to_path(
        self.all_elements,
        self.excel_path,
        self.write_header,
        self.excel_sheet_name,
    )
def
refresh_speed
(
self
,
row
,
speed
:
str
)
->
None
:
def
refresh_speed
(
self
,
row
,
speed
:
str
)
->
None
:
self
.
all_elements
[
int
(
row
)]
.
speed
=
speed
self
.
all_elements
[
int
(
row
)]
.
speed
=
speed
if
not
self
.
initial_ing
:
if
not
self
.
initial_ing
:
...
@@ -307,7 +313,7 @@ class ProjectContext:
...
@@ -307,7 +313,7 @@ class ProjectContext:
if
d
[
"终止时间"
][
i
]
is
None
:
if
d
[
"终止时间"
][
i
]
is
None
:
# 如果是最后一条
# 如果是最后一条
if
i
==
len
(
d
[
"字幕"
])
-
1
:
if
i
==
len
(
d
[
"字幕"
])
-
1
:
print
(
1
)
print
(
">>>>>>>>>load_excel_from_path"
)
# ed_time_sec = "360000" if self.duration == 0 else self.duration # todo 默认最大时长是100h
# ed_time_sec = "360000" if self.duration == 0 else self.duration # todo 默认最大时长是100h
else
:
else
:
ed_time_sec
=
"
%.2
f"
%
(
float
(
d
[
"起始时间"
][
i
+
1
])
-
0.01
)
ed_time_sec
=
"
%.2
f"
%
(
float
(
d
[
"起始时间"
][
i
+
1
])
-
0.01
)
...
@@ -428,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
...
@@ -428,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
backup_path
=
os
.
path
.
dirname
(
new_excel_path
)
+
"/tmp_"
+
str
(
time
.
time
())
+
".xlsx"
backup_path
=
os
.
path
.
dirname
(
new_excel_path
)
+
"/tmp_"
+
str
(
time
.
time
())
+
".xlsx"
# os.remove(new_excel_path)
# os.remove(new_excel_path)
os
.
rename
(
new_excel_path
,
backup_path
)
os
.
rename
(
new_excel_path
,
backup_path
)
# print(">>>>>>new_excel_path:" + new_excel_path)
# print(">>>>>>>>>>backup_path:" + backup_path)
try
:
try
:
create_sheet
(
new_excel_path
,
"旁白插入位置建议"
,
[
header
])
create_sheet
(
new_excel_path
,
"旁白插入位置建议"
,
[
header
])
# for element in all_element:
# for element in all_element:
...
...
narratage_detection.py
View file @
2c4cd5c0
...
@@ -63,6 +63,13 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
...
@@ -63,6 +63,13 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
from
detect_with_ocr
import
detect_with_ocr
from
detect_with_ocr
import
detect_with_ocr
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
,
mainWindow
)
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
,
mainWindow
)
def process_err(mainWindow: MainWindow=None):
    """Run ``process_err_ocr`` for the given window, reporting any failure.

    Args:
        mainWindow: Passed straight through to ``process_err_ocr``.

    Any ``Exception`` raised by ``process_err_ocr`` is caught and printed
    to stdout instead of propagating to the caller.
    """
    # Function-scope import, kept outside the try block so that an import
    # failure still propagates exactly as before.
    from detect_with_ocr import process_err_ocr

    try:
        process_err_ocr(mainWindow)
    except Exception as err:
        print("process_err err")
        print(err)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
# 定义参数
# 定义参数
...
...
ocr_metric.py
View file @
2c4cd5c0
import
re
import
re
import
sys
import
csv
import
csv
import
jieba
import
argparse
import
argparse
import
pandas
as
pd
import
pandas
as
pd
import
numpy
as
np
import
numpy
as
np
from
sklearn.feature_extraction.text
import
TfidfVectorizer
from
sklearn.feature_extraction.text
import
TfidfVectorizer
from
sklearn.metrics.pairwise
import
cosine_similarity
from
sklearn.metrics.pairwise
import
cosine_similarity
from
difflib
import
SequenceMatcher
from
difflib
import
SequenceMatcher
title
=
[
'起始时间(转换后)'
,
'终止时间(转换后)'
,
'字幕'
]
from
tqdm
import
tqdm
# title = ['起始时间(转换后)', '终止时间(转换后)', '字幕']
title
=
[
'起始时间'
,
'终止时间'
,
'字幕'
]
def
init
():
def
init
():
# 获取中文停用词列表
# 获取中文停用词列表
...
@@ -22,13 +27,32 @@ def change_to_second(time_str):
...
@@ -22,13 +27,32 @@ def change_to_second(time_str):
time_obj
.
second
+
time_obj
.
microsecond
/
1000000
time_obj
.
second
+
time_obj
.
microsecond
/
1000000
return
seconds
return
seconds
# Segment a Chinese sentence, guarding against a result made only of stop words.
def words_segment(str):
    """Return the comma-joined jieba segmentation of a sentence, or the raw text.

    Args:
        str: Sentence to segment. The name shadows the builtin but is kept
            so existing keyword callers continue to work.

    Returns:
        The tokens produced by ``jieba.cut`` joined with ``','``. The input
        is returned unchanged when ``is_all_stopwords`` accepts the joined
        text, or when the number of tokens equals the number of characters
        (segmentation presumably yielded only single-character tokens).
    """
    # Segment once and reuse the result; the original ran jieba.cut twice
    # on the same input.
    tokens = list(jieba.cut(str))
    joined = ','.join(tokens)
    if is_all_stopwords(joined) or len(tokens) == len(str):
        return str
    return joined
# Optionally keep only the Chinese part of a mixed Chinese/English subtitle.
def extract_info(str, has_english=False):
    """Return the subtitle text, reduced to its Chinese runs when requested.

    Args:
        str: Subtitle text.
        has_english: When falsy the text is returned untouched. When truthy
            only runs of characters in U+4E00..U+9FFF are kept.

    Returns:
        The original text, or the matched runs joined with single spaces
        (an empty string when nothing matches).
    """
    if has_english:
        return ' '.join(re.findall(r'[\u4e00-\u9fff]+', str))
    return str
# 计算字幕的相似度
# 计算字幕的相似度
def
calculate_similarity
(
str1
,
str2
,
method
=
'cosine'
):
def
calculate_similarity
(
str1
,
str2
,
method
=
'cosine'
):
if
method
==
'cosine'
:
if
method
==
'cosine'
:
tfidf_vectorizer
=
TfidfVectorizer
()
str1
,
str2
=
words_segment
(
str1
),
words_segment
(
str2
)
tfidf_matrix
=
tfidf_vectorizer
.
fit_transform
([
str1
,
str2
])
tfidf_vectorizer
=
TfidfVectorizer
(
min_df
=
1
)
tfidf_matrix
=
tfidf_vectorizer
.
fit_transform
([
str1
,
str2
])
# shape=[2, N]
# print(np.array(tfidf_matrix.toarray()).shape, type(tfidf_matrix), tfidf_matrix.toarray())
similarity_matrix
=
cosine_similarity
(
tfidf_matrix
)
similarity_matrix
=
cosine_similarity
(
tfidf_matrix
)
return
similarity_matrix
[
0
][
1
]
return
similarity_matrix
[
0
][
1
]
elif
method
==
'distance'
:
return
-
String_edit_distance
(
str1
,
str2
)
else
:
else
:
return
SequenceMatcher
(
None
,
str1
,
str2
)
.
ratio
()
return
SequenceMatcher
(
None
,
str1
,
str2
)
.
ratio
()
...
@@ -37,23 +61,62 @@ def calculate_time_difference(time1, time2):
...
@@ -37,23 +61,62 @@ def calculate_time_difference(time1, time2):
return
abs
(
time2
-
time1
)
return
abs
(
time2
-
time1
)
def
calculate_weight
(
x
,
y
):
def
calculate_weight
(
x
,
y
):
# weight = e^(-alpha * time_diff)
# # weight = e^(-alpha * time_diff)
# 相差1s的系数为0.9
# # 相差1s的系数为0.9
alpha
=
0.11
# alpha = 0.11
return
1
/
(
alpha
*
(
x
+
y
)
+
1
)
# return 1 / (alpha * (x + y) + 1)
return
1.0
# 目前不考虑时间系数
# 检查句子中的每个单词是否都是停用词
# 检查句子中的每个单词是否都是停用词
def
is_all_stopwords
(
sentence
):
def
is_all_stopwords
(
sentence
):
sentence
=
sentence
.
replace
(
' '
,
''
)
return
all
(
word
in
stop_words
for
word
in
sentence
)
return
all
(
word
in
stop_words
for
word
in
sentence
)
# Edit-distance algorithm (the original author flagged it as problematic).
def String_edit_distance(str1, str2):
    """Return the edit distance between two strings, normalized to [0, 1].

    The distance counts single-character insertions, deletions and
    substitutions, each costing 1, and is divided by the length of the
    longer input.

    Args:
        str1: First sequence (indexed and measured with ``len``).
        str2: Second sequence.

    Returns:
        A float: ``0.0`` for identical inputs, ``1.0`` when one input is
        empty and the other is not. Two empty inputs give ``0.0``; the
        original divided by zero in that case.
    """
    n, m = len(str1), len(str2)
    longest = max(n, m)
    if longest == 0:
        # Both inputs empty: nothing to edit, and avoids dividing by zero.
        return 0.0

    # Only the previous row of the DP table is needed at any time.
    prev = list(range(m + 1))
    for i in range(1, n + 1):
        curr = [i] + [0] * m
        ch = str1[i - 1]
        for j in range(1, m + 1):
            if ch == str2[j - 1]:
                curr[j] = prev[j - 1]
            else:
                # substitution, insertion, deletion
                curr[j] = min(prev[j - 1], curr[j - 1], prev[j]) + 1
        prev = curr
    return prev[m] / longest
### 如果其中有-符号,可能在用excel打开时自动添加=变成公式,读取的时候没问题
### 如果其中有-符号,可能在用excel打开时自动添加=变成公式,读取的时候没问题
def
read_srt_to_csv
(
path_srt
,
path_output
):
def
read_srt_to_csv
(
path_srt
,
path_output
):
try
:
with
open
(
path_srt
,
'r'
,
encoding
=
'utf-8-sig'
)
as
f
:
with
open
(
path_srt
,
'r'
,
encoding
=
'utf-8-sig'
)
as
f
:
srt_content
=
f
.
read
()
# str
srt_content
=
f
.
read
()
# str
except
UnicodeDecodeError
:
print
(
f
"编码错误,已经切换到utf-16编码"
)
try
:
with
open
(
path_srt
,
'r'
,
encoding
=
'utf-16'
)
as
f
:
srt_content
=
f
.
read
()
# str
except
:
print
(
f
"请选择utf-8或utf-16编码形式的srt文件"
)
sys
.
exit
(
1
)
# 使用正则表达式匹配时间码和字幕内容
# 使用正则表达式匹配时间码和字幕内容
pattern
=
re
.
compile
(
r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.+?)(?=\n\d+\n|$)'
,
re
.
DOTALL
)
pattern
=
re
.
compile
(
r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.+?)(?=\n\d+\n|$)'
,
re
.
DOTALL
)
matches
=
pattern
.
findall
(
srt_content
)
matches
=
pattern
.
findall
(
srt_content
)
has_english
=
[]
for
i
in
range
(
5
):
idx
=
np
.
random
.
randint
(
len
(
matches
))
pattern
=
re
.
compile
(
r'[a-zA-Z]'
)
has_english
.
append
(
bool
(
pattern
.
search
(
matches
[
idx
][
3
])))
has_english
=
all
(
has_english
)
print
(
'!'
*
20
,
has_english
)
# 写入 csv 文件
# 写入 csv 文件
with
open
(
path_output
,
'w'
,
newline
=
''
,
encoding
=
'utf-8'
)
as
f
:
with
open
(
path_output
,
'w'
,
newline
=
''
,
encoding
=
'utf-8'
)
as
f
:
csv_writer
=
csv
.
writer
(
f
)
csv_writer
=
csv
.
writer
(
f
)
...
@@ -61,7 +124,7 @@ def read_srt_to_csv(path_srt, path_output):
...
@@ -61,7 +124,7 @@ def read_srt_to_csv(path_srt, path_output):
for
_
,
start
,
end
,
subtitle
in
matches
:
# 都是str格式
for
_
,
start
,
end
,
subtitle
in
matches
:
# 都是str格式
subtitle
=
re
.
sub
(
r'\{[^}]*\}'
,
''
,
subtitle
)
# 将srt文件前的加粗等格式去掉
subtitle
=
re
.
sub
(
r'\{[^}]*\}'
,
''
,
subtitle
)
# 将srt文件前的加粗等格式去掉
csv_writer
.
writerow
([
start
,
end
,
subtitle
.
strip
(
)])
csv_writer
.
writerow
([
start
,
end
,
extract_info
(
subtitle
.
strip
(),
has_english
)])
def
read_from_xlsx
(
path_xlsx
=
'output.xlsx'
,
path_output
=
'deal.csv'
):
def
read_from_xlsx
(
path_xlsx
=
'output.xlsx'
,
path_output
=
'deal.csv'
):
data
=
pd
.
read_excel
(
path_xlsx
)
data
=
pd
.
read_excel
(
path_xlsx
)
...
@@ -70,20 +133,19 @@ def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'):
...
@@ -70,20 +133,19 @@ def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'):
csv_writer
.
writerow
(
title
)
csv_writer
.
writerow
(
title
)
for
_
,
data1
in
data
.
iterrows
():
for
_
,
data1
in
data
.
iterrows
():
start
,
end
,
subtitle
=
data1
[
1
],
data1
[
3
],
data1
[
4
]
# print(data1[1])
start
,
end
,
subtitle
=
data1
[
0
],
data1
[
1
],
data1
[
2
]
if
isinstance
(
subtitle
,
float
)
and
np
.
isnan
(
subtitle
):
if
isinstance
(
subtitle
,
float
)
and
np
.
isnan
(
subtitle
):
continue
continue
# 与srt文件格式同步
# 与srt文件格式同步
start
=
start
.
replace
(
'.'
,
','
)
start
=
start
.
replace
(
'.'
,
','
)
end
=
end
.
replace
(
'.'
,
','
)
end
=
end
.
replace
(
'.'
,
','
)
# print(start, end, subtitle,)
# print(type(start), type(end), type(subtitle))
csv_writer
.
writerow
([
start
,
end
,
subtitle
.
strip
()])
csv_writer
.
writerow
([
start
,
end
,
subtitle
.
strip
()])
### 对于srt中的字幕计算相似性度。从ocr中找到时间戳满足<=time_t的字幕,
### 对于srt中的字幕计算相似性度。从ocr中找到时间戳满足<=time_t的字幕,
### 然后计算字幕间的相似度,取一个最大的。字幕从start和end都匹配一遍
### 然后计算字幕间的相似度,取一个最大的。字幕从start和end都匹配一遍
# time_threshold设置阈值,用于判断时间差是否可接受
# time_threshold设置阈值,用于判断时间差是否可接受
def
measure_score
(
path_srt
,
path_ocr
,
time_threshold
=
5.0
,
method
=
'cosine'
):
def
measure_score
(
path_srt
,
path_ocr
,
time_threshold
=
5.0
,
time_threshold_re
=
False
,
method
=
'cosine'
):
data_srt
,
data_ocr
=
[],
[]
data_srt
,
data_ocr
=
[],
[]
with
open
(
path_srt
,
'r'
,
encoding
=
'utf-8'
)
as
file
:
with
open
(
path_srt
,
'r'
,
encoding
=
'utf-8'
)
as
file
:
csv_reader
=
csv
.
reader
(
file
)
csv_reader
=
csv
.
reader
(
file
)
...
@@ -103,22 +165,36 @@ def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'):
...
@@ -103,22 +165,36 @@ def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'):
# 计算相似度
# 计算相似度
total_similarity
=
0.0
total_similarity
=
0.0
total_weight
=
0.0
total_weight
=
0.0
txt1
=
[]
for
sub
in
data_srt
:
for
sub
in
tqdm
(
data_srt
,
desc
=
"Processing"
,
ncols
=
100
)
:
max_similarity
=
0.0
max_similarity
=
0.0
if
method
!=
'distance'
else
-
1.0
# 去除srt中的停用词
# 去除srt中的停用词
if
is_all_stopwords
(
sub
[
2
]):
if
is_all_stopwords
(
sub
[
2
]):
continue
continue
subb
=
""
for
sub1
in
data_ocr
:
for
sub1
in
data_ocr
:
x
,
y
=
abs
(
sub
[
0
]
-
sub1
[
0
]),
abs
(
sub
[
1
]
-
sub1
[
1
])
x
,
y
=
abs
(
sub
[
0
]
-
sub1
[
0
]),
abs
(
sub
[
1
]
-
sub1
[
1
])
if
min
(
x
,
y
)
<=
time_threshold
:
if
time_threshold_re
:
# print(sub[2], sub1[2])
time_threshold_tmp
=
time_threshold
score
=
calculate_similarity
(
sub
[
2
],
sub1
[
2
],
'cosine'
)
else
:
time_threshold_tmp
=
(
sub
[
1
]
-
sub
[
0
])
*
0.3
# 10s允许3s的误差
if
min
(
x
,
y
)
<=
time_threshold_tmp
:
score
=
calculate_similarity
(
sub
[
2
],
sub1
[
2
],
method
)
if
max_similarity
<=
score
*
calculate_weight
(
x
,
y
):
subb
=
sub1
[
2
]
max_similarity
=
max
(
max_similarity
,
score
*
calculate_weight
(
x
,
y
))
max_similarity
=
max
(
max_similarity
,
score
*
calculate_weight
(
x
,
y
))
if
max_similarity
<=
-
0.5
:
# print(max_similarity, sub[2], subb, sub[0])
txt1
.
append
(
' !!! '
.
join
([
str
(
max_similarity
),
sub
[
2
],
subb
,
str
(
sub
[
0
])]))
total_similarity
+=
max_similarity
total_similarity
+=
max_similarity
total_weight
+=
1
total_weight
+=
1
if
method
==
'distance'
:
total_similarity
=
total_weight
+
total_similarity
with
open
(
'movie_pro.txt'
,
'w'
,
encoding
=
'utf-8'
)
as
f
:
for
i
in
txt1
:
f
.
write
(
i
+
'
\n
'
)
# print(total_similarity, total_similarity / len(data_srt), total_similarity / total_weight)
return
total_similarity
/
len
(
data_srt
),
total_similarity
/
total_weight
return
total_similarity
/
len
(
data_srt
),
total_similarity
/
total_weight
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
@@ -128,13 +204,23 @@ if __name__ == '__main__':
...
@@ -128,13 +204,23 @@ if __name__ == '__main__':
# 添加命令行参数
# 添加命令行参数
parser
.
add_argument
(
"--path_srt"
,
required
=
True
,
type
=
str
,
help
=
"Path of srt file, format is srt"
)
parser
.
add_argument
(
"--path_srt"
,
required
=
True
,
type
=
str
,
help
=
"Path of srt file, format is srt"
)
parser
.
add_argument
(
"--path_ocr"
,
required
=
True
,
type
=
str
,
help
=
"Path of ocr file, format is xlsx"
)
parser
.
add_argument
(
"--path_ocr"
,
required
=
True
,
type
=
str
,
help
=
"Path of ocr file, format is xlsx"
)
parser
.
add_argument
(
"--method"
,
type
=
str
,
default
=
'cosine'
,
help
=
"Select evaluation method"
)
parser
.
add_argument
(
"--time_threshold"
,
type
=
float
,
default
=
5.0
,
help
=
"Allowable time frame"
)
parser
.
add_argument
(
"--time_threshold"
,
type
=
float
,
default
=
5.0
,
help
=
"Allowable time frame"
)
parser
.
add_argument
(
"--method"
,
type
=
str
,
default
=
'distance'
,
choices
=
[
'cosine'
,
'distance'
,
'sequence'
]
,
help
=
"Select evaluation method"
)
parser
.
add_argument
(
"--time_threshold_re"
,
type
=
bool
,
default
=
True
,
help
=
"Specify whether
\
time threshold is required"
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
output_file_srt
=
'deal_srt.csv'
output_file_srt
=
'deal_srt.csv'
output_file_ocr
=
'deal_ocr.csv'
output_file_ocr
=
'deal_ocr.csv'
read_srt_to_csv
(
args
.
path_srt
,
output_file_srt
)
read_srt_to_csv
(
args
.
path_srt
,
output_file_srt
)
read_from_xlsx
(
args
.
path_ocr
,
output_file_ocr
)
read_from_xlsx
(
args
.
path_ocr
,
output_file_ocr
)
score
=
measure_score
(
output_file_srt
,
output_file_ocr
,
args
.
time_threshold
,
args
.
method
)
score
=
measure_score
(
output_file_srt
,
output_file_ocr
,
args
.
time_threshold
,
\
print
(
f
'该评估算法得分: {score[1]:.5f}'
)
args
.
time_threshold_re
,
args
.
method
)
\ No newline at end of file
print
(
f
'该评估算法得分: {100 * score[1]:.3f}'
)
# python ocr_metric.py --path_srt test/new/movie_1.srt --path_ocr ../测试/the-swan-v3/The.Swan-zimu.xlsx --time_threshold 10
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment