Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_2
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
赵心治
accessibility_movie_2
Commits
9b46d4c7
Commit
9b46d4c7
authored
Nov 05, 2023
by
wux51
Browse files
Options
Browse Files
Download
Plain Diff
fix:ocr algorithm issue with repetitive subtitle
parents
9b3754fc
2c4cd5c0
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
410 additions
and
81 deletions
+410
-81
constant.py
constant.py
+2
-1
detect_with_ocr.py
detect_with_ocr.py
+266
-52
main_window.py
main_window.py
+127
-26
main_window_ui.py
main_window_ui.py
+2
-0
management.py
management.py
+8
-1
narratage_detection.py
narratage_detection.py
+5
-1
No files found.
constant.py
View file @
9b46d4c7
...
...
@@ -12,10 +12,11 @@ import os
class
Content
:
StartTimeColumn
=
0
SubtitleColumnNumber
=
2
AsideColumnNumber
=
4
SpeedColumnNumber
=
5
# ActivateColumns = [2, 3]
ActivateColumns
=
[
4
,
5
]
ActivateColumns
=
[
2
,
4
,
5
]
# ColumnCount = 3
ObjectName
=
"all_tableWidget"
# TimeFormatColumns = [0]
...
...
detect_with_ocr.py
View file @
9b46d4c7
...
...
@@ -62,7 +62,38 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
start
=
int
(
start_time
*
fps
)
video
.
set
(
cv2
.
CAP_PROP_POS_FRAMES
,
start
)
frame_num
=
video
.
get
(
cv2
.
CAP_PROP_FRAME_COUNT
)
time1
=
time
.
time
()
# 一共有60*60*60*3 均匀取2000帧
gap
=
math
.
floor
(
frame_num
/
2000
)
# 前一帧与后一帧的跨度
# #读取方案1:跳转读写头到指定帧
# for i in range(2000):
# frame_index = i*gap + 1
# video.set(cv2.CAP_PROP_POS_FRAMES,frame_index)
# _, frame = video.read()
# cv2.imwrite(str.format('./extrated_imgs/{}.png',i),frame)
# 读写方案2:连续读取,读到指定帧保存
# i=1
# while i<frame_num:
# _, frame = video.read()
# if i%gap == 0:
# print(i,gap)
# cv2.imwrite(str.format('./extrated_imgs_2/{}.png',i),frame)
# i = i + 1
# 主要工作:
paddle_dir
=
"res/.paddleocr/2.3.0.1/ocr/"
cur_cls_model_dir
=
paddle_dir
+
"cls/ch_ppocr_mobile_v2.0_cls_infer"
cur_det_model_dir
=
paddle_dir
+
"det/ch/ch_PP-OCRv2_det_infer"
cur_rec_model_dir
=
paddle_dir
+
"rec/ch/ch_PP-OCRv2_rec_infer"
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
"ch"
,
show_log
=
False
,
use_gpu
=
False
,
cls_model_dir
=
cur_cls_model_dir
,
det_model_dir
=
cur_det_model_dir
,
rec_model_dir
=
cur_rec_model_dir
)
position_map
=
{}
R
=
[]
G
=
[]
B
=
[]
# 统计位置(高度)
# 分成五等份,1/5 2/5 3/5 4/5
for
i
in
range
(
4
):
frame_index
=
math
.
floor
((
i
+
1
)
*
frame_num
/
5
)
...
...
@@ -70,9 +101,10 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
for
j
in
range
(
10
):
#每个阶段取10帧
for
k
in
range
(
120
):
#隔120帧取一帧
_
,
frame
=
video
.
read
()
_
,
frame
=
video
.
read
()
if
(
j
+
1
)
*
120
+
frame_index
>
frame_num
:
break
_
,
frame
=
video
.
read
(
)
# cv2.imwrite(str.format('./imgs_2/{}_{}.png',i+1,j),frame
)
res
=
ocr
.
ocr
(
frame
,
cls
=
True
)
for
result_item
in
res
:
[
x1
,
y1
],[
x2
,
y2
],[
x3
,
y3
],[
x4
,
y4
]
=
result_item
[
0
]
...
...
@@ -80,22 +112,87 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
english_text
=
''
.
join
(
re
.
findall
(
r'[A-Za-z]'
,
text
))
# 跳过英文字幕
if
(
len
(
english_text
)
/
len
(
text
)
<
0.1
):
# 提取ocr区域像素点颜色
subtitle_img
=
frame
[
int
(
y1
):
int
(
y3
),
int
(
x1
):
int
(
x2
)]
r
=
subtitle_img
[:,:,
0
]
.
copy
()
r
=
r
.
reshape
(
-
1
)
g
=
subtitle_img
[:,:,
1
]
.
copy
()
g
=
g
.
reshape
(
-
1
)
b
=
subtitle_img
[:,:,
2
]
.
copy
()
b
=
b
.
reshape
(
-
1
)
R
=
np
.
append
(
R
,
r
)
G
=
np
.
append
(
G
,
g
)
B
=
np
.
append
(
B
,
b
)
print
(
i
+
1
,
j
,
text
)
# 分析文字位置
if
len
(
position_map
)
==
0
:
position_map
[(
y1
,
y3
)]
=
1
else
:
# 在列表中查找
keys
=
list
(
position_map
.
keys
())
flag
=
False
for
key
in
keys
:
if
abs
(
y1
-
key
[
0
])
<
2
and
abs
(
y3
-
key
[
1
])
<
2
:
height
=
y3
-
y1
threshold
=
int
((
y3
-
y1
)
*
0.3
)
# print(threshold)
if
abs
(
y1
-
key
[
0
])
<
threshold
or
abs
(
y3
-
key
[
1
])
<
threshold
:
position_map
[
key
]
+=
1
# # 绘制字幕横线到图片上:
# frame1 = cv2.line(frame,(0,int(key[0])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[0])),(255,0,0),2)
# frame1 = cv2.line(frame,(0,int(key[1])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[1])),(255,0,0),2)
# file_name = './mytest('+str(key[0])+','+str(key[1])+').png'
# cv2.imwrite(file_name,frame1)
flag
=
True
# 如果能归为某一类则flag置为True
break
else
:
position_map
[(
y1
,
y3
)]
=
1
print
(
sorted
(
position_map
.
items
(),
key
=
lambda
kv
:(
kv
[
1
],
kv
[
0
])))
y1
,
y2
=
sorted
(
position_map
.
items
(),
key
=
lambda
kv
:(
kv
[
1
],
kv
[
0
]))[
-
1
][
0
]
y1
=
y1
/
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
y2
=
y2
/
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
return
y1
,
y2
if
flag
==
False
:
# 如果找不到,则向列表添加
position_map
[(
y1
,
y3
)]
=
1
# # 绘制字幕横线到图片上:
# frame1 = cv2.line(frame,(0,int(y1)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y1)),(255,0,0),2)
# frame1 = cv2.line(frame,(0,int(y3)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y3)),(255,0,0),2)
# file_name = './mytest('+str(y1)+','+str(y3)+').png'
# cv2.imwrite(file_name,frame1)
# 给出字幕行数
position_rank_list
=
sorted
(
position_map
.
items
(),
key
=
lambda
kv
:(
kv
[
1
],
kv
[
0
]))
print
(
position_rank_list
)
line_num
=
0
num
=
position_rank_list
[
-
1
][
1
]
y1
,
y2
=
position_rank_list
[
-
1
][
0
]
height
=
abs
(
y2
-
y1
)
result
=
[]
item_result
=
[]
for
item
in
position_rank_list
:
if
item
[
1
]
>
num
*
0.3
and
item
[
0
][
0
]
>
0.5
*
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
and
item
[
0
][
1
]
>
0.5
*
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
):
#出现次数达到rank1的30%以上,判定为新的一行字幕
line_num
+=
1
y1
,
y2
=
item
[
0
]
y1
=
y1
/
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
y2
=
y2
/
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
item_result
.
append
([
y1
,
y2
])
print
([
line_num
,
height
,
item_result
])
# 给出文字和背景的颜色
import
matplotlib.pyplot
as
plt
from
sklearn
import
cluster
plt
.
hist
(
R
)
plt
.
savefig
(
'R-hist-all'
)
plt
.
close
()
# 尝试聚成两类(k-means算法)
estimator
=
cluster
.
KMeans
(
n_clusters
=
2
)
estimator
.
fit
(
R
.
reshape
(
-
1
,
1
))
# print(R[0].shape)
r_centers
=
estimator
.
cluster_centers_
.
reshape
(
-
1
)
estimator
=
cluster
.
KMeans
(
n_clusters
=
2
)
estimator
.
fit
(
G
.
reshape
(
-
1
,
1
))
g_centers
=
estimator
.
cluster_centers_
.
reshape
(
-
1
)
estimator
=
cluster
.
KMeans
(
n_clusters
=
2
)
estimator
.
fit
(
B
.
reshape
(
-
1
,
1
))
b_centers
=
estimator
.
cluster_centers_
.
reshape
(
-
1
)
r
,
g
,
b
=
max
(
r_centers
),
max
(
g_centers
),
max
(
b_centers
)
print
(
'颜色结果:'
,
r
,
g
,
b
)
# 返回值:字幕行数 字幕高度 字幕位置(比例形式) 字幕颜色(r,g,b)
# [2, 109.0, [[0.8637892376681614, 0.9243273542600897], [0.9304932735426009, 0.9915919282511211]],(r,g,b)]
return
item_result
[
0
][
0
],
item_result
[
0
][
1
]
return
[
line_num
,
height
,
item_result
,(
r
,
g
,
b
)]
ocr_positions
=
[]
...
...
@@ -257,6 +354,29 @@ def normalize(text: str) -> str:
return
text
def resize_img(img):
    """Rescale ``img`` so its height is about 152 pixels, keeping the aspect ratio.

    Images shorter than 152 px are enlarged; taller images are shrunk.

    Args:
        img: image array whose first two ``shape`` entries are (height, width).

    Returns:
        The resized image produced by ``cv2.resize`` (bilinear interpolation).

    Raises:
        ZeroDivisionError: if ``img`` has a height of 0.
    """
    resize_height = 152
    height, width = img.shape[:2]
    # A single factor serves both directions: > 1 enlarges, < 1 shrinks.
    scale_factor = float(resize_height / height)
    if resize_height > height:
        # Enlarge: compute the explicit target size (truncated to whole pixels).
        new_width = int(width * scale_factor)
        new_height = int(height * scale_factor)
        return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    # Shrink: the factor used to be height / resize_height (>= 1), which made
    # tall images even larger instead of reducing them to the target height.
    return cv2.resize(img, None, fx=scale_factor, fy=scale_factor,
                      interpolation=cv2.INTER_LINEAR)
def
extract_white_prior
(
img
,
threshold
=
200
):
gray
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_BGR2GRAY
)
...
...
@@ -267,7 +387,8 @@ def extract_white_prior(img, threshold=200):
index
=
0
def
detect_subtitle
(
org_img
:
np
.
ndarray
)
->
Tuple
[
Union
[
str
,
None
],
float
]:
t
=
140
def
detect_subtitle
(
org_img
:
np
.
ndarray
,
lastSubTitle
,
last_confidence
)
->
Tuple
[
Union
[
str
,
None
],
float
]:
"""检测当前画面得到字幕信息
Args:
...
...
@@ -276,34 +397,64 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
Returns:
Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
"""
subTitle
=
''
ocr_res
=
""
# up_b = 276
# down_b = 297
global
ocr_positions
# ocr_positions.append([676, 712])
h
=
None
global
index
for
i
in
range
(
len
(
ocr_positions
)):
img
=
copy
.
deepcopy
(
org_img
)
up_b
=
ocr_positions
[
i
][
0
]
down_b
=
ocr_positions
[
i
][
1
]
height
=
down_b
-
up_b
# if len(ocr_positions) == 1:
# img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
# else:
cropped_img
=
img
[
int
(
up_b
):
int
(
down_b
)]
# 定义要添加的上下空白的高度
padding_top
=
height
*
0.7
padding_bottom
=
height
*
0.7
# 计算新图像的高度
new_height
=
cropped_img
.
shape
[
0
]
+
padding_top
+
padding_bottom
# 创建一个新的空白图像
img
=
np
.
zeros
((
int
(
new_height
),
cropped_img
.
shape
[
1
],
3
),
dtype
=
np
.
uint8
)
# 将裁剪后的图像放置在新图像中间
start_y
=
int
(
padding_top
)
end_y
=
start_y
+
cropped_img
.
shape
[
0
]
img
[
start_y
:
end_y
,
:]
=
cropped_img
if
len
(
ocr_positions
)
==
1
:
img
=
img
[
int
(
up_b
-
height
*
0.7
):
int
(
down_b
+
height
*
0.7
)]
else
:
# cropped_img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
cropped_img
=
img
[
int
(
up_b
):
int
(
down_b
)]
# cropped_img = resize_img(cropped_img)
# x = float(150 / height)
# img_h, img_w = cropped_img.shape[:2]
# img_h = int(img_h * x)
# img_w = int(img_w * x)
# cropped_img = cv2.resize(cropped_img, (img_w, img_h))
# 定义要添加的上下空白的高度
padding_top
=
height
*
0.7
padding_bottom
=
height
*
0.7
# padding_top = 150
# padding_bottom = 150
# 计算新图像的高度
new_height
=
cropped_img
.
shape
[
0
]
+
padding_top
+
padding_bottom
# 创建一个新的空白图像
img
=
np
.
zeros
((
int
(
new_height
),
cropped_img
.
shape
[
1
],
3
),
dtype
=
np
.
uint8
)
# 将裁剪后的图像放置在新图像中间
start_y
=
int
(
padding_top
)
end_y
=
start_y
+
cropped_img
.
shape
[
0
]
img
[
start_y
:
end_y
,
:]
=
cropped_img
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# global index
# cv2.imwrite(f'./cap/cap{index}.png', img)
# img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
# t = 230
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# cv2.imwrite(f'./test2.png', img)
# index = index + 1
# img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
# 针对低帧率的视频做图像放大处理
print
(
height
)
...
...
@@ -312,7 +463,8 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print
(
img
.
shape
)
if
img
.
shape
[
1
]
<
1000
:
img
=
cv2
.
resize
(
img
,
(
int
(
img
.
shape
[
1
]
*
1.5
),
int
(
img
.
shape
[
0
]
*
1.5
)))
global
index
# img = extract_white_prior(img)
...
...
@@ -327,6 +479,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
continue
# return None, 0, None
possible_txt
=
[]
subTitle
=
''
conf
=
0
print
(
'res --------->'
,
res
)
res
.
sort
(
key
=
lambda
rect
:
rect
[
0
][
0
][
0
]
+
rect
[
0
][
1
][
0
])
# 按照中心点排序
...
...
@@ -343,6 +496,11 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print
(
"文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}"
.
format
(
txt
,
confidence
,
mid
/
img
.
shape
[
1
],
gradient
,
font_size
))
print
(
"字体大小差距: {}"
,
format
(
height
-
font_size
))
print
(
"高度中心:{}"
.
format
((
rect
[
0
][
1
]
+
rect
[
1
][
1
])
/
2
/
img
.
shape
[
0
]))
# if confidence < 0.95:
# # global t
# print("小于0.95,重新检测,阈值为:" + str(t))
# t = t + 20
# return detect_subtitle(org_img, lastSubTitle, last_confidence)
if
h
==
None
:
h
=
font_size
conf_thred1
=
0.7
...
...
@@ -371,9 +529,18 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
# subTitle = ' '.join([x[0] for x in possible_txt])
subTitle
=
' '
.
join
(
possible_txt
)
print
(
subTitle
,
conf
)
if
len
(
subTitle
)
>
0
:
return
subTitle
,
conf
,
h
return
None
,
0
,
None
if
len
(
subTitle
)
>
0
:
ocr_res
=
ocr_res
+
subTitle
if
(
len
(
ocr_res
))
>
0
:
print
(
">>>>>>>>>>>>cur subtitle:"
+
ocr_res
+
",confidence: "
+
str
(
confidence
)
+
",last_confidence: "
+
str
(
last_confidence
))
# if len(ocr_positions) == 1 and last_confidence != None and confidence != None and confidence < last_confidence:
# ocr_res = lastSubTitle
# confidence = last_confidence
# print(">>>>>>>>>>>>res subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
return
ocr_res
,
confidence
,
conf
,
h
if
check_have_ocr
(
img
):
return
"err"
,
None
,
0
,
None
return
None
,
None
,
0
,
None
...
...
@@ -402,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
video
=
cv2
.
VideoCapture
(
video_path
)
fps
=
video
.
get
(
cv2
.
CAP_PROP_FPS
)
lastSubTitle
=
None
last_confidence
=
None
lastConf
=
0
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
res
=
[]
...
...
@@ -447,8 +615,9 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
mainWindow
.
projectContext
.
nd_process
=
state
[
0
]
mainWindow
.
projectContext
.
last_time
=
cur_time
subTitle
,
conf
,
cur_ocr_h
=
detect_subtitle
(
frame
)
subTitle
,
confidence
,
conf
,
cur_ocr_h
=
detect_subtitle
(
frame
,
lastSubTitle
,
last_confidence
)
if
subTitle
==
"err"
:
continue
if
subTitle
is
not
None
:
...
...
@@ -490,20 +659,20 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
fl
=
False
cur_time1
=
cur_time
continue
end_time
=
cur_time
res
.
append
([
start_time
,
end_time
,
lastSubTitle
])
if
(
len
(
res
)
==
1
and
res
[
-
1
][
0
]
-
last_time
>=
1
)
or
(
len
(
res
)
>
1
and
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
>=
1
:
print
(
'--------------------------------------------------'
)
recommend_lens
=
int
((
res
[
-
1
][
0
]
-
last_time
)
*
normal_speed
)
if
len
(
res
)
==
1
else
int
(
(
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
*
normal_speed
)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list
(
mainWindow
,
"旁白"
,
[
''
,
''
,
''
,
'
%
d'
%
recommend_lens
],
ocr_h
)
print
(
start_time
,
end_time
,
lastSubTitle
)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
],
ocr_h
)
print
(
">>>>>>subtitle,ocr_h2:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
#
end_time = cur_time
#
res.append([start_time, end_time, lastSubTitle])
#
if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
#
print('--------------------------------------------------')
#
recommend_lens = int((res[-1][0] - last_time) * normal_speed) if len(res) == 1 else int(
#
(res[-1][0] - res[-2][1]) * normal_speed)
#
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
#
# add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
#
add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
#
print(start_time, end_time, lastSubTitle)
#
#
write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
#
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
#
print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
elif
lastSubTitle
is
not
None
and
subTitle
is
not
None
:
# 两句话连在一起,但是两句话不一样
if
string_similar
(
lastSubTitle
,
subTitle
)
<
0.6
:
...
...
@@ -527,10 +696,26 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
lastSubTitle
=
subTitle
if
conf
>
lastConf
else
lastSubTitle
continue
# 当前字幕与上一段字幕不一样
lastSubTitle
=
subTitle
lastConf
=
conf
if
subTitle
!=
"err"
:
lastSubTitle
=
subTitle
last_confidence
=
confidence
lastConf
=
conf
print
(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1"
)
# print(ocr_h_map)
# mainWindow.refresh_tab_slot()
while
(
mainWindow
.
refresh_flag
==
True
):
time
.
sleep
(
1
)
mainWindow
.
detect_lock
=
True
try
:
process_err_ocr
(
mainWindow
)
finally
:
mainWindow
.
detect_lock
=
False
# mainWindow.refresh_lock.acquire()
# try:
# process_err_ocr(mainWindow)
# finally:
# mainWindow.refresh_lock.release()
...
...
@@ -572,6 +757,7 @@ def process_err_ocr(mainWindow):
print
(
">>>>>>>>>will rm"
+
str
(
table_indexs
[
rm_list
[
i
]]
+
1
-
i
)
+
", subtitle:"
+
mainWindow
.
projectContext
.
all_elements
[
table_indexs
[
rm_list
[
i
]]
-
i
]
.
subtitle
)
print
(
table_indexs
[
rm_list
[
i
]])
mainWindow
.
del_line_operation_slot
(
row
=
table_indexs
[
rm_list
[
i
]]
+
1
-
i
,
show_msg_flag
=
False
)
time
.
sleep
(
0.5
)
except
Exception
as
e
:
print
(
">>>>>>>>>>>>>>>>>>>>>>del err"
)
print
(
e
)
...
...
@@ -695,9 +881,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
# print("process the total video at time: ", datetime.datetime.now())
process_video
(
video_path
,
start_time
,
end_time
,
book_name_xlsx
,
sheet_name_xlsx
,
state
,
mainWindow
)
def check_have_ocr(img):
    """Tell whether ``img`` contains any near-white pixel.

    The image is converted from BGR to grayscale and binarised with a
    threshold of 230; the result is True when at least one pixel ends up
    at 255 after thresholding, otherwise False.
    """
    white_cutoff = 230
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, white_cutoff, 255, cv2.THRESH_BINARY)
    return bool(np.any(binary == 255))
if
__name__
==
'__main__'
:
# path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
path
=
"C:/Users/Smile/Desktop/accessibility-movie/"
# print("get_pos:", get_position(path, 0))
evaluate_position
(
"C:/Users/AIA/Desktop/1/1.mp4"
,
0
)
# evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
# img = cv2.imread("./cap/cap879.png")
img
=
cv2
.
imread
(
"./cap/cap812.png"
,
cv2
.
IMREAD_GRAYSCALE
)
# img = cv2.equalizeHist(img)
t
=
230
_
,
img
=
cv2
.
threshold
(
img
,
t
,
255
,
cv2
.
THRESH_BINARY
)
# img = resize_img(img)
detect_subtitle
(
img
,
None
,
None
)
# img = cv2.equalizeHist(img)
# t = 120
# _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
# # ret, binary_image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# # binary_image = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
cv2
.
imwrite
(
f
'./binary2.png'
,
img
)
if
np
.
any
(
img
==
255
):
print
(
"111111"
)
else
:
print
(
"222222"
)
main_window.py
View file @
9b46d4c7
...
...
@@ -13,7 +13,7 @@ import os
import
cv2
from
PyQt5
import
QtWidgets
from
PyQt5.QtWidgets
import
QMainWindow
,
QFileDialog
,
QTableWidget
,
QTableWidgetItem
,
QAbstractItemView
,
QProgressBar
,
QLabel
,
QApplication
,
QPushButton
,
QMenu
from
PyQt5.QtWidgets
import
QMainWindow
,
QFileDialog
,
QTableWidget
,
QTableWidgetItem
,
QAbstractItemView
,
QProgressBar
,
QLabel
,
QApplication
,
QPushButton
,
QMenu
,
QDialog
,
QVBoxLayout
from
PyQt5.QtCore
import
QUrl
,
Qt
,
QTimer
,
QRect
,
pyqtSignal
,
QPersistentModelIndex
from
PyQt5.QtMultimedia
import
*
from
PyQt5.QtGui
import
QIcon
...
...
@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
from
ffmpeg_util
import
adjust_audio_volume
from
PyQt5.QtCore
import
QThread
;
import
threading
# from emit_import_event import emit_import_event
...
...
@@ -71,13 +72,57 @@ class WorkerThread(QThread):
for
t
in
self
.
main_window
.
threads
:
t
.
start
()
def refresh(self):
    """Emit the main window's ``import_process_sign`` with ``self.elements``."""
    progress_signal = self.main_window.import_process_sign
    progress_signal.emit(self.elements)
class ProcessErrThread(QThread):
    """QThread whose ``run`` starts ``process_err`` on a daemon ``RunThread``."""

    def __init__(self, main_window):
        """Store the main window that is later passed to ``process_err``."""
        super().__init__()
        self.main_window = main_window

    def run(self):
        """Create a daemon ``RunThread`` for ``process_err`` and start it."""
        # Imported inside the method rather than at module level.
        from narratage_detection import process_err

        worker = RunThread(
            funcName=process_err,
            args=(self.main_window,),
            name="process_err",
        )
        worker.setDaemon(True)
        worker.start()
class AutoCloseDialog(QDialog):
    """Dialog holding a single label that says subtitle-boundary location is running."""

    def __init__(self, parent=None):
        """Build the dialog with customised window flags and the notice label."""
        super().__init__(parent)
        self.setWindowFlags(Qt.CustomizeWindowHint | Qt.NoDropShadowWindowHint)
        notice_layout = QVBoxLayout()
        notice_layout.addWidget(QLabel("字幕边界定位中,请稍后!"))
        self.setLayout(notice_layout)
class LocalOcrThread(QThread):
    """QThread that runs ``main_window.auto_location_ocr(path)`` on a daemon ``RunThread``."""

    def __init__(self, main_window, path):
        """Remember the main window and the video path to analyse."""
        super().__init__()
        self.path = path
        self.main_window = main_window

    def run(self):
        """Reset the window's task state and start the subtitle-location worker."""
        window = self.main_window

        # One-element list shared with the window: published first, then set to 0.1.
        progress = [None]
        window.state = progress
        progress[0] = 0.1

        # The window's current task list is replaced; all_threads keeps accumulating.
        window.threads = []
        locator = RunThread(
            funcName=window.auto_location_ocr,
            args=(self.path,),
            name="auto_location_ocr",
        )
        locator.setDaemon(True)
        window.threads.append(locator)
        window.all_threads.append(locator)

        for pending in window.threads:
            pending.start()

    def finish(self):
        """Emit the main window's ``location_ocr_sign`` signal."""
        self.main_window.location_ocr_sign.emit()
class
CustomDelegate
(
QtWidgets
.
QStyledItemDelegate
):
def
paint
(
self
,
painter
,
option
,
index
):
# Customize the painting behavior for the specific column
...
...
@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
renew_signal
=
pyqtSignal
(
str
)
import_process_sign
=
pyqtSignal
(
list
)
refresh_sign
=
pyqtSignal
()
location_ocr_sign
=
pyqtSignal
()
def
__init__
(
self
,
project_path
):
super
(
MainWindow
,
self
)
.
__init__
()
self
.
location_ocr_sign
.
connect
(
self
.
finish_location_ocr
)
self
.
last_aside_index
=
None
self
.
setupUi
(
self
)
self
.
statusbar
.
showMessage
(
"hello"
,
5000
)
...
...
@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# 所有QTimer集中管理
self
.
import_excel_timer
=
QTimer
()
self
.
import_excel_timer
.
timeout
.
connect
(
self
.
check_if_import_excel_over
)
self
.
location_ocr_timer
=
QTimer
()
self
.
location_ocr_timer
.
timeout
.
connect
(
self
.
check_if_location_ocr
)
self
.
detect_timer
=
QTimer
()
self
.
detect_timer
.
timeout
.
connect
(
self
.
check_if_detect_over_slot
)
self
.
synthesis_timer
=
QTimer
()
...
...
@@ -416,6 +465,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
aside_head_time
=
None
self
.
add_head_aside
=
False
self
.
ocr_ranges
=
[]
self
.
refresh_lock
=
threading
.
Lock
()
self
.
detect_lock
=
False
self
.
refresh_flag
=
False
# 打印到log文件中
t
=
RunThread
(
funcName
=
make_print_to_file
,
args
=
os
.
path
.
join
(
os
.
getcwd
(),
'log'
),
name
=
"logging"
)
...
...
@@ -425,13 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
get_focus_thread
.
setDaemon
(
True
)
get_focus_thread
.
start
()
def finish_location_ocr(self):
    """Show the "subtitle location finished, please check and adjust" prompt.

    The message is displayed through ``self.import_excel_dialog``.
    """
    notice = "字幕定位结束,请检查是否准确,并调整正确"
    self.import_excel_dialog.show_with_msg(notice)
def
show_confirmation_dialog
(
self
):
confirm_box
=
QtWidgets
.
QMessageBox
.
question
(
self
,
u'警告'
,
u'确认已经校准OCR范围(只包含中文,不含英文)?'
,
QtWidgets
.
QMessageBox
.
Yes
|
QtWidgets
.
QMessageBox
.
No
)
if
confirm_box
==
QtWidgets
.
QMessageBox
.
Yes
:
self
.
show_detect_dialog
()
else
:
print
(
1
)
print
(
">>>>>>show_confirmation_dialog"
)
def
getFocus
(
self
):
while
(
True
):
...
...
@@ -520,12 +575,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""弹出旁白区间检测相关信息填写窗口
"""
# if self.rate == None:
# self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
# return
# if self.rate_bottom == None:
# self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
# return
if
self
.
rate
==
None
:
# self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
h
=
self
.
widget
.
get_h
()
video_h
=
self
.
wgt_video
.
height
()
self
.
rate
=
float
(
h
-
10
)
/
float
(
video_h
)
if
self
.
rate_bottom
==
None
:
# self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
h
=
self
.
widget_bottom
.
get_h
()
video_h
=
self
.
wgt_video
.
height
()
self
.
rate_bottom
=
float
(
h
-
6
)
/
float
(
video_h
)
if
not
self
.
check_ocr_rate
():
self
.
prompt_dialog
.
show_with_msg
(
"字幕上边界不能低于下边界"
)
return
...
...
@@ -631,12 +690,30 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
action_operate
.
setEnabled
(
True
)
self
.
action_insert_aside_from_now
.
setEnabled
(
True
)
self
.
insert_aside_from_now_btn
.
setEnabled
(
True
)
# from detect_with_ocr import evaluate_position
# print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
# y1,y2 = evaluate_position(path.path()[1:], 0)
# self.widget.setY(y1)
# self.widget_bottom.setY(y2)
# print("y1:%d,y2:%d" %(y1,y2))
# self.import_excel_dialog.show_with_msg("正在自动定位字幕边界,请稍后!")
confirm_box
=
QtWidgets
.
QMessageBox
.
question
(
self
,
u'警告'
,
u'是否需要自动定位字幕边界?'
,
QtWidgets
.
QMessageBox
.
Yes
|
QtWidgets
.
QMessageBox
.
No
)
if
confirm_box
==
QtWidgets
.
QMessageBox
.
Yes
:
self
.
location_ocr_thread
=
LocalOcrThread
(
self
,
path
)
self
.
location_ocr_thread
.
start
()
self
.
location_ocr_timer
.
start
(
1000
)
# self.auto_close_dialog = AutoCloseDialog()
# self.auto_close_dialog.exec_()
else
:
print
(
">>>>>>>>>>>>>play_video"
)
def auto_location_ocr(self, path):
    """Locate the subtitle band with OCR and move both boundary widgets onto it.

    Args:
        path: object whose ``path()`` returns the video location string. The
            first character is dropped before use (presumably the leading "/"
            of a file URL such as "/C:/..." -- confirm against the caller).
    """
    from detect_with_ocr import evaluate_position

    video_path = path.path()[1:]
    print(">>>>>>>>>>>>>>>>>>>>>v_path" + video_path)
    # The two results are multiplied by the widget height below, i.e. they are
    # treated as fractions of the frame height.
    y1, y2 = evaluate_position(video_path, 0)
    video_h = self.wgt_video.height()
    self.widget.setY(int(video_h * y1) + 6)
    time.sleep(1)
    self.widget_bottom.setY(int(video_h * y2) + 10)
    # "%d" truncated these fractional values to 0, making the log line useless.
    print("y1:%.4f,y2:%.4f" % (y1, y2))
def
check_ocr_rate
(
self
):
if
self
.
rate
>
self
.
rate_bottom
:
...
...
@@ -969,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
self
.
check_if_over
(
"旁白导入"
)
def check_if_location_ocr(self):
    """Poll the subtitle-location task.

    Delegates to ``check_if_over("字幕定位")`` and then, as soon as any thread
    in ``self.threads`` is no longer alive, stops ``location_ocr_timer`` and
    clears ``self.threads``.
    """
    self.check_if_over("字幕定位")
    every_thread_running = all(worker.is_alive() for worker in self.threads)
    if every_thread_running:
        return
    self.location_ocr_timer.stop()
    self.threads = []
# type = 检测 或 合成 或 导出
def
check_if_over
(
self
,
type
:
str
):
"""确认传入的待检测任务是否完成
...
...
@@ -996,12 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
elif
type
==
"检测"
:
self
.
detect_timer
.
stop
()
self
.
refresh_tab_timer
.
stop
()
from
narratage_detection
import
process_err
process_err
(
self
)
# t = ProcessErrThread(self)
# t.start()
# from narratage_detection import process_err
# process_err(self)
elif
type
==
"旁白导入"
:
self
.
import_excel_timer
.
stop
()
# self.refresh_tab_timer.stop()
elif
type
==
"字幕定位"
:
self
.
location_ocr_timer
.
stop
()
else
:
self
.
export_timer
.
stop
()
...
...
@@ -1016,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
progressBar
.
setValue
(
100
)
self
.
progressLabel
.
setText
(
f
"100
%
"
)
self
.
projectContext
.
nd_process
=
1
self
.
threads
=
[]
def
deal_synthesis_callback_slot
(
self
,
threads
,
state
):
"""实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态
...
...
@@ -1400,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
item
=
QTableWidgetItem
(
text
)
item
.
setTextAlignment
(
Qt
.
AlignCenter
)
# 设置为不可编辑
if
self
.
checkIfTableItemCanChange
(
table
,
idx
,
j
)
==
False
:
#
if self.checkIfTableItemCanChange(table, idx, j) == False:
# item.setFlags(Qt.ItemIsEnabled)
print
(
1
)
# print(">>>>>>>>>setElememtToTable"
)
table
.
setItem
(
idx
,
j
,
item
)
# 只有Content页的字幕列和 Aside页的字幕列 可编辑
...
...
@@ -1871,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
pre_item
=
self
.
all_tableWidget
.
item
(
row
,
col
-
1
)
suggest
=
pre_item
.
text
()
if
suggest
!=
None
and
suggest
!=
""
:
print
(
">>>>>>>>suggest:"
+
suggest
)
if
col
==
constant
.
Content
.
AsideColumnNumber
and
suggest
!=
None
and
suggest
!=
""
:
arrays
=
suggest
.
split
(
"/"
)
if
len
(
arrays
)
==
2
:
suggest
=
str
(
len
(
text
))
+
"/"
+
arrays
[
1
]
...
...
@@ -1902,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
# self.all_tableWidget.setItem(
# int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
self
.
projectContext
.
refresh_speed
(
row
,
text
)
elif
col
==
constant
.
Content
.
SubtitleColumnNumber
:
self
.
projectContext
.
refresh_subtitle
(
row
,
text
)
# self.all_tableWidget_idx = int(row)
# self.set_table_to_window(False)
...
...
@@ -2037,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
将表格内容更新至界面中,并保存当前工程内容
"""
self
.
set_table_to_window
(
need_refresh_all
=
False
)
self
.
projectContext
.
save_project
(
False
)
if
not
self
.
detect_lock
:
self
.
refresh_flag
=
True
try
:
self
.
set_table_to_window
(
need_refresh_all
=
False
)
self
.
projectContext
.
save_project
(
False
)
finally
:
self
.
refresh_flag
=
False
def
refresh_all_tab_slot
(
self
):
"""刷新整个表格
...
...
main_window_ui.py
View file @
9b46d4c7
...
...
@@ -65,6 +65,8 @@ class MyWidget(QWidget):
print
(
">>>>>cur_y2 : "
+
str
(
self
.
y
()))
self
.
move
(
0
,
h
)
def get_h(self):
    """Return this widget's current ``y()`` value."""
    current_y = self.y()
    return current_y
def
down
(
self
,
mov_len
):
...
...
management.py
View file @
9b46d4c7
...
...
@@ -264,6 +264,11 @@ class ProjectContext:
if
not
self
.
initial_ing
:
save_excel_to_path
(
self
.
all_elements
,
self
.
excel_path
,
self
.
write_header
,
self
.
excel_sheet_name
)
def refresh_subtitle(self, row, subtitle: str):
    """Set the subtitle of the element at ``row`` and persist the table.

    Args:
        row: index of the element; converted with ``int()`` before use.
        subtitle: new subtitle text.

    The Excel file is rewritten unless ``self.initial_ing`` is set.
    """
    element = self.all_elements[int(row)]
    element.subtitle = subtitle
    if self.initial_ing:
        return
    save_excel_to_path(
        self.all_elements,
        self.excel_path,
        self.write_header,
        self.excel_sheet_name,
    )
def
refresh_speed
(
self
,
row
,
speed
:
str
)
->
None
:
self
.
all_elements
[
int
(
row
)]
.
speed
=
speed
if
not
self
.
initial_ing
:
...
...
@@ -308,7 +313,7 @@ class ProjectContext:
if
d
[
"终止时间"
][
i
]
is
None
:
# 如果是最后一条
if
i
==
len
(
d
[
"字幕"
])
-
1
:
print
(
1
)
print
(
">>>>>>>>>load_excel_from_path"
)
# ed_time_sec = "360000" if self.duration == 0 else self.duration # todo 默认最大时长是100h
else
:
ed_time_sec
=
"
%.2
f"
%
(
float
(
d
[
"起始时间"
][
i
+
1
])
-
0.01
)
...
...
@@ -429,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
backup_path
=
os
.
path
.
dirname
(
new_excel_path
)
+
"/tmp_"
+
str
(
time
.
time
())
+
".xlsx"
# os.remove(new_excel_path)
os
.
rename
(
new_excel_path
,
backup_path
)
# print(">>>>>>new_excel_path:" + new_excel_path)
# print(">>>>>>>>>>backup_path:" + backup_path)
try
:
create_sheet
(
new_excel_path
,
"旁白插入位置建议"
,
[
header
])
# for element in all_element:
...
...
narratage_detection.py
View file @
9b46d4c7
...
...
@@ -65,7 +65,11 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
def
process_err
(
mainWindow
:
MainWindow
=
None
):
from
detect_with_ocr
import
process_err_ocr
process_err_ocr
(
mainWindow
)
try
:
process_err_ocr
(
mainWindow
)
except
Exception
as
e
:
print
(
"process_err err"
)
print
(
e
)
if
__name__
==
'__main__'
:
# 定义参数
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment