赵心治 / accessibility_movie_2 / Commits / d3fcd34f

Commit d3fcd34f, authored Nov 05, 2023 by smile2019
Multi-line subtitle detection, subtitle color extraction, and channel-logo watermark exclusion
parent c035e726

Showing 1 changed file with 108 additions and 6 deletions.

detect_with_ocr.py (+108, -6), view file @ d3fcd34f
...
...
@@ -51,7 +51,38 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
     start = int(start_time * fps)
     video.set(cv2.CAP_PROP_POS_FRAMES, start)
     frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT)
     time1 = time.time()
     # There are 60*60*60*3 frames in total; sample 2000 frames evenly
     gap = math.floor(frame_num / 2000)  # stride from one sampled frame to the next
+    # # Reading approach 1: seek the read head to the target frame
+    # for i in range(2000):
+    #     frame_index = i*gap + 1
+    #     video.set(cv2.CAP_PROP_POS_FRAMES,frame_index)
+    #     _, frame = video.read()
+    #     cv2.imwrite(str.format('./extrated_imgs/{}.png',i),frame)
+    # Reading approach 2: read sequentially, saving when a target frame is reached
+    # i=1
+    # while i<frame_num:
+    #     _, frame = video.read()
+    #     if i%gap == 0:
+    #         print(i,gap)
+    #         cv2.imwrite(str.format('./extrated_imgs_2/{}.png',i),frame)
+    #     i = i + 1
+    # Main work:
+    paddle_dir = "res/.paddleocr/2.3.0.1/ocr/"
+    cur_cls_model_dir = paddle_dir + "cls/ch_ppocr_mobile_v2.0_cls_infer"
+    cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
+    cur_rec_model_dir = paddle_dir + "rec/ch/ch_PP-OCRv2_rec_infer"
+    ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False,
+                    cls_model_dir=cur_cls_model_dir, det_model_dir=cur_det_model_dir,
+                    rec_model_dir=cur_rec_model_dir)
+    position_map = {}
+    R = []
+    G = []
+    B = []
+    # Tally vertical positions (heights)
+    # Split the video into five equal parts: 1/5, 2/5, 3/5, 4/5
+    for i in range(4):
+        frame_index = math.floor((i + 1) * frame_num / 5)
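The two commented-out sampling strategies above trade seek overhead against decode overhead: CAP_PROP_POS_FRAMES seeks can be slow on long-GOP video, while sequential reads decode every frame. A minimal timing sketch of that trade-off, assuming a hypothetical local test file video.mp4 (not part of this commit):

    import time
    import cv2

    video = cv2.VideoCapture('video.mp4')  # hypothetical test file
    frame_num = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    gap = max(frame_num // 2000, 1)

    # Approach 1: jump the read head to every sampled frame.
    t0 = time.time()
    for i in range(0, frame_num, gap):
        video.set(cv2.CAP_PROP_POS_FRAMES, i)
        ok, frame = video.read()
    print('seek-based sampling:', time.time() - t0, 's')

    # Approach 2: decode every frame, keep one per gap.
    video.set(cv2.CAP_PROP_POS_FRAMES, 0)
    t0 = time.time()
    i = 0
    while True:
        ok, frame = video.read()
        if not ok:
            break
        if i % gap == 0:
            pass  # the sampled frame would be processed or saved here
        i += 1
    print('sequential sampling:', time.time() - t0, 's')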
...
...
@@ -59,9 +90,10 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
         for j in range(10):  # take 10 frames in each segment
             for k in range(120):  # one frame every 120 frames
                 _, frame = video.read()
-            _, frame = video.read()
+            if (j + 1) * 120 + frame_index > frame_num:
+                break
+            _, frame = video.read()
             # cv2.imwrite(str.format('./imgs_2/{}_{}.png',i+1,j),frame)
             res = ocr.ocr(frame, cls=True)
             for result_item in res:
                 [x1, y1], [x2, y2], [x3, y3], [x4, y4] = result_item[0]
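For context on the unpacking above: with the PaddleOCR 2.3.x models pinned in this commit, ocr.ocr(frame, cls=True) appears to return a flat list of detections, each shaped [box, (text, score)] with box holding the four corners of the text quadrilateral. This is an assumption based on that API generation; later PaddleOCR releases wrap the list per page:

    # Assumed shape of one detection as consumed by this hunk (2.3-era format):
    # result_item == [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], ("text", 0.97)]
    for result_item in res:
        box = result_item[0]          # four corners, clockwise from top-left
        text, score = result_item[1]  # recognized string and its confidence
        [x1, y1], [x2, y2], [x3, y3], [x4, y4] = box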
...
...
@@ -69,22 +101,87 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
                 english_text = ''.join(re.findall(r'[A-Za-z]', text))
                 # Skip English subtitles
                 if (len(english_text) / len(text) < 0.1):
+                    # Extract the pixel colors of the OCR region
+                    subtitle_img = frame[int(y1):int(y3), int(x1):int(x2)]
+                    r = subtitle_img[:, :, 0].copy()
+                    r = r.reshape(-1)
+                    g = subtitle_img[:, :, 1].copy()
+                    g = g.reshape(-1)
+                    b = subtitle_img[:, :, 2].copy()
+                    b = b.reshape(-1)
+                    R = np.append(R, r)
+                    G = np.append(G, g)
+                    B = np.append(B, b)
                     print(i + 1, j, text)
                     # Analyze the text position
                     if len(position_map) == 0:
                         position_map[(y1, y3)] = 1
                     else:
                         # Search the recorded positions
                         keys = list(position_map.keys())
                         flag = False
                         for key in keys:
-                            if abs(y1 - key[0]) < 2 and abs(y3 - key[1]) < 2:
+                            height = y3 - y1
+                            threshold = int((y3 - y1) * 0.3)
+                            # print(threshold)
+                            if abs(y1 - key[0]) < threshold or abs(y3 - key[1]) < threshold:
                                 position_map[key] += 1
+                                # # Draw the subtitle band onto the image:
+                                # frame1 = cv2.line(frame,(0,int(key[0])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[0])),(255,0,0),2)
+                                # frame1 = cv2.line(frame,(0,int(key[1])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[1])),(255,0,0),2)
+                                # file_name = './mytest('+str(key[0])+','+str(key[1])+').png'
+                                # cv2.imwrite(file_name,frame1)
                                 flag = True  # flag is set to True once the box is grouped into an existing class
                                 break
                         else:
                             if flag == False:
                                 # If no match is found, add a new entry
                                 position_map[(y1, y3)] = 1
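The change above replaces the old fixed 2-pixel tolerance with a threshold of 30% of the box height, so detections whose top or bottom edge falls near an already-seen (y1, y3) band vote for that band. A standalone sketch of the same grouping rule (hypothetical helper, not part of the commit):

    def group_positions(boxes, ratio=0.3):
        """Count how often each (top, bottom) subtitle band is seen.
        boxes: iterable of (y_top, y_bottom) pairs from OCR detections."""
        position_map = {}
        for y1, y3 in boxes:
            threshold = int((y3 - y1) * ratio)
            for key in position_map:
                if abs(y1 - key[0]) < threshold or abs(y3 - key[1]) < threshold:
                    position_map[key] += 1
                    break
            else:
                position_map[(y1, y3)] = 1
        return position_map

    # Example: three detections of the same band plus one outlier.
    print(group_positions([(880, 930), (882, 931), (879, 929), (100, 150)]))
    # -> {(880, 930): 3, (100, 150): 1}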
     print(sorted(position_map.items(), key=lambda kv: (kv[1], kv[0])))
-    y1, y2 = sorted(position_map.items(), key=lambda kv: (kv[1], kv[0]))[-1][0]
+    # # Draw the subtitle band onto the image:
+    # frame1 = cv2.line(frame,(0,int(y1)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y1)),(255,0,0),2)
+    # frame1 = cv2.line(frame,(0,int(y3)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y3)),(255,0,0),2)
+    # file_name = './mytest('+str(y1)+','+str(y3)+').png'
+    # cv2.imwrite(file_name,frame1)
+    # Report the number of subtitle lines
+    position_rank_list = sorted(position_map.items(), key=lambda kv: (kv[1], kv[0]))
+    print(position_rank_list)
+    line_num = 0
+    num = position_rank_list[-1][1]
+    y1, y2 = position_rank_list[-1][0]
+    height = abs(y2 - y1)
+    result = []
+    item_result = []
+    for item in position_rank_list:
+        if item[1] > num * 0.3 and item[0][0] > 0.5 * video.get(cv2.CAP_PROP_FRAME_HEIGHT) and item[0][1] > 0.5 * video.get(cv2.CAP_PROP_FRAME_HEIGHT):
+            # Seen at least 30% as often as rank 1: judged to be another subtitle line
+            line_num += 1
+            y1, y2 = item[0]
+            y1 = y1 / video.get(cv2.CAP_PROP_FRAME_HEIGHT)
+            y2 = y2 / video.get(cv2.CAP_PROP_FRAME_HEIGHT)
-            return y1, y2
+            item_result.append([y1, y2])
+    print([line_num, height, item_result])
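The two 0.5 * frame-height conditions in the loop above are what implement the watermark exclusion from the commit message: a band only counts as a subtitle line if both its edges sit in the lower half of the frame, which rules out channel logos near the top even when they are detected in many frames. A condensed sketch of the same filter on made-up numbers:

    frame_height = 1080  # assumed for the example

    # (band, hit_count) pairs, sorted ascending by count as in position_rank_list:
    position_rank_list = [((40, 90), 80), ((933, 993), 96), ((1005, 1070), 118)]
    top_count = position_rank_list[-1][1]

    subtitle_lines = [
        (band, count)
        for band, count in position_rank_list
        if count > top_count * 0.3          # at least 30% as frequent as rank 1
        and band[0] > 0.5 * frame_height    # top edge in the lower half...
        and band[1] > 0.5 * frame_height    # ...and bottom edge too
    ]
    print(subtitle_lines)
    # -> [((933, 993), 96), ((1005, 1070), 118)]; the (40, 90) logo band is dropped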
+    # Report the text and background colors
+    import matplotlib.pyplot as plt
+    from sklearn import cluster
+    plt.hist(R)
+    plt.savefig('R-hist-all')
+    plt.close()
+    # Try clustering into two classes (k-means)
+    estimator = cluster.KMeans(n_clusters=2)
+    estimator.fit(R.reshape(-1, 1))
+    # print(R[0].shape)
+    r_centers = estimator.cluster_centers_.reshape(-1)
+    estimator = cluster.KMeans(n_clusters=2)
+    estimator.fit(G.reshape(-1, 1))
+    g_centers = estimator.cluster_centers_.reshape(-1)
+    estimator = cluster.KMeans(n_clusters=2)
+    estimator.fit(B.reshape(-1, 1))
+    b_centers = estimator.cluster_centers_.reshape(-1)
+    r, g, b = max(r_centers), max(g_centers), max(b_centers)
+    print('颜色结果:', r, g, b)  # i.e. "color result:"
+    # Return value: subtitle line count, subtitle height, subtitle positions (as height ratios), subtitle color (r, g, b)
+    # e.g. [2, 109.0, [[0.8637892376681614, 0.9243273542600897], [0.9304932735426009, 0.9915919282511211]], (r, g, b)]
+    return item_result[0][0], item_result[0][1]
+    # NOTE: unreachable as committed; this richer value matches the comment above
+    return [line_num, height, item_result, (r, g, b)]
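The color step pools every subtitle-region pixel per channel, clusters each channel into two groups with k-means, and keeps the brighter center as the text color, on the assumption that subtitle glyphs are lighter than their local background. A self-contained sketch on synthetic pixels (not the commit's data):

    import numpy as np
    from sklearn import cluster

    rng = np.random.default_rng(0)
    # Synthetic red-channel pixels: dark background plus near-white glyphs.
    background = rng.normal(40, 10, size=4000)
    glyphs = rng.normal(245, 5, size=1000)
    R = np.clip(np.concatenate([background, glyphs]), 0, 255)

    estimator = cluster.KMeans(n_clusters=2, n_init=10)
    estimator.fit(R.reshape(-1, 1))
    centers = estimator.cluster_centers_.reshape(-1)
    print('cluster centers:', sorted(centers))  # roughly [40, 245]
    print('text channel value:', max(centers))  # brighter cluster = glyph pixels

Taking max(...) per channel presumes light-on-dark subtitles; dark text on a bright background would need min(...) instead.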
 def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
     # return (885.0, 989.0)
...
...
@@ -118,6 +215,11 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]
     # down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
     print(up)
     # print(down)
+    up_rate, down_rate = evaluate_position(video_path, 0)
+    up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * up_rate)
+    down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * down_rate)
     return int(up), int(down)
+    # TODO At present the main window sets the subtitle start and end positions and passes them into this function; the goal is to detect them automatically.
+    # while True:
...
...
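With this hunk, get_position derives the subtitle band from evaluate_position instead of the caller-supplied rates, which no longer drive the result. A hedged usage sketch (hypothetical movie.mp4; the rate arguments are kept only to satisfy the unchanged signature):

    from detect_with_ocr import get_position

    # rate / rate_bottom remain in the signature but are effectively ignored now.
    up, down = get_position('movie.mp4', start_time=0.0, rate=0.8, rate_bottom=0.95)
    print('subtitle band in pixels: y in [%d, %d]' % (up, down))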