Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_2
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
赵心治
accessibility_movie_2
Commits
03b3f93e
Commit
03b3f93e
authored
Sep 26, 2023
by
陈晓勇(工程师)
Browse files
Options
Browse Files
Download
Plain Diff
dev
parents
ee03e0ae
464b8e26
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
86 additions
and
12 deletions
+86
-12
.gitignore
.gitignore
+2
-0
detect_with_ocr.py
detect_with_ocr.py
+71
-12
main_window.py
main_window.py
+6
-0
main_window_ui.py
main_window_ui.py
+7
-0
No files found.
.gitignore
View file @
03b3f93e
...
@@ -13,3 +13,4 @@ res/ffmpeg-4.3.1/bin/output.mp4
...
@@ -13,3 +13,4 @@ res/ffmpeg-4.3.1/bin/output.mp4
res/ffmpeg-4.3.1/bin/qiji_local.mp4
res/ffmpeg-4.3.1/bin/qiji_local.mp4
venv/
venv/
venv37/
venv37/
shenming_test
\ No newline at end of file
detect_with_ocr.py
View file @
03b3f93e
...
@@ -33,6 +33,8 @@ from main_window import MainWindow, Element
...
@@ -33,6 +33,8 @@ from main_window import MainWindow, Element
import
time
import
time
import
numpy
as
np
import
numpy
as
np
import
copy
import
copy
import
math
# 字幕的上下边界
# 字幕的上下边界
up_b
,
down_b
=
0
,
0
up_b
,
down_b
=
0
,
0
...
@@ -48,6 +50,43 @@ normal_speed = 4
...
@@ -48,6 +50,43 @@ normal_speed = 4
table_index
=
0
table_index
=
0
ocr_h_map
=
{}
ocr_h_map
=
{}
def
evaluate_position
(
video_path
:
str
,
start_time
:
float
)
->
Tuple
[
float
,
float
]:
print
(
">>>>>>video path:"
+
video_path
)
video
=
cv2
.
VideoCapture
(
video_path
)
fps
=
video
.
get
(
cv2
.
CAP_PROP_FPS
)
start
=
int
(
start_time
*
fps
)
video
.
set
(
cv2
.
CAP_PROP_POS_FRAMES
,
start
)
frame_num
=
video
.
get
(
cv2
.
CAP_PROP_FRAME_COUNT
)
position_map
=
{}
# 分成五等份,1/5 2/5 3/5 4/5
for
i
in
range
(
4
):
frame_index
=
math
.
floor
((
i
+
1
)
*
frame_num
/
5
)
video
.
set
(
cv2
.
CAP_PROP_POS_FRAMES
,
frame_index
)
for
j
in
range
(
10
):
#每个阶段取10帧
for
k
in
range
(
120
):
#隔120帧取一帧
_
,
frame
=
video
.
read
()
_
,
frame
=
video
.
read
()
res
=
ocr
.
ocr
(
frame
,
cls
=
True
)
for
result_item
in
res
:
[
x1
,
y1
],[
x2
,
y2
],[
x3
,
y3
],[
x4
,
y4
]
=
result_item
[
0
]
text
=
result_item
[
1
][
0
]
english_text
=
''
.
join
(
re
.
findall
(
r'[A-Za-z]'
,
text
))
# 跳过英文字幕
if
(
len
(
english_text
)
/
len
(
text
)
<
0.1
):
print
(
i
+
1
,
j
,
text
)
if
len
(
position_map
)
==
0
:
position_map
[(
y1
,
y3
)]
=
1
else
:
keys
=
list
(
position_map
.
keys
())
for
key
in
keys
:
if
abs
(
y1
-
key
[
0
])
<
2
and
abs
(
y3
-
key
[
1
])
<
2
:
position_map
[
key
]
+=
1
break
else
:
position_map
[(
y1
,
y3
)]
=
1
print
(
sorted
(
position_map
.
items
(),
key
=
lambda
kv
:(
kv
[
1
],
kv
[
0
])))
y1
,
y2
=
sorted
(
position_map
.
items
(),
key
=
lambda
kv
:(
kv
[
1
],
kv
[
0
]))[
-
1
][
0
]
return
y1
,
y2
ocr_positions
=
[]
ocr_positions
=
[]
...
@@ -228,11 +267,25 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
...
@@ -228,11 +267,25 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
up_b
=
ocr_positions
[
i
][
0
]
up_b
=
ocr_positions
[
i
][
0
]
down_b
=
ocr_positions
[
i
][
1
]
down_b
=
ocr_positions
[
i
][
1
]
height
=
down_b
-
up_b
height
=
down_b
-
up_b
img
=
img
[
int
(
up_b
-
height
*
0.7
):
int
(
down_b
+
height
*
0.7
)]
# if len(ocr_positions) == 1:
print
(
">>>>>into ocr"
)
# img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
print
(
int
(
up_b
-
height
*
0.7
))
# else:
print
(
int
(
down_b
+
height
*
0.7
))
cropped_img
=
img
[
int
(
up_b
):
int
(
down_b
)]
# img = img[int(up_b):int(down_b)]
# 定义要添加的上下空白的高度
padding_top
=
height
*
0.7
padding_bottom
=
height
*
0.7
# 计算新图像的高度
new_height
=
cropped_img
.
shape
[
0
]
+
padding_top
+
padding_bottom
# 创建一个新的空白图像
img
=
np
.
zeros
((
int
(
new_height
),
cropped_img
.
shape
[
1
],
3
),
dtype
=
np
.
uint8
)
# 将裁剪后的图像放置在新图像中间
start_y
=
int
(
padding_top
)
end_y
=
start_y
+
cropped_img
.
shape
[
0
]
img
[
start_y
:
end_y
,
:]
=
cropped_img
# img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
# 针对低帧率的视频做图像放大处理
# 针对低帧率的视频做图像放大处理
print
(
height
)
print
(
height
)
print
(
up_b
)
print
(
up_b
)
...
@@ -240,10 +293,10 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
...
@@ -240,10 +293,10 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
print
(
img
.
shape
)
print
(
img
.
shape
)
if
img
.
shape
[
1
]
<
1000
:
if
img
.
shape
[
1
]
<
1000
:
img
=
cv2
.
resize
(
img
,
(
int
(
img
.
shape
[
1
]
*
1.5
),
int
(
img
.
shape
[
0
]
*
1.5
)))
img
=
cv2
.
resize
(
img
,
(
int
(
img
.
shape
[
1
]
*
1.5
),
int
(
img
.
shape
[
0
]
*
1.5
)))
#
global index
global
index
#
cv2.imwrite(f'./cap/cap{index}.png', img)
cv2
.
imwrite
(
f
'./cap/cap{index}.png'
,
img
)
#
index = index + 1
index
=
index
+
1
cv2
.
imwrite
(
f
'./cap.png'
,
img
)
print
(
">>>>>>>>>>>>>>>>>>>>>>>>>>>new log"
+
str
(
index
-
1
)
)
res
=
ocr
.
ocr
(
img
,
cls
=
True
)
res
=
ocr
.
ocr
(
img
,
cls
=
True
)
print
(
'--------> res'
,
res
)
print
(
'--------> res'
,
res
)
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
1
])
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
1
])
...
@@ -277,6 +330,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
...
@@ -277,6 +330,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
# conf_thred2 = 0.7
# conf_thred2 = 0.7
if
(
rect
[
0
][
1
]
+
rect
[
1
][
1
])
/
2
/
img
.
shape
[
0
]
>
0.5
or
(
rect
[
0
][
1
]
+
rect
[
1
][
1
])
/
2
/
img
.
shape
[
0
]
<=
0.1
:
if
(
rect
[
0
][
1
]
+
rect
[
1
][
1
])
/
2
/
img
.
shape
[
0
]
>
0.5
or
(
rect
[
0
][
1
]
+
rect
[
1
][
1
])
/
2
/
img
.
shape
[
0
]
<=
0.1
:
continue
continue
# TODO 字幕去重算法改进
if
confidence
>
conf_thred1
and
gradient
<
0.1
and
0.4
<
mid
/
img
.
shape
[
1
]
<
0.6
:
if
confidence
>
conf_thred1
and
gradient
<
0.1
and
0.4
<
mid
/
img
.
shape
[
1
]
<
0.6
:
subTitle
+=
txt
subTitle
+=
txt
conf
=
max
(
conf
,
confidence
)
conf
=
max
(
conf
,
confidence
)
...
@@ -299,6 +353,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
...
@@ -299,6 +353,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
return
None
,
0
,
None
return
None
,
0
,
None
def
process_video
(
video_path
:
str
,
begin
:
float
,
end
:
float
,
book_path
:
str
,
sheet_name
:
str
,
state
=
None
,
mainWindow
:
MainWindow
=
None
):
def
process_video
(
video_path
:
str
,
begin
:
float
,
end
:
float
,
book_path
:
str
,
sheet_name
:
str
,
state
=
None
,
mainWindow
:
MainWindow
=
None
):
"""处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
"""处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
...
@@ -353,8 +408,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -353,8 +408,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
if
end_time
<
start_time
:
if
end_time
<
start_time
:
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end
,
3
),
lastSubTitle
,
''
],
ocr_h
=
ocr_h
)
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end
,
3
),
lastSubTitle
,
''
],
ocr_h
=
ocr_h
)
print
(
">>>>>>subtitle,ocr_h1:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
break
break
# 每秒取4帧画面左右
# 每秒取4帧画面左右
# TODO 取帧算法优化
if
cnt
%
int
(
fps
/
4
)
==
0
:
if
cnt
%
int
(
fps
/
4
)
==
0
:
# 更新当前工程的检测进度
# 更新当前工程的检测进度
if
pre_state
is
None
:
if
pre_state
is
None
:
...
@@ -394,7 +451,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -394,7 +451,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
],
ocr_h
)
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
],
ocr_h
)
print
(
">>>>>>subtitle,ocr_h:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
print
(
">>>>>>subtitle,ocr_h
2
:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
elif
lastSubTitle
is
not
None
and
subTitle
is
not
None
:
elif
lastSubTitle
is
not
None
and
subTitle
is
not
None
:
# 两句话连在一起,但是两句话不一样
# 两句话连在一起,但是两句话不一样
if
string_similar
(
lastSubTitle
,
subTitle
)
<
0.6
:
if
string_similar
(
lastSubTitle
,
subTitle
)
<
0.6
:
...
@@ -412,7 +469,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
...
@@ -412,7 +469,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
print
(
start_time
,
end_time
,
lastSubTitle
)
print
(
start_time
,
end_time
,
lastSubTitle
)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
],
ocr_h
)
add_to_list
(
mainWindow
,
"字幕"
,
[
round
(
start_time
,
3
),
round
(
end_time
,
3
),
lastSubTitle
,
''
],
ocr_h
)
print
(
">>>>>>subtitle,ocr_h:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
print
(
">>>>>>subtitle,ocr_h
3
:"
+
str
(
lastSubTitle
)
+
">>>"
+
str
(
ocr_h
))
start_time
=
end_time
start_time
=
end_time
else
:
else
:
lastSubTitle
=
subTitle
if
conf
>
lastConf
else
lastSubTitle
lastSubTitle
=
subTitle
if
conf
>
lastConf
else
lastSubTitle
...
@@ -588,5 +645,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
...
@@ -588,5 +645,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
path
=
"D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
# path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
path
=
"C:/Users/Smile/Desktop/accessibility-movie/"
# print("get_pos:", get_position(path, 0))
# print("get_pos:", get_position(path, 0))
evaluate_position
(
"C:/Users/AIA/Desktop/1/1.mp4"
,
0
)
main_window.py
View file @
03b3f93e
...
@@ -631,6 +631,12 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -631,6 +631,12 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
action_operate
.
setEnabled
(
True
)
self
.
action_operate
.
setEnabled
(
True
)
self
.
action_insert_aside_from_now
.
setEnabled
(
True
)
self
.
action_insert_aside_from_now
.
setEnabled
(
True
)
self
.
insert_aside_from_now_btn
.
setEnabled
(
True
)
self
.
insert_aside_from_now_btn
.
setEnabled
(
True
)
# from detect_with_ocr import evaluate_position
# print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
# y1,y2 = evaluate_position(path.path()[1:], 0)
# self.widget.setY(y1)
# self.widget_bottom.setY(y2)
# print("y1:%d,y2:%d" %(y1,y2))
def
check_ocr_rate
(
self
):
def
check_ocr_rate
(
self
):
if
self
.
rate
>
self
.
rate_bottom
:
if
self
.
rate
>
self
.
rate_bottom
:
...
...
main_window_ui.py
View file @
03b3f93e
...
@@ -58,8 +58,15 @@ class MyWidget(QWidget):
...
@@ -58,8 +58,15 @@ class MyWidget(QWidget):
# painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
# painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
# painter.drawLine(0, 1, 800, 1)
# painter.drawLine(0, 1, 800, 1)
# painter.end()
# painter.end()
print
(
">>>>>cur_y : "
+
str
(
self
.
y
()))
return
self
.
y
()
return
self
.
y
()
def
setY
(
self
,
h
):
print
(
">>>>>cur_y2 : "
+
str
(
self
.
y
()))
self
.
move
(
0
,
h
)
def
down
(
self
,
mov_len
):
def
down
(
self
,
mov_len
):
print
(
">>>>>>>>>>>down"
+
str
(
mov_len
))
print
(
">>>>>>>>>>>down"
+
str
(
mov_len
))
self
.
move
(
0
,
self
.
y
()
+
mov_len
)
self
.
move
(
0
,
self
.
y
()
+
mov_len
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment