Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_1
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
翟艳秋(20软)
accessibility_movie_1
Commits
5f39d7a7
You need to sign in or sign up before continuing.
Commit
5f39d7a7
authored
Mar 01, 2022
by
翟艳秋(20软)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
优化ocr的结果
parent
71880733
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
1 deletion
+31
-1
detect_with_ocr.py
detect_with_ocr.py
+30
-0
main_gui.py
main_gui.py
+1
-1
No files found.
detect_with_ocr.py
View file @
5f39d7a7
...
...
@@ -6,6 +6,7 @@ from paddleocr import PaddleOCR
import
difflib
import
openpyxl
from
openpyxl.styles
import
PatternFill
,
Alignment
import
re
# 字幕的上下边界
up_b
,
down_b
=
0
,
0
...
...
@@ -86,6 +87,12 @@ def get_position(video_path, start_time):
return
up_b
+
height
,
down_b
+
height
# Matches every character outside the basic CJK Unified Ideographs range
# (U+4E00..U+9FA5). Compiled once because the helper runs on every
# subtitle comparison.
_NON_CHINESE_RE = re.compile(r'[^\u4e00-\u9fa5]')


def erasePunc(txt):
    """Return *txt* with every non-Chinese character removed.

    Despite the name, this drops more than punctuation: Latin letters,
    digits, whitespace and any other character outside U+4E00..U+9FA5 are
    removed as well, leaving only Chinese ideographs.

    :param txt: input string
    :return: the string reduced to its Chinese characters (possibly empty)
    """
    return _NON_CHINESE_RE.sub('', txt)
def
string_similar
(
s1
,
s2
):
"""
比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
...
...
@@ -93,9 +100,30 @@ def string_similar(s1, s2):
:param s2:
:return: 字符串间的相似度
"""
# 去除非中文字符后,再比较相似度
s1
=
erasePunc
(
s1
)
s2
=
erasePunc
(
s2
)
return
difflib
.
SequenceMatcher
(
None
,
s1
,
s2
)
.
quick_ratio
()
# Maps ASCII punctuation to its full-width Chinese counterpart; built once
# instead of on every call.
_E_PUN = u',.!?()[]:;'
_C_PUN = u',。!?()【】:;'
_PUN_TABLE = str.maketrans(_E_PUN, _C_PUN)
# Noise characters removed from both ends of the text.
_STRIP_CHARS = ' ,。、【】_·:-@‘[;'


def normalize(text):
    """Normalize the punctuation of a piece of subtitle text.

    Steps:
      1. Convert English punctuation to the corresponding Chinese punctuation.
      2. Strip spaces and stray punctuation from both ends.
      3. If the text has a full-width parenthesis on only one end, add the
         matching one on the other end.

    :param text: the text to normalize
    :return: the normalized text; an empty string when nothing is left
        after stripping (the original code raised IndexError here)
    """
    text = text.translate(_PUN_TABLE).strip(_STRIP_CHARS)

    # Nothing left to inspect: indexing text[0] / text[-1] would fail.
    if not text:
        return text

    # Balance a full-width parenthesis that appears on only one end.
    opens = text[0] == '('
    closes = text[-1] == ')'
    if closes and not opens:
        text = '(' + text
    elif opens and not closes:
        text = text + ')'
    return text
def
detect_subtitle
(
img
):
"""
检测当前画面得到字幕信息
...
...
@@ -162,6 +190,8 @@ def process_video(video_path, begin, end, state):
state
[
0
]
=
float
((
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
begin
)
/
(
end
-
begin
))
\
if
state
[
0
]
is
None
or
state
[
0
]
<
0.99
else
0.99
subTitle
=
detect_subtitle
(
frame
)
if
subTitle
is
not
None
:
subTitle
=
normalize
(
subTitle
)
# 第一次找到字幕
if
lastSubTitle
is
None
and
subTitle
is
not
None
:
start_time
=
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
...
...
main_gui.py
View file @
5f39d7a7
...
...
@@ -416,7 +416,7 @@ tabControl = ttk.Notebook(window)
tab1
=
ttk
.
Frame
(
tabControl
)
tabControl
.
add
(
tab1
,
text
=
"旁白位置推荐"
)
tab2
=
ttk
.
Frame
(
tabControl
)
tabControl
.
add
(
tab2
,
text
=
"旁白
语音合成
及字幕导出"
)
tabControl
.
add
(
tab2
,
text
=
"旁白及字幕导出"
)
tabControl
.
pack
(
expand
=
1
,
fill
=
"both"
)
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment