Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_1
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
翟艳秋(20软)
accessibility_movie_1
Commits
5f39d7a7
Commit
5f39d7a7
authored
Mar 01, 2022
by
翟艳秋(20软)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
优化ocr的结果
parent
71880733
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
1 deletion
+31
-1
detect_with_ocr.py
detect_with_ocr.py
+30
-0
main_gui.py
main_gui.py
+1
-1
No files found.
detect_with_ocr.py
View file @
5f39d7a7
...
@@ -6,6 +6,7 @@ from paddleocr import PaddleOCR
...
@@ -6,6 +6,7 @@ from paddleocr import PaddleOCR
import
difflib
import
difflib
import
openpyxl
import
openpyxl
from
openpyxl.styles
import
PatternFill
,
Alignment
from
openpyxl.styles
import
PatternFill
,
Alignment
import
re
# 字幕的上下边界
# 字幕的上下边界
up_b
,
down_b
=
0
,
0
up_b
,
down_b
=
0
,
0
...
@@ -86,6 +87,12 @@ def get_position(video_path, start_time):
...
@@ -86,6 +87,12 @@ def get_position(video_path, start_time):
return
up_b
+
height
,
down_b
+
height
return
up_b
+
height
,
down_b
+
height
def erasePunc(txt):
    """Remove every character outside the range U+4E00..U+9FA5 from ``txt``.

    Punctuation, digits, Latin letters and whitespace are all dropped, so
    only characters inside that CJK range remain.

    :param txt: input string
    :return: ``txt`` with all characters outside U+4E00..U+9FA5 removed
    """
    # Negated character class: matches anything that is NOT in the range.
    non_chinese = r'[^\u4e00-\u9fa5]'
    return re.sub(non_chinese, '', txt)
def
string_similar
(
s1
,
s2
):
def
string_similar
(
s1
,
s2
):
"""
"""
比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
...
@@ -93,9 +100,30 @@ def string_similar(s1, s2):
...
@@ -93,9 +100,30 @@ def string_similar(s1, s2):
:param s2:
:param s2:
:return: 字符串间的相似度
:return: 字符串间的相似度
"""
"""
# 去除非中文字符后,再比较相似度
s1
=
erasePunc
(
s1
)
s2
=
erasePunc
(
s2
)
return
difflib
.
SequenceMatcher
(
None
,
s1
,
s2
)
.
quick_ratio
()
return
difflib
.
SequenceMatcher
(
None
,
s1
,
s2
)
.
quick_ratio
()
def normalize(text):
    """Normalize some of the punctuation marks in ``text``.

    Steps, in order:

    1. Replace each character of ``E_pun`` with the character at the same
       position in ``C_pun``.
    2. Strip a fixed set of noise characters from both ends.
    3. If the text ends with ``)`` but does not start with ``(``, prepend
       ``(``; if it starts with ``(`` but does not end with ``)``, append
       ``)``.

    :param text: string to clean up
    :return: the cleaned string; an empty string when nothing is left after
        stripping
    """
    # Convert English punctuation to Chinese punctuation.
    # NOTE(review): in this copy several pairs map a character to itself
    # (e.g. ',' -> ','); the intended targets may be full-width forms —
    # confirm against the repository before relying on this table.
    E_pun = u',.!?()[]:;'
    C_pun = u',。!?()【】:;'
    table = {ord(f): ord(t) for f, t in zip(E_pun, C_pun)}
    text = text.translate(table)
    text = text.strip(' ,。、【】_·:-@‘[;')

    # Stripping can consume the whole string (e.g. "..." or "[]"); indexing
    # text[0] / text[-1] below would then raise IndexError.
    if not text:
        return text

    # Make the leading and trailing parentheses match.
    starts_open = text[0] == '('
    ends_closed = text[-1] == ')'
    if ends_closed and not starts_open:
        text = '(' + text
    elif starts_open and not ends_closed:
        text = text + ')'
    return text
def
detect_subtitle
(
img
):
def
detect_subtitle
(
img
):
"""
"""
检测当前画面得到字幕信息
检测当前画面得到字幕信息
...
@@ -162,6 +190,8 @@ def process_video(video_path, begin, end, state):
...
@@ -162,6 +190,8 @@ def process_video(video_path, begin, end, state):
state
[
0
]
=
float
((
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
begin
)
/
(
end
-
begin
))
\
state
[
0
]
=
float
((
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
begin
)
/
(
end
-
begin
))
\
if
state
[
0
]
is
None
or
state
[
0
]
<
0.99
else
0.99
if
state
[
0
]
is
None
or
state
[
0
]
<
0.99
else
0.99
subTitle
=
detect_subtitle
(
frame
)
subTitle
=
detect_subtitle
(
frame
)
if
subTitle
is
not
None
:
subTitle
=
normalize
(
subTitle
)
# 第一次找到字幕
# 第一次找到字幕
if
lastSubTitle
is
None
and
subTitle
is
not
None
:
if
lastSubTitle
is
None
and
subTitle
is
not
None
:
start_time
=
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
start_time
=
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
...
...
main_gui.py
View file @
5f39d7a7
...
@@ -416,7 +416,7 @@ tabControl = ttk.Notebook(window)
...
@@ -416,7 +416,7 @@ tabControl = ttk.Notebook(window)
tab1
=
ttk
.
Frame
(
tabControl
)
tab1
=
ttk
.
Frame
(
tabControl
)
tabControl
.
add
(
tab1
,
text
=
"旁白位置推荐"
)
tabControl
.
add
(
tab1
,
text
=
"旁白位置推荐"
)
tab2
=
ttk
.
Frame
(
tabControl
)
tab2
=
ttk
.
Frame
(
tabControl
)
tabControl
.
add
(
tab2
,
text
=
"旁白
语音合成
及字幕导出"
)
tabControl
.
add
(
tab2
,
text
=
"旁白及字幕导出"
)
tabControl
.
pack
(
expand
=
1
,
fill
=
"both"
)
tabControl
.
pack
(
expand
=
1
,
fill
=
"both"
)
"""
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment