Commit 71880733 authored by 翟艳秋(20软)

可正常打包的最后一版

parent 5b0ac742
/dist/
/build/
/missing_packages/
/dist
/build
/missing_packages
.idea/
__pycache__/
dataset/hysxm
!dataset/test.wav
!dataset/test_vad.wav
models/
lm/
log/
audios_cache/
data_utils/*.pyc
data_utils/featurizer/*.pyc
decoders/*.pyc
model_utils/*.pyc
nohup.out
test.py
tools/generate_audio/models/
tools/generate_audio/dgk_lost_conv/
tools/generate_audio/corpus.txt
\ No newline at end of file
......@@ -75,7 +75,6 @@ def predict_long_audio_with_paddle(wav_path, pre_time, state):
for i, audio_path in enumerate(audios_path):
print("{}开始处理{}".format(paddle.get_device(), audio_path))
# 标识当前语音识别的进度
state[0] = float((i + 1) / len(audio_path)) if state[0] is None or state[0] < 0.99 else 0.99
text = asr_executor(
model='conformer_wenetspeech',
lang='zh',
......@@ -94,8 +93,8 @@ def predict_long_audio_with_paddle(wav_path, pre_time, state):
narratages.append(
[round(time_stamps[i][0] + pre_time, 2), round(time_stamps[i][1] + pre_time, 2), text, ''])
last_time = time_stamps[i][1]
print(
"第%d个分割音频 对应时间为%.2f-%.2f 识别结果: %s" % (i, time_stamps[i][0] + pre_time, time_stamps[i][1] + pre_time, text))
print("第%d个分割音频 对应时间为%.2f-%.2f 识别结果: %s" % (i, time_stamps[i][0] + pre_time, time_stamps[i][1] + pre_time, text))
state[0] = float((i + 1) / len(audios_path)) if state[0] is None or state[0] < 0.99 else 0.99
print("最终结果,消耗时间:%d, 识别结果: %s" % (round((time.time() - start) * 1000), texts))
# 完成后删除分割出来的音频
......
import os
import shutil
import time
import openpyxl
......@@ -6,7 +8,6 @@ from openpyxl.styles import PatternFill, Alignment
from split_wav import *
def trans_to_mono(wav_path):
"""
将音频的通道数channel转换为1
......@@ -34,7 +35,8 @@ def concat_wav(root):
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None):
# 临时存储各种中间产物的文件夹
tmp_root = './tmp'
tmp_root = os.path.join(os.path.dirname(video_path), 'tmp')
print(tmp_root)
if not os.path.exists(tmp_root):
os.mkdir(tmp_root)
......@@ -44,7 +46,7 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None
# 提取出视频中的音频,分割后提取出其中的人声部分并存储
audio_path = extract_audio(video_path, tmp_root, start_time, end_time)
# root = split_audio()
# extrac_speech()
# extract_speech()
#
# # 将提取出的人声拼接,并将音频的channel调整为1
# total_wav_path = concat_wav(root)
......@@ -60,9 +62,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None
table_content = table_head + predict_long_audio_with_paddle(audio_path, start_time, state)
from detect_with_ocr import write_excel_xlsx
write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
# 删除中间文件
# shutil.rmtree(tmp_root)
shutil.rmtree(tmp_root)
state[0] = 1
if __name__ == '__main__':
......
eagle_2.ico

16.6 KB

This diff is collapsed.
Zeranoe FFmpeg Builds <http://ffmpeg.zeranoe.com/builds/>
Build: ffmpeg-4.3.1-win64-static
Configuration:
--enable-gpl
--enable-version3
--enable-sdl2
--enable-fontconfig
--enable-gnutls
--enable-iconv
--enable-libass
--enable-libdav1d
--enable-libbluray
--enable-libfreetype
--enable-libmp3lame
--enable-libopencore-amrnb
--enable-libopencore-amrwb
--enable-libopenjpeg
--enable-libopus
--enable-libshine
--enable-libsnappy
--enable-libsoxr
--enable-libsrt
--enable-libtheora
--enable-libtwolame
--enable-libvpx
--enable-libwavpack
--enable-libwebp
--enable-libx264
--enable-libx265
--enable-libxml2
--enable-libzimg
--enable-lzma
--enable-zlib
--enable-gmp
--enable-libvidstab
--enable-libvmaf
--enable-libvorbis
--enable-libvo-amrwbenc
--enable-libmysofa
--enable-libspeex
--enable-libxvid
--enable-libaom
--enable-libgsm
--disable-w32threads
--enable-libmfx
--enable-ffnvcodec
--enable-cuda-llvm
--enable-cuvid
--enable-d3d11va
--enable-nvenc
--enable-nvdec
--enable-dxva2
--enable-avisynth
--enable-libopenmpt
--enable-amf
Libraries:
SDL 20200724-a8f8265 <https://libsdl.org>
Fontconfig 2.13.92 <http://freedesktop.org/wiki/Software/fontconfig>
GnuTLS 3.6.14 <https://gnutls.org/>
libiconv 1.16 <http://gnu.org/software/libiconv>
libass 20200714-66dba8d <https://github.com/libass/libass>
dav1d 20200720-6cf58c8 <https://code.videolan.org/videolan/dav1d>
libbluray 20200723-76b073b <https://www.videolan.org/developers/libbluray.html>
FreeType 2.10.2 <http://freetype.sourceforge.net>
LAME 3.100 <http://lame.sourceforge.net>
OpenCORE AMR 20170731-07a5be4 <https://sourceforge.net/projects/opencore-amr>
OpenJPEG 20200630-cbee789 <https://github.com/uclouvain/openjpeg>
Opus 20200707-484af25 <https://opus-codec.org>
shine 20190420-76ea4f0 <https://github.com/savonet/shine>
Snappy 1.1.8 <https://github.com/google/snappy>
libsoxr 20180224-945b592 <http://sourceforge.net/projects/soxr>
SRT 1.4.1 <https://www.srtalliance.org>
Theora 20200618-f98989a <http://theora.org>
TwoLAME 0.4.0 <http://twolame.org>
vpx 20200723-dbe00bb <http://webmproject.org>
WavPack 5.3.0 <http://wavpack.com>
WebP 20200505-e3c259a <https://developers.google.com/speed/webp>
x264 20200714-db0d417 <https://www.videolan.org/developers/x264.html>
x265 20200529-73ca1d7 <https://bitbucket.org/multicoreware/x265/wiki/Home>
libxml2 2.9.10 <http://xmlsoft.org>
z.lib 20200716-ecc9d07 <https://github.com/sekrit-twc/zimg>
XZ Utils 5.2.5 <http://tukaani.org/xz>
zlib 1.2.11 <http://zlib.net>
GMP 6.2.0 <https://gmplib.org>
vid.stab 20190213-aeabc8d <http://public.hronopik.de/vid.stab>
VMAF 20200725-4fb36ef <https://github.com/Netflix/vmaf>
Vorbis 20200720-4a767c9 <http://vorbis.com>
VisualOn AMR-WB 20141107-3b3fcd0 <https://sourceforge.net/projects/opencore-amr>
libmysofa 20200710-6f4f25e <https://github.com/hoene/libmysofa>
Speex 20200716-870ff84 <http://speex.org>
Xvid 1.3.7 <https://labs.xvid.com>
aom 20200724-0da8c24 <https://aomedia.googlesource.com/aom>
GSM 1.0.19 <http://quut.com/gsm/>
libmfx 1.28 <https://software.intel.com/en-us/media-sdk>
nv-codec-headers 20200701-de0b1bf <https://git.videolan.org/?p=ffmpeg/nv-codec-headers.git>
AviSynth+ 20200619-1eb7ce6 <https://github.com/AviSynth/AviSynthPlus>
OpenMPT 20200726-5de00f4 <https://openmpt.org>
AMF 20200515-802f92e <https://gpuopen.com/gaming-product/advanced-media-framework>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
This source diff could not be displayed because it is too large. You can view the blob instead.
/* Links: default and visited colours; anchors with the summary-letter
   class are shown without an underline. */
a.summary-letter {
text-decoration: none;
}
a {
color: #2D6198;
}
a:visited {
color: #884488;
}
/* Page layout: a dark page background (body) with white #banner and
   #container boxes on top of it. */

/* Banner: white, centred content; relatively positioned. */
#banner {
background-color: white;
position: relative;
text-align: center;
}
/* Small vertical spacing around the banner image. */
#banner img {
margin-bottom: 1px;
margin-top: 5px;
}
/* Note: this is the element with id "body", not the <body> tag below. */
#body {
margin-left: 1em;
margin-right: 1em;
}
/* Document body: dark grey background, no outer margin, justified text. */
body {
background-color: #313131;
margin: 0;
text-align: justify;
}
/* Utility class: auto horizontal margins plus centred text. */
.center {
margin-left: auto;
margin-right: auto;
text-align: center;
}
/* Content container: white box with dark text and 1em side margins. */
#container {
background-color: white;
color: #202020;
margin-left: 1em;
margin-right: 1em;
}
#footer {
text-align: center;
}
/* Headings h1-h4. */

/* Links inside headings take the heading's own colour and decoration
   instead of the generic link colours. */
h1 a, h2 a, h3 a, h4 a {
text-decoration: inherit;
color: inherit;
}
/* Shared heading box: left/vertical padding, rounded corners and a green
   border. The per-level rules below must come after this one, because h1
   overrides the vertical padding set here. */
h1, h2, h3, h4 {
padding-left: 0.4em;
border-radius: 4px;
padding-bottom: 0.25em;
padding-top: 0.25em;
border: 1px solid #6A996A;
}
/* Per-level rules: font size shrinks and the green background gets
   lighter from h1 down to h4. */
h1 {
background-color: #7BB37B;
color: #151515;
font-size: 1.2em;
padding-bottom: 0.3em;
padding-top: 0.3em;
}
h2 {
color: #313131;
font-size: 1.0em;
background-color: #ABE3AB;
}
/* h3 and h4 use a negative bottom margin, which reduces the gap to the
   content that follows. */
h3 {
color: #313131;
font-size: 0.9em;
margin-bottom: -6px;
background-color: #BBF3BB;
}
h4 {
color: #313131;
font-size: 0.8em;
margin-bottom: -8px;
background-color: #D1FDD1;
}
/* Images are drawn without a border. */
img {
border: 0;
}
/* Navigation bars: #navbar and #navbar_secondary share the same grey-green
   background, padding and centred text. */

/* Primary bar: bordered on top and bottom, with a 12px gap above it. */
#navbar {
background-color: #738073;
border-bottom: 1px solid #5C665C;
border-top: 1px solid #5C665C;
margin-top: 12px;
padding: 0.3em;
position: relative;
text-align: center;
}
/* Links in either bar: white, padded, no underline. */
#navbar a, #navbar_secondary a {
color: white;
padding: 0.3em;
text-decoration: none;
}
/* Hover state: dark background behind the link, text stays white. */
#navbar a:hover, #navbar_secondary a:hover {
background-color: #313131;
color: white;
text-decoration: none;
}
/* Secondary bar: bordered on the bottom, left and right (no top border
   and no top margin are set here). */
#navbar_secondary {
background-color: #738073;
border-bottom: 1px solid #5C665C;
border-left: 1px solid #5C665C;
border-right: 1px solid #5C665C;
padding: 0.3em;
position: relative;
text-align: center;
}
/* Text elements. */

/* Paragraphs get 1em side margins. */
p {
margin-left: 1em;
margin-right: 1em;
}
/* Preformatted blocks: wider 3em side margins, light grey background and
   a thin grey border. */
pre {
margin-left: 3em;
margin-right: 3em;
padding: 0.3em;
border: 1px solid #bbb;
background-color: #f7f7f7;
}
/* Definition-list terms are bold. */
dl dt {
font-weight: bold;
}
/* Element with id "proj_desc" uses a larger font (presumably the project
   description - confirm against the HTML that uses this stylesheet). */
#proj_desc {
font-size: 1.2em;
}
/* #repos: presumably a table (it has th/td descendants and uses
   border-collapse) - confirm against the HTML. Collapsed green borders,
   with header cells filled in the same green as h1. */
#repos {
margin-left: 1em;
margin-right: 1em;
border-collapse: collapse;
border: solid 1px #6A996A;
}
#repos th {
background-color: #7BB37B;
border: solid 1px #6A996A;
}
#repos td {
padding: 0.2em;
border: solid 1px #6A996A;
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment