Commit 742031dc authored by 翟艳秋(20软)'s avatar 翟艳秋(20软)

1. add progress bar; 2. upload paddlespeech; 3. add requirements.txt

parent 453f190c
Subproject commit 081f7807a2ce0e12b98e6f0a0da0e650133f2d9e
import shutil
import time
import openpyxl
......
......@@ -19,6 +19,7 @@ def trans_to_seconds(timepoint):
def detect(video_path, start_time, end_time, book_path):
print("开始检测")
if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
else:
......
azure_storage==0.37.0
cn2an==0.5.16
ffmpeg==1.4
ffmpeg_python==0.2.0
Flask==2.0.2
flask_cors==3.0.10
ijson==3.1.4
LAC==2.1.2
librosa==0.8.1
mttkinter==0.6.1
numpy==1.19.3
opencv_python==4.5.4.58
openpyxl==3.0.9
paddle==1.0.2
paddleocr==2.3.0.1
paddlepaddle_gpu==2.2.0
paddlespeech==0.1.0
parakeet==0.24
Pillow==9.0.0
pyaudio==0.2.11
pydub==0.25.1
PyYAML==6.0
resampy==0.2.2
scipy==1.6.1
SoundFile==0.10.3.post1
tqdm==4.62.3
visualdl==2.2.1
webrtcvad==2.0.10
yacs==0.1.8
zhconv==1.4.3
# coding=utf-8
import os
import argparse
import time
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
......@@ -44,10 +45,9 @@ def change_speed(wav_path, speed=1.5):
:param speed: 转换后的语速
:return:
"""
print("调速")
cmd_line = 'ffmpeg -y -i {} -filter:a \"atempo={}\" {}'.format(tmp_file, speed, wav_path)
os.system(cmd_line)
# 删除临时文件
os.remove(tmp_file)
def read_sheet(book_path, sheet_name=None):
......@@ -89,7 +89,7 @@ def get_narratage_text(sheet_content):
if text is not None:
if text == '翻译':
narratage_text.append(subtitle[i])
narratage_start_time.append(float(start_time[i]) + 0.1)
narratage_start_time.append(float(start_time[i]))
else:
# 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s
text_split = text.split('\n')
......@@ -97,7 +97,7 @@ def get_narratage_text(sheet_content):
for x in text_split:
narratage_text.append(x)
narratage_start_time.append(cur_start)
cur_start = cur_start + len(x) / (4.5 * args.speed) + 0.5
cur_start = cur_start + len(x) / 4.5 + 0.5
return narratage_text, narratage_start_time
......@@ -131,15 +131,20 @@ def export_caption(sheet_content, caption_file):
f.write(x + "\n\n")
def ss_and_export():
def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
"""
生成语音并导出字幕
:param sheet_path: 校对过的旁白脚本表格文件
:param output_dir: 存放音频文件的
:param speed:
:param caption_file:
:return:
"""
# 旁白解说表格的位置
book_path = args.sheet_path
book_path = sheet_path
# 音频输出位置路径
root_path = args.output_dir
# 语速
speed = args.speed
# 字幕文件路径
caption_file = args.caption_file
root_path = output_dir
# 如果文件夹不存在,则新建文件夹
if not os.path.exists(root_path):
......@@ -149,11 +154,19 @@ def ss_and_export():
sheet_content = read_sheet(book_path)
narratages, start_timepoint = get_narratage_text(sheet_content)
export_caption(sheet_content, caption_file)
print("已导出旁白文件")
# 生成旁白解说语音
for i, text in enumerate(narratages):
wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
speech_synthesis(text, wav_path, speed)
print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages))
# 删除临时语音文件
if os.path.exists(tmp_file):
time.sleep(1)
os.remove(tmp_file)
if __name__ == '__main__':
......@@ -166,4 +179,4 @@ if __name__ == '__main__':
args = parser.parse_args()
# 主函数执行
ss_and_export(args.output_dir,args.sheet_path,args.caption_file,args.speed)
ss_and_export(args.output_dir, args.sheet_path, args.speed, args.caption_file)
# Simplest approach is track separation: strip the background-music track directly,
# then cut the remaining vocal track on its silent segments.
# When there is only a single audio track, use a music-retrieval model to
# separate the vocals from the background music.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # hide all GPUs from CUDA (force CPU execution)
import numpy as np
from moviepy.editor import *
from pydub import AudioSegment
from spleeter.audio.adapter import AudioAdapter
from spleeter.separator import Separator
# Module-level state shared by the functions below; these defaults are
# overwritten at runtime by extract_audio() / split_audio().
audio_path = r'D:\AddCaption\disappearedBullet.wav'
folder_name = r'D:\AddCaption\hysxm'
split_len = 18
def extract_audio(video_path, root, start_time, end_time):
    """
    Extract the audio track of a video as 16 kHz mono WAV via ffmpeg.

    Also stores the output path in the module-global ``audio_path`` for the
    later pipeline stages.

    :param video_path: path of the source video
    :param root: directory that receives the extracted audio file
    :param start_time: actual start time of the video (passed to ffmpeg -ss)
    :param end_time: actual end time of the video; -1 means "until the end"
    :return: path of the extracted audio file
    """
    global audio_path
    stem = os.path.basename(video_path).split('.')[0]
    audio_path = os.path.join(root, stem + ".wav")
    # Only add a "-to" bound when a real end time was given.
    if end_time == -1:
        window = "-ss {}".format(start_time)
    else:
        window = "-ss {} -to {}".format(start_time, end_time)
    command = "ffmpeg -i {} -ar 16000 -ac 1 {} -y {}".format(video_path, window, audio_path)
    os.system(command)
    return audio_path
# 将音频划分为n个文件,返回存储
def split_audio():
    """
    Split the global audio file into 5-minute WAV chunks.

    Creates a sibling directory named after the audio file, writes the chunks
    as 01.wav, 02.wav, ... into it, and updates the module globals
    ``folder_name`` (chunk directory) and ``split_len`` (number of chunks).

    :return: path of the directory holding the chunks
    """
    global folder_name, split_len
    parent_dir = os.path.dirname(os.path.realpath(audio_path))
    # Chunk directory named after the audio file (extension stripped).
    folder_name = os.path.join(parent_dir, os.path.basename(audio_path)[:-4])
    if not os.path.exists(folder_name):
        os.mkdir(folder_name)
    source = AudioSegment.from_file(audio_path)
    chunk_ms = 300000  # 5 minutes per chunk, in milliseconds
    split_len = int(np.ceil(len(source) / chunk_ms))
    # Keep only the first channel when the audio is not mono.
    if source.channels > 1:
        source, _ = source.split_to_mono()
    for idx in range(split_len):
        part_path = os.path.join(folder_name, '{}.wav'.format(str(idx + 1).zfill(2)))
        source[chunk_ms * idx:chunk_ms * (idx + 1)].export(part_path, format='wav')
    return folder_name
# 从音频中提取人声
def extrac_speech():
    """
    Run spleeter 2-stems separation over every audio chunk.

    For each chunk NN.wav in the global ``folder_name`` directory, writes the
    isolated vocal track to <folder_name>/NN/vocal.wav.
    """
    separator = Separator('spleeter:2stems', multiprocess=False)
    adapter = AudioAdapter.default()
    for idx in range(1, split_len + 1):
        chunk_stem = str(idx).zfill(2)
        chunk_path = os.path.join(folder_name, '{}.wav'.format(chunk_stem))
        result_dir = os.path.join(folder_name, chunk_stem)
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)
        waveform, sample_rate = adapter.load(chunk_path)
        vocal_track = separator.separate(waveform)['vocals']
        adapter.save(os.path.join(result_dir, 'vocal.wav'), vocal_track, sample_rate)
# 提取背景音乐
def extract_bgm(src_path='test.wav'):
    """
    Extract the background music from a stereo file by phase cancellation.

    Inverts the right channel and overlays it on the left: anything panned
    dead-center (typically the vocals) cancels out, leaving the background
    music — noisy, but usable. Also exports a plain mono downmix for
    comparison.

    NOTE(review): assumes vocals are center-panned in the source mix — confirm
    for each input; off-center vocals will not cancel.

    :param src_path: stereo audio file to process (default kept for
        backward compatibility with the original hard-coded 'test.wav')
    :return: None; writes 'bgm.wav' and 'total.wav' to the working directory
    """
    snd = AudioSegment.from_file(src_path)
    print(len(snd))
    snd_l, snd_r = snd.split_to_mono()
    print(len(snd_r), len(snd_l))
    snd_r_inv = snd_r.invert_phase()  # invert the right channel's phase
    # Center-panned content cancels; what survives is the background music.
    # (The original also computed the mirror overlay snd_r + inverted snd_l
    # into an unused variable; that dead computation is removed.)
    bg_music = snd_l.overlay(snd_r_inv)
    print(len(bg_music))
    bg_music.export('bgm.wav', format='wav')
    total_music = snd_l.overlay(snd_r)  # mono downmix of the full track
    total_music.export("total.wav", format='wav')
if __name__ == '__main__':
    # Pipeline: rip the audio track, split it into chunks, isolate vocals per chunk.
    video_path = 'D:/heelo/shaolin.mkv'
    # Hard-coded trim window in seconds (62 .. 7489) for this specific video.
    extract_audio(video_path, './tmp', 62, 7489)
    split_audio()
    extrac_speech()
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment