Commit 742031dc authored by 翟艳秋(20软)'s avatar 翟艳秋(20软)

1. add progressbar \n 2.upload paddlespeech \n 3. add requirements.txt

parent 453f190c
Subproject commit 081f7807a2ce0e12b98e6f0a0da0e650133f2d9e
import shutil
import time import time
import openpyxl import openpyxl
......
...@@ -19,6 +19,7 @@ def trans_to_seconds(timepoint): ...@@ -19,6 +19,7 @@ def trans_to_seconds(timepoint):
def detect(video_path, start_time, end_time, book_path): def detect(video_path, start_time, end_time, book_path):
print("开始检测")
if book_path is None: if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx" book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
else: else:
......
azure_storage==0.37.0
cn2an==0.5.16
ffmpeg==1.4
ffmpeg_python==0.2.0
Flask==2.0.2
flask_cors==3.0.10
ijson==3.1.4
LAC==2.1.2
librosa==0.8.1
mttkinter==0.6.1
numpy==1.19.3
opencv_python==4.5.4.58
openpyxl==3.0.9
paddle==1.0.2
paddleocr==2.3.0.1
paddlepaddle_gpu==2.2.0
paddlespeech==0.1.0
parakeet==0.24
Pillow==9.0.0
pyaudio==0.2.11
pydub==0.25.1
PyYAML==6.0
resampy==0.2.2
scipy==1.6.1
SoundFile==0.10.3.post1
tqdm==4.62.3
visualdl==2.2.1
webrtcvad==2.0.10
yacs==0.1.8
zhconv==1.4.3
# coding=utf-8 # coding=utf-8
import os import os
import argparse import argparse
import time
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig from azure.cognitiveservices.speech.audio import AudioOutputConfig
...@@ -44,10 +45,9 @@ def change_speed(wav_path, speed=1.5): ...@@ -44,10 +45,9 @@ def change_speed(wav_path, speed=1.5):
:param speed: 转换后的语速 :param speed: 转换后的语速
:return: :return:
""" """
print("调速")
cmd_line = 'ffmpeg -y -i {} -filter:a \"atempo={}\" {}'.format(tmp_file, speed, wav_path) cmd_line = 'ffmpeg -y -i {} -filter:a \"atempo={}\" {}'.format(tmp_file, speed, wav_path)
os.system(cmd_line) os.system(cmd_line)
# 删除临时文件
os.remove(tmp_file)
def read_sheet(book_path, sheet_name=None): def read_sheet(book_path, sheet_name=None):
...@@ -89,7 +89,7 @@ def get_narratage_text(sheet_content): ...@@ -89,7 +89,7 @@ def get_narratage_text(sheet_content):
if text is not None: if text is not None:
if text == '翻译': if text == '翻译':
narratage_text.append(subtitle[i]) narratage_text.append(subtitle[i])
narratage_start_time.append(float(start_time[i]) + 0.1) narratage_start_time.append(float(start_time[i]))
else: else:
# 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s # 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s
text_split = text.split('\n') text_split = text.split('\n')
...@@ -97,7 +97,7 @@ def get_narratage_text(sheet_content): ...@@ -97,7 +97,7 @@ def get_narratage_text(sheet_content):
for x in text_split: for x in text_split:
narratage_text.append(x) narratage_text.append(x)
narratage_start_time.append(cur_start) narratage_start_time.append(cur_start)
cur_start = cur_start + len(x) / (4.5 * args.speed) + 0.5 cur_start = cur_start + len(x) / 4.5 + 0.5
return narratage_text, narratage_start_time return narratage_text, narratage_start_time
...@@ -131,15 +131,20 @@ def export_caption(sheet_content, caption_file): ...@@ -131,15 +131,20 @@ def export_caption(sheet_content, caption_file):
f.write(x + "\n\n") f.write(x + "\n\n")
def ss_and_export(): def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
"""
生成语音并导出字幕
:param sheet_path: 校对过的旁白脚本表格文件
:param output_dir: 存放音频文件的
:param speed:
:param caption_file:
:return:
"""
# 旁白解说表格的位置 # 旁白解说表格的位置
book_path = args.sheet_path book_path = sheet_path
# 音频输出位置路径 # 音频输出位置路径
root_path = args.output_dir root_path = output_dir
# 语速
speed = args.speed
# 字幕文件路径
caption_file = args.caption_file
# 如果文件夹不存在,则新建文件夹 # 如果文件夹不存在,则新建文件夹
if not os.path.exists(root_path): if not os.path.exists(root_path):
...@@ -149,11 +154,19 @@ def ss_and_export(): ...@@ -149,11 +154,19 @@ def ss_and_export():
sheet_content = read_sheet(book_path) sheet_content = read_sheet(book_path)
narratages, start_timepoint = get_narratage_text(sheet_content) narratages, start_timepoint = get_narratage_text(sheet_content)
export_caption(sheet_content, caption_file) export_caption(sheet_content, caption_file)
print("已导出旁白文件")
# 生成旁白解说语音 # 生成旁白解说语音
for i, text in enumerate(narratages): for i, text in enumerate(narratages):
wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i]) wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
speech_synthesis(text, wav_path, speed) speech_synthesis(text, wav_path, speed)
print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages))
# 删除临时语音文件
if os.path.exists(tmp_file):
time.sleep(1)
os.remove(tmp_file)
if __name__ == '__main__': if __name__ == '__main__':
...@@ -166,4 +179,4 @@ if __name__ == '__main__': ...@@ -166,4 +179,4 @@ if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
# 主函数执行 # 主函数执行
ss_and_export(args.output_dir,args.sheet_path,args.caption_file,args.speed) ss_and_export(args.output_dir, args.sheet_path, args.speed, args.caption_file)
# Simplest approach: separate the audio tracks and strip the background-music
# track; with only the voice track left, the audio can be cut at its silent segments.
# When there is only one audio track, use a music source-separation model to
# split vocals from background music.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import numpy as np
from moviepy.editor import *
from pydub import AudioSegment
from spleeter.audio.adapter import AudioAdapter
from spleeter.separator import Separator
# Module-level state shared by the functions below (rebound via ``global``):
audio_path = r'D:\AddCaption\disappearedBullet.wav'  # current audio file; overwritten by extract_audio()
folder_name = r'D:\AddCaption\hysxm'  # chunk output folder; overwritten by split_audio()
split_len = 18  # number of 5-minute chunks; recomputed by split_audio()
def extract_audio(video_path, root, start_time, end_time):
    """
    Extract the audio track of a video as 16 kHz mono WAV via ffmpeg.

    Rebinds the module-level ``audio_path`` global as a side effect.

    :param video_path: path of the source video file
    :param root: directory where the extracted audio is written
    :param start_time: clip start offset (seconds, or an ffmpeg time string)
    :param end_time: clip end offset; -1 means "until the end of the stream"
    :return: audio_path, path of the extracted WAV file
    """
    global audio_path
    # splitext keeps the full stem of filenames containing extra dots
    # (e.g. "a.b.mkv" -> "a.b"), unlike split('.')[0] which truncated it.
    base = os.path.splitext(os.path.basename(video_path))[0]
    audio_path = os.path.join(root, base + ".wav")
    if end_time == -1:
        # No end bound: transcode from start_time to the end of the stream.
        command = "ffmpeg -i {} -ar 16000 -ac 1 -ss {} -y {}".format(video_path, start_time, audio_path)
    else:
        command = "ffmpeg -i {} -ar 16000 -ac 1 -ss {} -to {} -y {}".format(video_path, start_time, end_time, audio_path)
    os.system(command)
    return audio_path
# Split the audio into n chunk files and return the folder that stores them.
def split_audio():
    """
    Split the global audio file into 5-minute mono WAV chunks.

    Chunks are written to a sub-folder named after the audio file, as
    zero-padded 01.wav, 02.wav, ... Rebinds the module-level
    ``folder_name`` and ``split_len`` globals as a side effect.

    :return: folder_name, directory holding the chunk files
    """
    global folder_name, split_len
    # Output folder: sibling of the audio file, named after it (minus ".wav").
    base_dir = os.path.dirname(os.path.realpath(audio_path))
    folder_name = os.path.join(base_dir, os.path.basename(audio_path)[:-4])
    if not os.path.exists(folder_name):
        os.mkdir(folder_name)

    segment = AudioSegment.from_file(audio_path)
    chunk_ms = 300000  # 5 minutes per chunk, in milliseconds
    split_len = int(np.ceil(len(segment) / chunk_ms))
    if segment.channels > 1:
        # Keep only the first channel so every exported chunk is mono.
        segment, _ = segment.split_to_mono()

    for idx in range(split_len):
        out_path = os.path.join(folder_name, '{:02d}.wav'.format(idx + 1))
        segment[chunk_ms * idx:chunk_ms * (idx + 1)].export(out_path, format='wav')
    return folder_name
# Extract the human voice from each audio chunk.
def extrac_speech():
    """
    Isolate the vocal track of every audio chunk with Spleeter's 2-stem model.

    For chunk NN.wav in ``folder_name`` the vocals are written to
    ``folder_name/NN/vocal.wav``. Relies on the ``split_len`` and
    ``folder_name`` globals set by split_audio().
    """
    sep = Separator('spleeter:2stems', multiprocess=False)
    adapter = AudioAdapter.default()
    for idx in range(1, split_len + 1):
        tag = str(idx).zfill(2)
        chunk_path = os.path.join(folder_name, tag + '.wav')
        vocal_dir = os.path.join(folder_name, tag)
        if not os.path.exists(vocal_dir):
            os.mkdir(vocal_dir)
        samples, rate = adapter.load(chunk_path)
        # 2-stem separation yields 'vocals' and 'accompaniment'; keep the vocals.
        voice = sep.separate(samples)['vocals']
        adapter.save(os.path.join(vocal_dir, 'vocal.wav'), voice, rate)
# Extract the background music from a stereo recording.
def extract_bgm(audio_file='test.wav'):
    """
    Extract background music from a stereo recording by phase cancellation.

    Vocals are typically mixed identically into both channels, so overlaying
    one channel with the phase-inverted other cancels the center-panned
    content (vocals) and leaves the background music, with some residual noise.

    Writes 'bgm.wav' (background music) and 'total.wav' (both channels mixed)
    to the current working directory.

    :param audio_file: path of the stereo input file; defaults to 'test.wav'
        to preserve the original hard-coded behavior
    """
    snd = AudioSegment.from_file(audio_file)
    print(len(snd))
    snd_l, snd_r = snd.split_to_mono()
    print(len(snd_r), len(snd_l))
    snd_r_inv = snd_r.invert_phase()  # phase-invert the right channel
    # Left overlaid with inverted right cancels content common to both channels.
    bg_music = snd_l.overlay(snd_r_inv)
    print(len(bg_music))
    bg_music.export('bgm.wav', format='wav')
    total_music = snd_l.overlay(snd_r)
    total_music.export("total.wav", format='wav')
if __name__ == '__main__':
    # Demo pipeline: pull the audio track out of the video (from 62 s to 7489 s),
    # chop it into 5-minute chunks, then separate the vocals of each chunk.
    video_path = 'D:/heelo/shaolin.mkv'
    extract_audio(video_path, './tmp', 62, 7489)
    split_audio()
    extrac_speech()
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment