Commit 742031dc authored by 翟艳秋(20软)

1. add progressbar; 2. upload paddlespeech; 3. add requirements.txt

parent 453f190c
Subproject commit 081f7807a2ce0e12b98e6f0a0da0e650133f2d9e
import shutil
import time
import openpyxl
......
......@@ -19,6 +19,7 @@ def trans_to_seconds(timepoint):
def detect(video_path, start_time, end_time, book_path):
print("开始检测")
if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
else:
......
azure_storage==0.37.0
cn2an==0.5.16
ffmpeg==1.4
ffmpeg_python==0.2.0
Flask==2.0.2
flask_cors==3.0.10
ijson==3.1.4
LAC==2.1.2
librosa==0.8.1
mttkinter==0.6.1
numpy==1.19.3
opencv_python==4.5.4.58
openpyxl==3.0.9
paddle==1.0.2
paddleocr==2.3.0.1
paddlepaddle_gpu==2.2.0
paddlespeech==0.1.0
parakeet==0.24
Pillow==9.0.0
pyaudio==0.2.11
pydub==0.25.1
PyYAML==6.0
resampy==0.2.2
scipy==1.6.1
SoundFile==0.10.3.post1
tqdm==4.62.3
visualdl==2.2.1
webrtcvad==2.0.10
yacs==0.1.8
zhconv==1.4.3
# coding=utf-8
import os
import argparse
import time
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
......@@ -44,10 +45,9 @@ def change_speed(wav_path, speed=1.5):
:param speed: 转换后的语速
:return:
"""
print("调速")
cmd_line = 'ffmpeg -y -i {} -filter:a \"atempo={}\" {}'.format(tmp_file, speed, wav_path)
os.system(cmd_line)
# 删除临时文件
os.remove(tmp_file)
def read_sheet(book_path, sheet_name=None):
......@@ -89,7 +89,7 @@ def get_narratage_text(sheet_content):
if text is not None:
if text == '翻译':
narratage_text.append(subtitle[i])
narratage_start_time.append(float(start_time[i]) + 0.1)
narratage_start_time.append(float(start_time[i]))
else:
# 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s
text_split = text.split('\n')
......@@ -97,7 +97,7 @@ def get_narratage_text(sheet_content):
for x in text_split:
narratage_text.append(x)
narratage_start_time.append(cur_start)
cur_start = cur_start + len(x) / (4.5 * args.speed) + 0.5
cur_start = cur_start + len(x) / 4.5 + 0.5
return narratage_text, narratage_start_time
......@@ -131,15 +131,20 @@ def export_caption(sheet_content, caption_file):
f.write(x + "\n\n")
def ss_and_export():
def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
"""
生成语音并导出字幕
:param sheet_path: 校对过的旁白脚本表格文件
:param output_dir: 存放音频文件的
:param speed:
:param caption_file:
:return:
"""
# 旁白解说表格的位置
book_path = args.sheet_path
book_path = sheet_path
# 音频输出位置路径
root_path = args.output_dir
# 语速
speed = args.speed
# 字幕文件路径
caption_file = args.caption_file
root_path = output_dir
# 如果文件夹不存在,则新建文件夹
if not os.path.exists(root_path):
......@@ -149,11 +154,19 @@ def ss_and_export():
sheet_content = read_sheet(book_path)
narratages, start_timepoint = get_narratage_text(sheet_content)
export_caption(sheet_content, caption_file)
print("已导出旁白文件")
# 生成旁白解说语音
for i, text in enumerate(narratages):
wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
speech_synthesis(text, wav_path, speed)
print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages))
# 删除临时语音文件
if os.path.exists(tmp_file):
time.sleep(1)
os.remove(tmp_file)
if __name__ == '__main__':
......@@ -166,4 +179,4 @@ if __name__ == '__main__':
args = parser.parse_args()
# 主函数执行
ss_and_export(args.output_dir,args.sheet_path,args.caption_file,args.speed)
ss_and_export(args.output_dir, args.sheet_path, args.speed, args.caption_file)
# 最简单的是音轨分离,直接将背景音乐的轨道剥离,只剩下人声道后即可根据空白片段进行切割
# 只有一个音轨时,使用音乐检索系统,分割人声和背景音乐声
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import numpy as np
from moviepy.editor import *
from pydub import AudioSegment
from spleeter.audio.adapter import AudioAdapter
from spleeter.separator import Separator
# Module-level state shared by the functions below through ``global``.
# The literal values are only placeholders used until those functions run.
# Path of the audio file being processed; overwritten by extract_audio().
audio_path = r'D:\AddCaption\disappearedBullet.wav'
# Folder that holds the split audio parts; overwritten by split_audio().
folder_name = r'D:\AddCaption\hysxm'
# Number of parts the audio was split into; overwritten by split_audio().
split_len = 18
def extract_audio(video_path, root, start_time, end_time):
    """
    Extract the audio track of a video into a mono, 16 kHz WAV file via ffmpeg.

    The output path is also stored in the module-level ``audio_path`` so that
    later steps (e.g. ``split_audio``) can pick it up.

    ffmpeg is invoked with an argument list instead of a shell string, so
    paths containing spaces or shell metacharacters are passed through intact.
    As before, a failing ffmpeg run is not treated as an error: the target
    path is returned regardless.

    :param video_path: path of the source video
    :param root: folder in which the extracted audio file is stored
    :param start_time: actual start time of the video (ffmpeg ``-ss``)
    :param end_time: actual end time of the video (ffmpeg ``-to``);
                     ``-1`` means "until the end of the video"
    :return: audio_path, the path of the extracted audio file
    """
    global audio_path
    import subprocess

    audio_path = os.path.join(root, os.path.basename(video_path).split('.')[0] + ".wav")
    command = ["ffmpeg", "-i", str(video_path), "-ar", "16000", "-ac", "1", "-ss", str(start_time)]
    if end_time != -1:
        command += ["-to", str(end_time)]
    command += ["-y", audio_path]
    try:
        subprocess.run(command)
    except FileNotFoundError:
        # Keep the original best-effort contract (os.system never raised),
        # but say explicitly why nothing was produced.
        print("ffmpeg executable not found; no audio was extracted")
    return audio_path
# Split the audio into n files and return the folder that stores them
def split_audio():
    """
    Cut the file at the module-level ``audio_path`` into consecutive parts.

    The parts are written as ``01.wav``, ``02.wav``, ... into a folder that
    sits next to the audio file and is named after it. The folder path and the
    number of parts are published through the module-level ``folder_name`` and
    ``split_len`` for ``extrac_speech``.

    :return: folder_name, the folder containing the exported parts
    """
    global folder_name, split_len
    parent = os.path.dirname(os.path.realpath(audio_path))
    folder_name = os.path.join(parent, os.path.splitext(os.path.basename(audio_path))[0])
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(folder_name, exist_ok=True)
    audio = AudioSegment.from_file(audio_path)
    # pydub measures length and slices in milliseconds: 300000 ms per part
    per_part = 300000
    split_len = int(np.ceil(len(audio) / per_part))
    if audio.channels > 1:
        # Keep only the first channel; indexing (instead of unpacking into two
        # names) also works when the file has more than two channels.
        audio = audio.split_to_mono()[0]
    for i in range(1, split_len + 1):
        cur_path = os.path.join(folder_name, '{}.wav'.format(str(i).zfill(2)))
        audio[per_part * (i - 1):per_part * i].export(cur_path, format='wav')
    return folder_name
# Extract the vocals from the audio
def extrac_speech():
    """
    Run spleeter's 2-stem separation on every part produced by split_audio.

    For part ``NN.wav`` inside ``folder_name`` the vocal stem is saved as
    ``NN/vocal.wav`` in the same folder. The number of parts is read from the
    module-level ``split_len``.

    :return: None
    """
    vocal_separator = Separator('spleeter:2stems', multiprocess=False)
    adapter = AudioAdapter.default()
    for part_no in range(1, split_len + 1):
        part_name = str(part_no).zfill(2)
        part_wav = os.path.join(folder_name, '{}.wav'.format(part_name))
        part_dir = os.path.join(folder_name, part_name)
        if not os.path.exists(part_dir):
            os.mkdir(part_dir)
        waveform, framerate = adapter.load(part_wav)
        stems = vocal_separator.separate(waveform)
        adapter.save(os.path.join(part_dir, 'vocal.wav'), stems['vocals'], framerate)
# Extract the background music
def extract_bgm():
    """
    Derive a background-music track from the hard-coded file ``test.wav``.

    The left channel is overlaid with the phase-inverted right channel and the
    result is written to ``bgm.wav``; the plain overlay of both channels is
    written to ``total.wav``. Segment lengths are printed along the way.
    The two-name unpacking of ``split_to_mono()`` expects a stereo file.

    :return: None
    """
    mp4_path = 'test.wav'
    snd = AudioSegment.from_file(mp4_path)
    print(len(snd))
    snd_l, snd_r = snd.split_to_mono()
    print(len(snd_r), len(snd_l))
    snd_r_inv = snd_r.invert_phase()  # invert the phase
    # The overlay yields the background music, although with some noise
    bg_music = snd_l.overlay(snd_r_inv)
    print(len(bg_music))
    bg_music.export('bgm.wav', format='wav')
    total_music = snd_l.overlay(snd_r)
    total_music.export("total.wav", format='wav')
if __name__ == '__main__':
    # Manual run: extract the audio of a sample video, split it into parts,
    # then separate the vocals of every part.
    video_path = 'D:/heelo/shaolin.mkv'
    extract_audio(video_path=video_path, root='./tmp', start_time=62, end_time=7489)
    split_audio()
    extrac_speech()
......@@ -3,8 +3,9 @@ import threading
from mttkinter import mtTkinter as tk
from tkinter import filedialog, ttk, messagebox, Frame, Canvas
import os
import time
import ffmpeg
from speech_synthesis import ss_and_export
window = tk.Tk()
window.title('无障碍电影辅助工具') # 标题
......@@ -32,14 +33,7 @@ def open_video_file():
except:
endTime.set(vs['tags']['DURATION'])
if len(video_path) != 0 and not is_video(video_path):
messagebox.showinfo('警告', "请选择正确的视频格式,能够处理的视频格式如下所示:\n'.mkv', '.rmvb', '.mp4', '.avi'")
def open_sheet_file():
sheet_path = filedialog.askopenfilename(title=u'选择文件', initialdir=(os.path.expanduser('D:/heelo')))
if len(sheet_path) != 0 and ".xlsx" in sheet_path:
print("打开表格", sheet_path)
narratagePath.set(sheet_path)
messagebox.showwarning('警告', "请选择正确的视频格式,能够处理的视频格式如下所示:\n'.mkv', '.rmvb', '.mp4', '.avi'")
def find_save_file():
......@@ -63,6 +57,11 @@ def find_save_file():
def is_video(filename):
"""
检查视频格式是否正确
:param filename: 视频文件名
:return: True or False
"""
video_tag = ['.mkv', '.rmvb', '.mp4', '.avi']
for x in video_tag:
if x in filename:
......@@ -70,6 +69,42 @@ def is_video(filename):
return False
def trans_to_seconds(timepoint):
    """
    Convert a colon-separated time string into seconds.

    The fields are read from right to left, each one weighing 60 times more
    than the previous one (seconds, minutes, hours, ...).

    :param timepoint: time string such as ``"01:02:03"`` or ``"62.5"``
    :return: the time expressed in seconds
    """
    total = 0
    weight = 1
    for field in reversed(timepoint.split(':')):
        total += weight * float(field)
        weight *= 60
    return total
def start_process(p, p_label, state, intervals=100):
    """
    Drive a progress bar until it reaches 99% or 100%.

    The bar is started in auto-increment mode. While the loop runs, any
    progress reported through ``state`` is written to the bar and to its
    percentage label. The loop ends as soon as the bar shows 99 or more.

    :param p: progress bar widget
    :param p_label: label showing the percentage text of the progress bar
    :param state: one-element list whose first item holds the finished
                  fraction (0-1) reported by the worker, or ``None`` when the
                  bar is purely time-driven
    :param intervals: interval handed to ``p.start`` for the auto-increment
    :return: None
    """
    print("进度条开始滚动")
    p.start(interval=int(intervals))
    while True:
        # ``state`` may be None (time-driven bar); never index it blindly
        progress = state[0] if state else None
        if progress:
            percent = int(progress * 100)
            p['value'] = percent
            p_label['text'] = str(percent) + "%"
        current = float(p['value'])
        if current >= 99:
            p.stop()
            # Re-apply the value after stopping the auto-increment
            p['value'] = 100 if current >= 100 else 99
            break
        # Yield the CPU instead of spinning while waiting for progress
        time.sleep(0.05)
    print("进度条停止")
def start_detect():
"""
开始检测旁白
......@@ -80,49 +115,111 @@ def start_detect():
book_path = outputFilePath.get()
if len(video_path) == 0:
messagebox.showinfo('警告', "请输入视频文件路径")
messagebox.showwarning('警告', "请输入视频文件路径")
return
elif not is_video(video_path):
messagebox.showinfo('警告', "视频文件格式错误,请重新确认")
messagebox.showwarning('警告', "视频文件格式错误,请重新确认")
return
if len(book_path) == 0:
messagebox.showinfo("警告", "请输入表格存放路径")
messagebox.showwarning("警告", "请输入表格存放路径")
return
# print("视频文件路径:", inputFilePath.get())
# print("开始时间:", startTime.get())
# print("结束时间:", endTime.get())
# print("输出表格文件路径:", outputFilePath.get())
processState.set("正在启动中……")
from narratage_detection import detect
detect(video_path, startTime.get(), endTime.get(), book_path)
# 显示进度条及开始检测
progressbar_1.grid(column=2, row=1)
progress_1.grid(column=3, row=1)
processState.set("开始检测")
intervals = trans_to_seconds(endTime.get())
# 多线程同步进行检测和进度条更新
threads = [threading.Thread(target=start_process, args=(progressbar_1, progress_1, None, intervals * 5)),
threading.Thread(target=detect, args=(video_path, startTime.get(), endTime.get(), book_path))]
for t in threads:
t.start()
# 线程完成任务后结束线程
for t in threads:
t.join()
print("线程{}已结束".format(t.name))
# 将进度条的进度拉满到100%,并给出“任务已完成”的提示
progressbar_1['value'] = 100
progress_1['text'] = '100%'
processState.set("任务已完成")
def open_sheet_file():
    """
    Let the user choose the narration script workbook to import.

    Only paths that end in ``.xlsx`` are accepted. Any other choice triggers a
    warning dialog, matching how a wrong video format is reported. Cancelling
    the dialog leaves the current selection untouched.

    :return: None
    """
    sheet_path = filedialog.askopenfilename(
        title=u'选择文件',
        initialdir=(os.path.expanduser('D:/AddCaption/test/detectSubtitle/电影旁白打标')))
    if not sheet_path:
        # Dialog cancelled
        return
    # Check the extension itself; a substring test would also accept names
    # such as "a.xlsx.bak" or files inside a folder called "x.xlsx".
    if not sheet_path.lower().endswith(".xlsx"):
        messagebox.showwarning('警告', "请选择正确的表格格式,能够处理的表格格式如下所示:\n'.xlsx'")
        return
    print("打开表格", sheet_path)
    narratagePath.set(sheet_path)
def find_save_dir():
    """
    Ask the user for the folder in which the audio files are stored.

    The chosen folder is printed and written to ``audioDir``.

    :return: None
    """
    chosen_dir = filedialog.askdirectory(title=u'保存文件至')
    print('保存音频于:', chosen_dir)
    audioDir.set(chosen_dir)
def set_caption_file():
    """
    Ask the user where the subtitle file should be saved.

    The dialog opens in the audio output folder and proposes the narration
    workbook's name with an ``.srt`` extension. The chosen path is printed and
    written to ``captionPath``.

    :return: None
    """
    # splitext drops only the final extension, so "my.movie.xlsx" is offered
    # as "my.movie.srt" rather than "my.srt".
    default_name = os.path.splitext(os.path.basename(narratagePath.get()))[0] + ".srt"
    default_dir = audioDir.get()
    # "filetypes" is the documented option name; "filetype" only works through
    # Tk's option abbreviation.
    caption_path = filedialog.asksaveasfilename(title=u'保存文件至',
                                                initialdir=default_dir,
                                                initialfile=default_name,
                                                filetypes=[('字幕文件', ".srt")])
    print(caption_path)
    captionPath.set(caption_path)
def start_synthesis():
    """
    Validate the inputs, then synthesize the narration audio and export the subtitles.

    Synthesis and the progress bar run in two threads that share a one-element
    ``state`` list. This function blocks until both threads have finished, so
    it is expected to be called off the GUI main loop.

    :return: None
    """
    audio_dir = audioDir.get()
    sheet_path = narratagePath.get()
    speed = audio_speed.get()
    caption_path = captionPath.get()
    # Validate every input before any work is started
    if len(audio_dir) == 0:
        messagebox.showwarning("警告", "请选择音频存放路径")
        return
    elif not os.path.exists(audio_dir):
        messagebox.showwarning("警告", "当前音频存放路径有误,请检查一遍。")
        return
    if len(sheet_path) == 0:
        messagebox.showwarning("警告", "请选择你要处理的表格")
        return
    elif not os.path.exists(sheet_path):
        messagebox.showwarning("警告", "当前输入的表格不存在,请检查一遍。")
        # Without this return the synthesis would start on a missing workbook
        return
    # Show the progress bar, its percentage label and the status text
    progressbar_2.grid(column=2, row=2)
    progress_2.grid(column=3, row=2)
    processState_2.set("开始生成音频及字幕")
    # Run synthesis + subtitle export and the progress bar side by side
    state = [None]
    threads = [threading.Thread(target=start_process, args=(progressbar_2, progress_2, state)),
               threading.Thread(target=ss_and_export,
                                args=(sheet_path, audio_dir, speed, caption_path, state))]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # The progress loop may stop at 99%; fill the bar once the work is done
    progressbar_2['value'] = 100
    progress_2['text'] = '100%'
    processState_2.set("语音和字幕已导出完毕")
def thread_it(func, *args):
......@@ -152,9 +249,15 @@ tabControl.pack(expand=1, fill="both")
"""
为”旁白位置推荐“添加按钮、进度条等部件
"""
"""
视频信息相关内容,包含以下内容:
- 视频文件|视频文件路径文本框|上传文件按钮
- 视频实际开始时间|文本框
- 视频实际结束时间|文本框
"""
video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
# video_info.grid(column=0, row=0, padx=8, pady=4)
video_info.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.3)
video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
input_label = ttk.Label(video_info, text="视频文件")
input_label.grid(column=0, row=0)
......@@ -176,10 +279,15 @@ endTime = tk.StringVar(value="23:59:59")
endTime_entered = ttk.Entry(video_info, width=11, textvariable=endTime)
endTime_entered.grid(column=1, row=2, sticky="W")
"""
检测步骤相关内容,包含以下内容:
- 输出表格路径|输出表格路径文本框|打开文件夹
- 开始检测按钮|当前检测状态提示文本|任务进度条|进度条百分比
"""
detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
detect_command.place(relx=0.1, rely=0.5, relwidth=0.8, relheight=0.3)
detect_command.place(relx=0.05, rely=0.5, relwidth=0.9, relheight=0.4)
output_label = ttk.Label(detect_command, text="输出表格路径")
output_label = ttk.Label(detect_command, text="输出表格")
output_label.grid(column=0, row=0)
outputFilePath = tk.StringVar()
outputFile = ttk.Entry(detect_command, width=30, textvariable=outputFilePath)
......@@ -192,12 +300,17 @@ startDetection.grid(column=0, row=1)
processState = tk.StringVar()
stateLabel = tk.Label(detect_command, textvariable=processState, fg="green")
stateLabel.grid(column=1, row=1, sticky="W")
progressbar_1 = ttk.Progressbar(detect_command, length=100, mode="determinate")
progress_1 = tk.Label(detect_command, text="0%")
"""
为旁白语音合成添加部件
"""
"""
"""
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.3)
audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
narratage_label.grid(column=0, row=0)
......@@ -209,12 +322,14 @@ upload_button_2.grid(column=2, row=0)
speed_label = ttk.Label(audio_info, text="旁白语速")
speed_label.grid(column=0, row=1)
audio_speed = tk.StringVar(value="1.0")
speed_input = ttk.Entry(audio_info, width=30, textvariable=audio_speed)
speed_input.grid(column=1, row=1)
audio_speed = tk.StringVar()
speedChosen = ttk.Combobox(audio_info, width=12, textvariable=audio_speed)
speedChosen['values'] = ("1.00", "1.10", "1.25", "1.50", "1.75", "2.00", "2.50")
speedChosen.current(0)
speedChosen.grid(column=1, row=1, sticky="W")
synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
synthesis_command.place(relx=0.1, rely=0.5, relwidth=0.8, relheight=0.3)
synthesis_command.place(relx=0.05, rely=0.55, relwidth=0.9, relheight=0.4)
caption_label = ttk.Label(synthesis_command, text="输出字幕文件")
caption_label.grid(column=0, row=1)
......@@ -237,46 +352,8 @@ startSynthesis.grid(column=0, row=2)
processState_2 = tk.StringVar()
stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
stateLabel_2.grid(column=1, row=2, sticky="W")
progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate")
progress_2 = tk.Label(synthesis_command, text="0%")
# # creating a menu bar
# menu_bar = Menu(window)
# window.config(menu=menu_bar)
#
# # Add menu items
# file_menu = Menu(menu_bar, tearoff=0)
# # file_menu.add_command(label="", command=save_file)
# # file_menu.add_separator()
# file_menu.add_command(label="退出", command=_quit)
#
# # 添加下拉列表
# menu_bar.add_cascade(label="文件", menu=file_menu)
# # 更新进度条函数
# def change_schedule(now_schedule, all_schedule):
# canvas.coords(fill_rec, (5, 5, 6 + (now_schedule / all_schedule) * 100, 25))
# tab2.update()
# x.set(str(round(now_schedule / all_schedule * 100, 2)) + '%')
# if round(now_schedule / all_schedule * 100, 2) == 100.00:
# x.set("完成")
#
#
# # 创建画布
# canvas = Canvas(tab2, width=120, height=30, bg="white")
# canvas.grid(row=0, column=0)
# x = tk.StringVar()
# # 进度条以及完成程度
# out_rec = canvas.create_rectangle(5, 5, 105, 25, outline="blue", width=1)
# fill_rec = canvas.create_rectangle(5, 5, 5, 25, outline="", width=0, fill="blue")
#
# tk.Label(tab2, textvariable=x).grid(row=0, column=1)
#
# '''
# 使用时直接调用函数change_schedule(now_schedule,all_schedule)
# 下面就模拟一下....
# '''
#
# for i in range(100):
# time.sleep(0.1)
# change_schedule(i, 99)
window.mainloop() # 显示
# 刷新显示
window.mainloop()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment