Commit 7d7ed791 authored by 翟艳秋(20软)

1. [add] 在输出表格中添加推荐字数;

2. [add] 在界面中添加“是否有字幕”的选项,并兼容代码;
3. [modified] 修改进度条状态的计算公式;
4. [modified] 将字幕检测中的每段时长修改为180s,并对短视频做向下兼容;
5. [modified] 修改语音合成部分每段音频的开始时间的计算公式;
6. [modified] 在界面中对语速进行解释(x字/秒)
parent dda3b840
......@@ -13,6 +13,9 @@ up_b, down_b = 0, 0
# Initialise the OCR tool
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
# Normal speaking speed is 4 characters per second; it is multiplied by the
# length (in seconds) of a subtitle-free gap to get the recommended number of
# narration characters for that gap.
normal_speed = 4
def get_position(video_path, start_time):
"""
......@@ -45,7 +48,7 @@ def get_position(video_path, start_time):
log = []
for x in res:
rect, (txt, confidence) = x
font_size = rect[2][1] - rect[0][1]
# font_size = rect[2][1] - rect[0][1]
mid = (rect[0][0] + rect[1][0]) / 2
gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
# 可能是字幕的文本
......@@ -156,7 +159,8 @@ def process_video(video_path, begin, end, state):
cnt += 1
# 每秒取4帧画面左右
if cnt % int(fps / 4) == 0:
state[0] = float(cnt / video.get(cv2.CAP_PROP_FRAME_COUNT)) if state[0] < 0.99 else 0.99
state[0] = float((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - begin) / (end - begin)) \
if state[0] is None or state[0] < 0.99 else 0.99
subTitle = detect_subtitle(frame)
# 第一次找到字幕
if lastSubTitle is None and subTitle is not None:
......@@ -167,7 +171,9 @@ def process_video(video_path, begin, end, state):
res.append([start_time, end_time, lastSubTitle])
if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
print('--------------------------------------------------')
narratage_recommend.append(['', '', '', '插入旁白'])
recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int(
(res[-1][0] - res[-2][1]) * normal_speed)
narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
print(start_time, end_time, lastSubTitle)
narratage_recommend.append([round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
# 两句话连在一起,但是两句话不一样
......@@ -176,8 +182,10 @@ def process_video(video_path, begin, end, state):
end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
res.append([start_time, end_time, lastSubTitle])
if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
narratage_recommend.append(['', '', '', '插入旁白'])
print('--------------------------------------------------')
recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int(
(res[-1][0] - res[-2][1]) * normal_speed)
narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
print(start_time, end_time, lastSubTitle)
narratage_recommend.append([round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
start_time = end_time
......@@ -189,7 +197,9 @@ def process_video(video_path, begin, end, state):
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
print('--------------------------------------------------')
narratage_recommend.append(['', '', '', '插入旁白'])
recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int(
(res[-1][0] - res[-2][1]) * normal_speed)
narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
break
return narratage_recommend
......@@ -208,10 +218,12 @@ def write_excel_xlsx(path, sheet_name, value):
sheet.title = sheet_name
# 将字幕对应的那一列扩宽一些
sheet.column_dimensions['C'].width = 50
sheet.column_dimensions['D'].width = 30
for i in range(0, index):
for j in range(0, len(value[i])):
sheet.cell(row=i + 1, column=j + 1, value=str(value[i][j]))
if value[i][j] == '' or value[i][j] == '插入旁白' or value[i][j] == '翻译':
print(value[i][j])
if value[i][j] == '' or '插入旁白' in str(value[i][j]) or value[i][j] == '翻译':
sheet.cell(row=i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00')
workbook.save(path)
......@@ -222,7 +234,7 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state):
# 获取字幕在画面中的上下边界,方便在后续视频遍历过程中直接对字幕对应区域进行分析
global up_b, down_b
up_b, down_b = get_position(video_path, start_time)
up_b, down_b = get_position(video_path, start_time + 30)
# 获取并构建输出信息
table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
......@@ -236,4 +248,4 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state):
if __name__ == '__main__':
video_path = "D:/heelo/hysxm_1.mp4"
book_path = '何以笙箫默.xlsx'
detect_with_ocr(video_path, book_path, 0, 300)
detect_with_ocr(video_path, book_path, 0, 300, [None])
......@@ -64,6 +64,9 @@ def detect_movie(video_path, start, interval):
"""
video = cv2.VideoCapture(video_path)
fps = np.ceil(video.get(cv2.CAP_PROP_FPS))
end_time = video.get(cv2.CAP_PROP_FRAME_COUNT) / fps
if start + interval * 3 > end_time:
interval = int((end_time - start) / 3)
start = start * fps
interval = interval * fps
random_number = 50
......
......@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
return time_in_seconds
def detect(video_path, start_time, end_time, book_path, state):
def detect(video_path, start_time, end_time, book_path, state, subtitle=None):
print("开始检测")
if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
......@@ -28,7 +28,14 @@ def detect(video_path, start_time, end_time, book_path, state):
start_time = trans_to_seconds(start_time)
end_time = trans_to_seconds(end_time)
has_subtitle = detect_movie(video_path, start_time, 60)
# 根据用户的选择来确定电影是否有字幕,如果“未知”,则自动检测
if subtitle == 0:
has_subtitle = detect_movie(video_path, start_time, 180)
elif subtitle == 1:
has_subtitle = True
else:
has_subtitle = False
if has_subtitle:
detect_with_ocr(video_path, book_path, start_time, end_time, state)
else:
......
......@@ -9,6 +9,9 @@ import openpyxl
# File name of the temporary speech file
tmp_file = 'tmp.wav'
# Baseline narration speed at 1.0x, in characters per second
normal_speed = 4
# Pause (seconds, at 1.0x speed) added after each narration segment when the
# start time of the next segment is computed
normal_interval = 0.1
def speech_synthesis(text, output_file, speed):
"""
......@@ -38,7 +41,7 @@ def speech_synthesis(text, output_file, speed):
change_speed(output_file, speed)
def change_speed(wav_path, speed=1.5):
def change_speed(wav_path, speed=1.0):
"""
调整语速
:param wav_path: 原音频路径
......@@ -72,10 +75,11 @@ def read_sheet(book_path, sheet_name=None):
return sheet_content
def get_narratage_text(sheet_content):
def get_narratage_text(sheet_content, speed):
"""
根据从表格中获取到的内容,分析得到解说文本+对应开始时间
:param sheet_content: dict,keys=["起始时间","终止时间","字幕","建议","解说脚本"]
:param speed: float, 旁白语速
:return: narratage_text: list, 旁白文本,
narratage_start_time: list, 旁白对应开始时间
"""
......@@ -97,7 +101,7 @@ def get_narratage_text(sheet_content):
for x in text_split:
narratage_text.append(x)
narratage_start_time.append(cur_start)
cur_start = cur_start + len(x) / 4.5 + 0.5
cur_start = cur_start + (len(x) / normal_speed + normal_interval) / speed
return narratage_text, narratage_start_time
......@@ -152,7 +156,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
# 读取表格,并获取旁白及对应插入位置
sheet_content = read_sheet(book_path)
narratages, start_timepoint = get_narratage_text(sheet_content)
narratages, start_timepoint = get_narratage_text(sheet_content, speed)
export_caption(sheet_content, caption_file)
print("已导出旁白文件")
......@@ -164,6 +168,8 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages))
# 合成总音频,并入原视频音频中
# 删除临时语音文件
if os.path.exists(tmp_file):
time.sleep(1)
......
......@@ -7,8 +7,6 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import numpy as np
from moviepy.editor import *
from pydub import AudioSegment
from spleeter.audio.adapter import AudioAdapter
from spleeter.separator import Separator
audio_path = r'D:\AddCaption\disappearedBullet.wav'
folder_name = r'D:\AddCaption\hysxm'
......@@ -55,6 +53,8 @@ def split_audio():
# 从音频中提取人声
def extrac_speech():
from spleeter.audio.adapter import AudioAdapter
from spleeter.separator import Separator
separator = Separator('spleeter:2stems', multiprocess=False)
audio_adapter = AudioAdapter.default()
......
......@@ -18,8 +18,10 @@ def open_video_file():
打开文件
:return:
"""
video_path = filedialog.askopenfilename(title=u'选择文件', initialdir=(os.path.expanduser('D:/heelo')))
if len(video_path) != 0 and is_video(video_path):
video_path = filedialog.askopenfilename(title=u'选择文件',
filetype=[("视频文件", ".avi"), ("视频文件", ".mp4"), ("视频文件", ".rmvb"),
("视频文件", ".mkv")])
if len(video_path) != 0:
print('打开文件:', video_path)
inputFilePath.set(video_path)
# 获取视频的时长等信息,初始化开始结束时间
......@@ -37,8 +39,6 @@ def open_video_file():
k_l = str.lower(k)
if 'duration' in k_l:
endTime.set(vs['tags'][k])
if len(video_path) != 0 and not is_video(video_path):
messagebox.showwarning('警告', "请选择正确的视频格式,能够处理的视频格式如下所示:\n'.mkv', '.rmvb', '.mp4', '.avi'")
def find_save_file():
......@@ -54,26 +54,12 @@ def find_save_file():
book_path = filedialog.asksaveasfilename(title=u'保存文件至',
initialdir=os.path.dirname(video_path),
initialfile=defaultName,
filetype=[('excel文件', ".xlsx")])
if len(book_path) != 0 and ".xlsx" not in book_path:
book_path += ".xlsx"
filetype=[('Excel工作簿', ".xlsx"), ('Excel 97工作簿', ".xls")],
defaultextension='.xlsx')
print('保存文件至:', book_path)
outputFilePath.set(book_path)
def is_video(filename):
    """
    Check whether a file name ends with one of the supported video extensions.

    Only the end of the name is inspected and the comparison ignores case, so
    ``"clip.MP4"`` is accepted while ``"clip.mp4.txt"`` or a file located
    inside a directory called ``"clips.avi"`` is rejected.

    :param filename: video file name or path
    :return: True or False
    """
    video_tag = ('.mkv', '.rmvb', '.mp4', '.avi')
    # A plain substring test would match the extension anywhere in the path;
    # str.endswith with a tuple checks every supported suffix in one call.
    return filename.lower().endswith(video_tag)
def trans_to_seconds(timepoint):
time_in_seconds = 0
timepoint = timepoint.split(':')
......@@ -120,9 +106,6 @@ def start_detect():
if len(video_path) == 0:
messagebox.showwarning('警告', "请输入视频文件路径")
return
elif not is_video(video_path):
messagebox.showwarning('警告', "视频文件格式错误,请重新确认")
return
if len(book_path) == 0:
messagebox.showwarning("警告", "请输入表格存放路径")
return
......@@ -142,7 +125,8 @@ def start_detect():
state = [None]
threads = [
threading.Thread(target=start_process, args=(progressbar_1, progress_1, state, 100000), name="startProgress1"),
threading.Thread(target=detect, args=(video_path, startTime.get(), endTime.get(), book_path, state),
threading.Thread(target=detect,
args=(video_path, startTime.get(), endTime.get(), book_path, state, hasSubtitle.get()),
name="detect")]
for t in threads:
t.start()
......@@ -179,8 +163,8 @@ def open_sheet_file():
:return:
"""
sheet_path = filedialog.askopenfilename(title=u'选择文件',
initialdir=(os.path.expanduser('D:/AddCaption/test/detectSubtitle/电影旁白打标')))
if len(sheet_path) != 0 and ".xlsx" in sheet_path:
filetype=[('Excel工作簿', ".xlsx"), ('Excel 97工作簿', ".xls")], )
if len(sheet_path) != 0:
print("打开表格", sheet_path)
narratagePath.set(sheet_path)
......@@ -209,6 +193,14 @@ def set_caption_file():
captionPath.set(caption_path)
def confirm_video_path():
    """
    Ask the user for the original video and store its path in ``videoPath``.

    The dialog only offers mp4/rmvb/avi/mkv files.  When the dialog is
    cancelled, the previously stored path is left untouched instead of being
    overwritten with an empty value.

    :return:
    """
    video_path = filedialog.askopenfilename(title=u'选择文件',
                                            filetypes=[("视频文件", ".avi"), ("视频文件", ".mp4"),
                                                       ("视频文件", ".rmvb"), ("视频文件", ".mkv")])
    # askopenfilename returns an empty value when the user cancels
    if video_path:
        videoPath.set(video_path)
def start_synthesis():
"""
开始合成语音
......@@ -216,7 +208,7 @@ def start_synthesis():
"""
audio_dir = audioDir.get()
sheet_path = narratagePath.get()
speed = audio_speed.get()
speed = float(audio_speed.get().split('(')[0])
caption_path = captionPath.get()
# 判断各个变量的合理性
......@@ -241,7 +233,8 @@ def start_synthesis():
# 多线程同时实现语音合成+字幕导出、进度条
state = [None]
threads = [threading.Thread(target=start_process, args=(progressbar_2, progress_2, state), name="startProgress2"),
threads = [
threading.Thread(target=start_process, args=(progressbar_2, progress_2, state, 100000), name="startProgress2"),
threading.Thread(target=ss_and_export,
args=(sheet_path, audio_dir, speed, caption_path, state), name="ssAndExport")]
for t in threads:
......@@ -343,6 +336,16 @@ endTime = tk.StringVar(value="23:59:59")
endTime_entered = ttk.Entry(video_info, width=11, textvariable=endTime)
endTime_entered.grid(column=1, row=2, sticky="W")
# "Has subtitles?" selector: three radio buttons in row 3 of the video_info
# frame, all bound to the same IntVar. start_detect() passes hasSubtitle.get()
# to detect() as its `subtitle` argument, where
#   0 ("未知", unknown) -> subtitle presence is auto-detected with detect_movie(),
#   1 ("是", yes)       -> the video is treated as having subtitles,
#   2 ("否", no)        -> the video is treated as having no subtitles.
subtitle_label = ttk.Label(video_info, text="是否有字幕")
subtitle_label.grid(column=0, row=3)
# tk.IntVar() starts at 0, so "未知" is the initially selected choice
hasSubtitle = tk.IntVar()
choice_1 = tk.Radiobutton(video_info, text="未知", variable=hasSubtitle, value=0)
choice_1.grid(column=1, row=3, sticky="W")
choice_2 = tk.Radiobutton(video_info, text="是", variable=hasSubtitle, value=1)
choice_2.grid(column=2, row=3, sticky="W")
choice_3 = tk.Radiobutton(video_info, text="否", variable=hasSubtitle, value=2)
choice_3.grid(column=3, row=3, sticky="W")
"""
检测步骤相关内容,包含以下内容:
- 输出表格路径|输出表格路径文本框|打开文件夹
......@@ -378,37 +381,47 @@ stopDetection.config(state=tk.DISABLED)
"""
语音相关设置,包含以下内容:
- 旁白脚本表格|表格路径|上传文件按钮
- 原视频|视频路径|上传文件按钮
- 旁白语速选择
"""
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
narratage_label.grid(column=0, row=0)
narratage_label.grid(column=0, row=1)
narratagePath = tk.StringVar()
narratagePath_input = ttk.Entry(audio_info, width=30, textvariable=narratagePath)
narratagePath_input.grid(column=1, row=0)
narratagePath_input.grid(column=1, row=1)
upload_button_2 = ttk.Button(audio_info, text="上传文件", command=open_sheet_file)
upload_button_2.grid(column=2, row=0)
upload_button_2.grid(column=2, row=1)
speed_label = ttk.Label(audio_info, text="旁白语速")
speed_label.grid(column=0, row=1)
speed_label.grid(column=0, row=2)
audio_speed = tk.StringVar()
speedChosen = ttk.Combobox(audio_info, width=12, textvariable=audio_speed)
speedChosen['values'] = ("1.00", "1.10", "1.25", "1.50", "1.75", "2.00", "2.50")
speedChosen['values'] = (
"1.00(4字/秒)", "1.10(4.5字/秒)", "1.25(5字/秒)", "1.50(6字/秒)", "1.75(7字/秒)", "2.00(8字/秒)", "2.50(10字/秒)")
speedChosen.current(0)
speedChosen.grid(column=1, row=1, sticky="W")
speedChosen.grid(column=1, row=2, sticky="W")
video_label = ttk.Label(audio_info, text="原视频")
video_label.grid(column=0, row=0)
videoPath = tk.StringVar()
videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath)
videoPath_input.grid(column=1, row=0)
upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path)
upload_button_3.grid(column=2, row=0)
"""
语音合成步骤,包含以下内容:
- 输出音频存放于|路径文本框|打开文件夹
- 输出字幕文件|路径文本框|打开文件夹
- 开始合成按钮|当前检测状态提示文本|任务进度条|进度条百分比
- 停止合成按钮
"""
synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
synthesis_command.place(relx=0.05, rely=0.55, relwidth=0.9, relheight=0.4)
caption_label = ttk.Label(synthesis_command, text="输出字幕文件")
caption_label.grid(column=0, row=1)
captionPath = tk.StringVar()
captionPath_input = ttk.Entry(synthesis_command, width=30, textvariable=captionPath)
captionPath_input.grid(column=1, row=1)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=set_caption_file)
save_button_2.grid(column=2, row=1)
audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于")
audioDir_label.grid(column=0, row=0)
audioDir = tk.StringVar()
......@@ -417,6 +430,14 @@ audioDir_input.grid(column=1, row=0)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir)
save_button_2.grid(column=2, row=0)
caption_label = ttk.Label(synthesis_command, text="输出字幕文件")
caption_label.grid(column=0, row=1)
captionPath = tk.StringVar()
captionPath_input = ttk.Entry(synthesis_command, width=30, textvariable=captionPath)
captionPath_input.grid(column=1, row=1)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=set_caption_file)
save_button_2.grid(column=2, row=1)
startSynthesis = ttk.Button(synthesis_command, text="开始合成",
command=lambda: thread_it(start_synthesis, name="startSynthesis"))
startSynthesis.grid(column=0, row=2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment