Commit 453f190c authored by 翟艳秋(20软)'s avatar 翟艳秋(20软)

initial project

parents
import shutil
import time
import openpyxl
from openpyxl.styles import PatternFill, Alignment
from split_wav import *
def create_sheet(path, sheet_name, value):
    """
    Create a new workbook initialised with the given header rows.

    :param path: str, file path where the workbook is saved
    :param sheet_name: str, name given to the sheet
    :param value: list of rows; the header is
        ['起始时间','终止时间','字幕','建议','旁边解说脚本']
    :return: None
    """
    book = openpyxl.Workbook()
    sheet = book.active
    sheet.title = sheet_name
    # Widen the subtitle column so long lines stay readable.
    sheet.column_dimensions['C'].width = 50
    for row_idx, row in enumerate(value, start=1):
        for col_idx, cell_value in enumerate(row, start=1):
            sheet.cell(row=row_idx, column=col_idx, value=str(cell_value))
    book.save(path)
def write_to_sheet(path, sheet_name, value):
    """
    Append rows to an existing workbook.

    Cells that are empty or equal to '插入旁白' are highlighted in yellow,
    and the subtitle column (column C) gets word wrapping.

    :param path: str, path of the workbook to update
    :param sheet_name: str, name of the sheet to append to
    :param value: list of rows, each row a list of cell values
    :return: None
    """
    workbook = openpyxl.load_workbook(path)
    # get_sheet_by_name() is deprecated and removed in recent openpyxl;
    # subscription access is the supported spelling.
    sheet = workbook[sheet_name]
    cur_row = sheet.max_row
    for i in range(0, len(value)):
        for j in range(0, len(value[i])):
            sheet.cell(row=cur_row + i + 1, column=j + 1, value=str(value[i][j]))
            # Highlight cells that mark a suggested narration insertion point.
            if value[i][j] == '' or value[i][j] == '插入旁白':
                sheet.cell(row=cur_row + i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00')
            # Column index 2 (column C) holds the subtitle text: wrap it.
            if j == 2:
                sheet.cell(row=cur_row + i + 1, column=j + 1).alignment = Alignment(wrapText=True)
    workbook.save(path)
def trans_to_mono(wav_path):
    """
    Convert an audio file to a single channel (mono) with ffmpeg.

    :param wav_path: str, path of the audio file to convert
    :return: str, path of the newly written mono audio file
    """
    # Derive the output name from the stem so extensions of any length
    # work (the old wav_path[:-4] slicing assumed a 3-letter extension).
    base, _ext = os.path.splitext(wav_path)
    new_wav_path = base + "_1.wav"
    # Quote both paths so spaces in directories do not break the command.
    command = 'ffmpeg -i "{}" -ac 1 -y "{}"'.format(wav_path, new_wav_path)
    os.system(command)
    return new_wav_path
def concat_wav(root):
    """
    Concatenate the per-segment vocal tracks under ``root`` into one file.

    Writes an ffmpeg concat list ('list.txt') referencing every
    ``<segment>/vocal.wav`` and merges them into ``total.wav``.

    :param root: str, directory containing one sub-directory per segment
    :return: str, path of the concatenated audio file
    """
    txt_path = os.path.join(root, 'list.txt')
    with open(txt_path, 'w', encoding='utf-8') as f:
        for file_name in os.listdir(root):
            if os.path.isdir(os.path.join(root, file_name)):
                # Build the path with os.path.join instead of string
                # concatenation so separators stay consistent per platform.
                wav_path = os.path.join(root, file_name, "vocal.wav")
                f.write("file '" + wav_path + "'\n")
    output_file = os.path.join(root, 'total.wav')
    # -safe 0 lets the concat demuxer accept absolute paths in the list;
    # quote the paths so spaces do not split the command.
    command = 'ffmpeg -f concat -safe 0 -i "{}" -y "{}"'.format(txt_path, output_file)
    os.system(command)
    return output_file
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
    """
    Detect narration insertion points with speech recognition (ASR).

    Extracts the audio track from ``video_path``, runs it through the
    DeepSpeech2 model and appends the recognised rows to the workbook at
    ``book_path`` (creating it with a header row when it does not exist).

    :param video_path: str, path of the video to analyse
    :param book_path: str, path of the output .xlsx workbook
    :param start_time: start offset in seconds (default 0)
    :param end_time: end offset in seconds, -1 meaning "to the end"
    :return: None
    """
    # Folder that temporarily stores intermediate artefacts.
    tmp_root = './tmp'
    if not os.path.exists(tmp_root):
        os.mkdir(tmp_root)
    if not os.path.exists(video_path):
        print("你输入的视频地址有误,请仔细检查一下")
        return
    # Extract the audio from the video over the requested range.
    # NOTE(review): extract_audio comes from the `split_wav` star import —
    # confirm its signature against that module.
    audio_path = extract_audio(video_path, tmp_root, start_time, end_time)
    # root = split_audio()
    # extrac_speech()
    #
    # # Concatenate the extracted vocals and convert the audio to mono.
    # total_wav_path = concat_wav(root)
    # audio_path = trans_to_mono(total_wav_path)
    # The sheet inside the workbook is named "旁白插入位置建议".
    book_name_xlsx = book_path
    sheet_name_xlsx = "旁白插入位置建议"
    # Create the output workbook (with header row) when it does not exist.
    if not os.path.exists(book_name_xlsx):
        table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
        create_sheet(book_name_xlsx, sheet_name_xlsx, table_head)
    sys.path.append("./PaddlePaddle_DeepSpeech2")
    from infer_path import predict_long_audio_with_paddle
    table_content = predict_long_audio_with_paddle(audio_path, book_name_xlsx, start_time)
    write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content)
    # Remove intermediate files (disabled, presumably for debugging).
    # shutil.rmtree(tmp_root)
if __name__ == '__main__':
    start_time = time.time()
    # Path of the video to process (hard-coded example run).
    video_path = 'D:/heelo/zhanlang.rmvb'
    detect_with_asr(video_path, "zhanlang.xlsx", 50, 5154)
    print("处理视频 {} 需要时长为{} ".format(os.path.basename(video_path), time.time() - start_time))
import time
import cv2
import numpy as np
from paddleocr import PaddleOCR
import difflib
import openpyxl
from openpyxl.styles import PatternFill
# Upper and lower bounds of the subtitle band (set by get_position()).
up_b, down_b = 0, 0
# Initialise the OCR engine (Chinese model with text-angle classification).
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def get_position(video_path, start_time):
    """
    Sample frames from the video to locate the subtitle band.

    OCR is run on the lower 40% of up to 1000 frames (about 3 per second)
    and the vertical extent of recurring, level, horizontally centred text
    is voted on; the band with the most votes is taken to be the subtitle
    position.

    :param video_path: str, path of the video file
    :param start_time: offset in seconds at which sampling starts
    :return: (top, bottom) bounds of the subtitle band in full-frame
        coordinates
    """
    video = cv2.VideoCapture(video_path)
    subtitle_position = {}
    fps = video.get(cv2.CAP_PROP_FPS)
    start = int(start_time * fps)
    cnt = 0
    txt_cnt = 0
    pre_txt = None
    video.set(cv2.CAP_PROP_POS_FRAMES, start)
    # Only analyse the bottom 40% of the frame, where subtitles live.
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
    while True:
        _, img = video.read()
        cnt += 1
        if img is None or cnt > 1000:
            break
        # Sample roughly three frames per second.
        if cnt % int(fps / 3) != 0:
            continue
        img = img[height:]
        res = ocr.ocr(img, cls=True)
        # Sort in place by the box's top y coordinate; the original called
        # sorted() and discarded its return value.
        res.sort(key=lambda x: x[0][0][1])
        bottom_position = None
        if len(res) == 0:
            continue
        for x in res:
            rect, (txt, confidence) = x
            mid = (rect[0][0] + rect[1][0]) / 2
            gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
            # Candidate subtitle: confident, horizontally centred, level text.
            if confidence > 0.9 and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1] and gradient < 0.1:
                if bottom_position is None:
                    bottom_position = rect[0][1]
                keys = subtitle_position.keys()
                # Same text line as the first candidate of this frame.
                if abs(rect[0][1] - bottom_position) < 10:
                    # Count distinct subtitles so we can stop after three.
                    if pre_txt is None or pre_txt != txt:
                        txt_cnt += 1
                    pre_txt = txt
                    # Vote for this (top, bottom) band. The original looked
                    # up (x0, y2) here while storing (y0, y2) keys, so the
                    # membership test never matched what was incremented
                    # and could raise KeyError.
                    if (rect[0][1], rect[2][1]) in keys:
                        subtitle_position[(rect[0][1], rect[2][1])] += 1
                    else:
                        replace = False
                        for k in keys:
                            # Merge with a near-identical band, widening the
                            # key to the loosest top/bottom seen so far.
                            if abs(rect[0][1] - k[0]) + abs(rect[2][1] - k[1]) < 10:
                                new_k = min(k[0], rect[0][1]), max(k[1], rect[2][1])
                                if new_k != k:
                                    subtitle_position[new_k] = subtitle_position[k]
                                    subtitle_position[new_k] += 1
                                    # Safe despite iterating keys: we break
                                    # out of the loop right after this pop.
                                    subtitle_position.pop(k)
                                else:
                                    subtitle_position[k] += 1
                                replace = True
                                break
                        if not replace:
                            subtitle_position[(rect[0][1], rect[2][1])] = 1
        if txt_cnt == 3:
            break
    print(subtitle_position)
    # The band with the most votes wins.
    up_b, down_b = max(subtitle_position, key=subtitle_position.get)
    return up_b + height, down_b + height
def string_similar(s1, s2):
    """
    Estimate how similar two strings are; used to suppress near-duplicate
    subtitles in the output.

    :param s1: first string
    :param s2: second string
    :return: float in [0, 1], upper-bound similarity ratio
    """
    matcher = difflib.SequenceMatcher(None, s1, s2)
    return matcher.quick_ratio()
def detect_subtitle(img):
    """
    Run OCR on the subtitle band of one frame and return the subtitle text.

    Relies on the module-level ``up_b``/``down_b`` bounds produced by
    get_position() and the shared ``ocr`` engine.

    :param img: one full video frame (BGR array from cv2)
    :return: str subtitle text, or None when no subtitle is found
    """
    subTitle = ''
    # Crop to the detected subtitle band with a 30 px margin each side.
    img = img[int(up_b) - 30:int(down_b) + 30]
    # img = cv2.resize(img, (int(img.shape[1] * 0.5), int(img.shape[0] * 0.5)))
    res = ocr.ocr(img, cls=True)
    # NOTE(review): sorted() discards its result here, so res keeps the
    # OCR engine's own ordering.
    sorted(res, key=lambda x: x[0][0][1])
    bottom_position = None
    if len(res) == 0:
        return None
    log = []
    possible_txt = []
    for x in res:
        rect, (txt, confidence) = x
        font_size = rect[2][1] - rect[0][1]
        mid = (rect[0][0] + rect[1][0]) / 2
        # Slope of the text box; near zero means a level line of text.
        gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
        log.append("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
        # Accept: confidence > 0.7, slope < 0.1, vertical offset <= 25 px,
        # and the text centre within the middle fifth of the frame width.
        if confidence > 0.7 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6 and \
                abs(rect[0][1] - 30) + abs(img.shape[0] - rect[2][1] - 30) <= 25:
            subTitle += txt
        # A subtitle line split into two (or more) dialogue fragments.
        elif confidence > 0.85 and gradient < 0.1:
            if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
                possible_txt.append(txt)
    if len(possible_txt) >= 2:
        subTitle = ''.join(possible_txt)
    if len(subTitle) > 0:
        return subTitle
    return None
def process_video(video_path, begin, end):
    """
    Walk the video, capture subtitles and derive narration suggestions.

    Samples roughly four frames per second. A narration marker row is
    emitted whenever there is a gap of at least one second between
    consecutive subtitles (and once after the final subtitle before
    ``end``).

    :param video_path: str, path of the video to process
    :param begin: actual start of the movie in seconds
    :param end: actual end (before the credits) in seconds
    :return: list of rows: either ['', '', '', '插入旁白'] markers or
        [start, end, subtitle, ''] subtitle rows
    """
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    lastSubTitle = None
    # res collects the raw subtitles seen while walking the video,
    # without any narration analysis mixed in.
    res = []
    # narratage_recommend holds the rows exported to the spreadsheet.
    narratage_recommend = []
    cnt = 0
    start_time = 0
    end_time = 0
    video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
    while True:
        _, frame = video.read()
        if frame is None:
            break
        cnt += 1
        # Sample about four frames per second.
        if cnt % int(fps / 4) == 0:
            subTitle = detect_subtitle(frame)
            # A subtitle appears for the first time.
            if lastSubTitle is None and subTitle is not None:
                start_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
            # The current subtitle disappears.
            elif lastSubTitle is not None and subTitle is None:
                end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
                res.append([start_time, end_time, lastSubTitle])
                # A gap of >= 1s before this subtitle suggests narration.
                if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
                    print('--------------------------------------------------')
                    narratage_recommend.append(['', '', '', '插入旁白'])
                print(start_time, end_time, lastSubTitle)
                narratage_recommend.append([round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
            # Two subtitles back to back, but with different text.
            elif lastSubTitle is not None and subTitle is not None:
                if string_similar(lastSubTitle, subTitle) < 0.7:
                    end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
                    res.append([start_time, end_time, lastSubTitle])
                    if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
                        narratage_recommend.append(['', '', '', '插入旁白'])
                        print('--------------------------------------------------')
                    print(start_time, end_time, lastSubTitle)
                    narratage_recommend.append([round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
                    start_time = end_time
                else:
                    # Same subtitle still showing; keep the longer reading.
                    lastSubTitle = subTitle if len(subTitle) > len(lastSubTitle) else lastSubTitle
                    continue
            # Remember the current subtitle for the next sampled frame.
            lastSubTitle = subTitle
            if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
                # Past the requested end: maybe one final narration slot.
                if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
                    print('--------------------------------------------------')
                    narratage_recommend.append(['', '', '', '插入旁白'])
                break
    return narratage_recommend
def write_excel_xlsx(path, sheet_name, value):
    """
    Dump the narration recommendation rows into a fresh workbook.

    :param path: str, destination path of the workbook
    :param sheet_name: str, name given to the sheet
    :param value: list of rows written to the sheet
    :return: None
    """
    book = openpyxl.Workbook()
    sheet = book.active
    sheet.title = sheet_name
    # Give the subtitle column extra width for readability.
    sheet.column_dimensions['C'].width = 50
    highlight = ('', '插入旁白', '翻译')
    for row_idx, row in enumerate(value, start=1):
        for col_idx, cell_value in enumerate(row, start=1):
            sheet.cell(row=row_idx, column=col_idx, value=str(cell_value))
            # Mark cells that need the editor's attention in yellow.
            if cell_value in highlight:
                sheet.cell(row=row_idx, column=col_idx).fill = PatternFill(fill_type='solid', fgColor='ffff00')
    book.save(path)
def detect_with_ocr(video_path, book_path, start_time, end_time):
    """
    Detect narration insertion points by reading on-screen subtitles.

    Locates the subtitle band, walks the video collecting subtitles and
    gap suggestions, then writes everything to an .xlsx workbook.

    :param video_path: str, path of the video to analyse
    :param book_path: str, path of the output workbook
    :param start_time: actual start of the movie in seconds
    :param end_time: actual end (before credits) in seconds
    :return: None
    """
    sheet_title = "旁白插入位置建议"
    # Publish the band bounds so detect_subtitle() can crop each frame.
    global up_b, down_b
    up_b, down_b = get_position(video_path, start_time)
    # Assemble the header plus the recommendation rows.
    header = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
    rows = header + process_video(video_path, start_time, end_time)
    # Export the recommendations to the spreadsheet.
    write_excel_xlsx(book_path, sheet_title, rows)
if __name__ == '__main__':
    # Example run: analyse the first 300 seconds of the movie.
    video_path = "D:/heelo/hysxm_1.mp4"
    book_path = '何以笙箫默.xlsx'
    detect_with_ocr(video_path, book_path, 0, 300)
import random
import time
import cv2
import numpy as np
from paddleocr import PaddleOCR
from collections import Counter
# Shared OCR engine used by the subtitle-presence check below.
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def random_int_list(start, stop, length):
    """
    Draw ``length`` distinct random integers from [start, stop].

    The bounds may be given in either order. When the interval holds fewer
    than ``length`` integers the whole interval is returned in random
    order — the original rejection loop would spin forever in that case
    (and also when ``length`` was 0).

    :param start: one end of the interval
    :param stop: the other end of the interval
    :param length: number of distinct integers wanted
    :return: list of unique random integers
    """
    start, stop = (int(start), int(stop)) if start <= stop else (int(stop), int(start))
    length = int(abs(length)) if length else 0
    population = range(start, stop + 1)
    # random.sample guarantees uniqueness without rejection sampling.
    return random.sample(population, min(length, len(population)))
def detect_subtitle(frame):
    """
    Decide whether a single frame shows a subtitle line.

    OCR is run on the bottom 30% of the frame; a hit requires confident,
    horizontally centred and (nearly) level text.

    :param frame: one video frame (BGR array from cv2)
    :return: True or False
    """
    frame = frame[int(frame.shape[0] * 0.7):]
    subtitle = ocr.ocr(frame, cls=True)
    print(subtitle)
    for x in subtitle:
        position, (txt, confidence) = x
        height = position[2][1] - position[0][1]
        mid = (position[0][0] + position[1][0]) / 2
        print(height, txt)
        # Slope of the detected text box; near zero means level text.
        gradient = np.arctan(abs((position[1][1] - position[0][1]) / (position[1][0] - position[0][0])))
        print(gradient)
        if confidence > 0.7 and 0.4 * frame.shape[1] < mid < 0.6 * frame.shape[1] \
                and gradient < 0.1:
            return True
        else:
            continue
    return False
def detect_movie(video_path, start, interval):
    """
    Decide whether the movie provides burned-in subtitles.

    Samples up to 50 random frames in each of three consecutive windows
    and majority-votes the per-window results.

    :param video_path: str, path of the video file
    :param start: start (seconds) of the first sampling window
    :param interval: length (seconds) of each sampling window
    :return: True or False (whether the video appears to have subtitles)
    """
    video = cv2.VideoCapture(video_path)
    fps = np.ceil(video.get(cv2.CAP_PROP_FPS))
    # Convert the window parameters from seconds to frame indices.
    start = start * fps
    interval = interval * fps
    random_number = 50
    ans = [False] * 3
    print(ans)
    for i in range(3):
        random_list = random_int_list(start, start + interval, random_number)
        start = start + interval
        for _, random_point in enumerate(random_list):
            video.set(cv2.CAP_PROP_POS_FRAMES, float(random_point))
            if video.isOpened():
                success, frame = video.read()
                if not success:
                    break
                ans[i] = detect_subtitle(frame)
                # One positive frame is enough for this window.
                if ans[i]:
                    print(random_point)
                    break
    video.release()
    print(ans)
    # Majority vote across the three windows.
    return Counter(ans).most_common(1)[0][0]
if __name__ == '__main__':
    # Quick manual test: sample three 2-minute windows starting at 90s.
    video_path = r'D:\heelo\hysxm.mp4'
    start_time = time.time()
    start = 90
    interval = 120
    print(detect_movie(video_path, start, interval))
    print(time.time() - start_time)
# encoding=utf8
import os.path
import argparse
import time
from judge_subtitle import detect_movie
from detect_with_asr import detect_with_asr
from detect_with_ocr import detect_with_ocr
def trans_to_seconds(timepoint):
    """
    Convert an 'hh:mm:ss' (or 'mm:ss', or plain seconds) string to seconds.

    :param timepoint: str, colon-separated time point
    :return: float, the time point expressed in seconds
    """
    total = 0.0
    # Each colon-separated field shifts the running total up one time unit.
    for part in timepoint.split(':'):
        total = total * 60 + float(part)
    return total
def detect(video_path, start_time, end_time, book_path):
    """
    Entry point: pick the OCR or ASR pipeline and run the detection.

    When no workbook path is given, one named after the video is used.
    Videos that show subtitles go through OCR; the rest go through ASR.

    :param video_path: str, path of the video to analyse
    :param start_time: str, start time ('hh:mm:ss' or seconds)
    :param end_time: str, end time ('hh:mm:ss' or seconds)
    :param book_path: str or None, output workbook path
    :return: None
    """
    if book_path is None:
        book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
    begin = trans_to_seconds(start_time)
    finish = trans_to_seconds(end_time)
    # Probe random frames for subtitles over a 60-second window.
    if detect_movie(video_path, begin, 60):
        detect_with_ocr(video_path, book_path, begin, finish)
    else:
        detect_with_asr(video_path, book_path, begin, finish)
if __name__ == '__main__':
    # Define the command-line interface.
    parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
    parser.add_argument("--video_path", required=True, type=str, help="待处理的视频存储路径")
    parser.add_argument("--start_time", required=True, type=str, help="视频中影片除开场动画外的实际开始时间点,格式为'时:分:秒',也可以输入对应的秒数")
    parser.add_argument("--end_time", required=True, type=str, help="视频中影片除演职表外的实际结束时间点,格式为'时:分:秒',也可以输入对应的秒数")
    parser.add_argument("--book_path", type=str, help='旁白解说表格存储路径,包含表格名,如"D:\AddCaption\hysxm.xlsx"')
    args = parser.parse_args()
    detect(args.video_path, args.start_time, args.end_time, args.book_path)
# coding=utf-8
import os
import argparse
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import openpyxl
# Scratch file used when the synthesised audio needs speed adjustment.
tmp_file = 'tmp.wav'
def speech_synthesis(text, output_file, speed):
    """
    Synthesise narration audio for ``text`` and write it to ``output_file``.

    :param text: str, narration text to synthesise
    :param output_file: str, path of the resulting .wav file
    :param speed: playback speed; values other than 1.0 are applied with
        ffmpeg after synthesis
    :return: None
    """
    # Synthesise into the scratch file first when the speed must change.
    if float(speed) != 1.0:
        audio_path = tmp_file
    else:
        audio_path = output_file
    # SECURITY: the subscription key is hard-coded in source; move it to
    # an environment variable or config file before distributing this.
    speech_config = SpeechConfig(subscription="ffa331815f0f4c7fa418bb6c2e1c4e17", region="eastus")
    speech_config.speech_synthesis_language = "zh-CN"
    speech_config.speech_synthesis_voice_name = 'zh-CN-XiaomoNeural'
    audio_config = AudioOutputConfig(filename=audio_path)
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    # .get() blocks until synthesis finishes; without it the output file
    # may still be incomplete when change_speed() reads it.
    synthesizer.speak_text_async(text).get()
    if float(speed) != 1.0:
        change_speed(output_file, speed)
def change_speed(wav_path, speed=1.5):
    """
    Re-encode the scratch audio at a different playback speed.

    Reads the module-level ``tmp_file``, writes the result to ``wav_path``
    and removes the scratch file afterwards.

    :param wav_path: str, destination path of the speed-adjusted audio
    :param speed: target tempo factor (ffmpeg atempo)
    :return: None
    """
    # Quote both paths so spaces do not split the shell command.
    cmd_line = 'ffmpeg -y -i "{}" -filter:a "atempo={}" "{}"'.format(tmp_file, speed, wav_path)
    os.system(cmd_line)
    # Drop the intermediate file.
    os.remove(tmp_file)
def read_sheet(book_path, sheet_name=None):
    """
    Read a whole sheet into a dict keyed by the header row.

    The sheet layout is fixed: the first row is the header
    (起始时间|终止时间|字幕|建议|解说脚本) and every later row is appended
    to the matching column's list.

    :param book_path: str, path of the workbook
    :param sheet_name: str, optional sheet to read; the previous version
        accepted this argument but always read the active sheet
    :return: dict mapping column header -> list of column values
    """
    workbook = openpyxl.load_workbook(book_path)
    # Honour the requested sheet; fall back to the active one.
    sheet = workbook[sheet_name] if sheet_name else workbook.active
    rows = sheet.max_row
    cols = sheet.max_column
    sheet_content = {}
    # Row 1 supplies the keys; rows 2..max_row supply the values.
    for j in range(1, cols + 1):
        header = sheet.cell(1, j).value
        sheet_content[header] = [sheet.cell(i, j).value for i in range(2, rows + 1)]
    return sheet_content
def get_narratage_text(sheet_content):
    """
    Derive the narration lines and their start times from the sheet.

    :param sheet_content: dict, keys=["起始时间","终止时间","字幕","建议","解说脚本"]
    :return: narratage_text: list, narration strings,
             narratage_start_time: list, matching start times in seconds
    """
    narratage = sheet_content['解说脚本']
    subtitle = sheet_content['字幕']
    start_time = sheet_content['起始时间']
    end_time = sheet_content['终止时间']
    narratage_start_time = []
    narratage_text = []
    for i, text in enumerate(narratage):
        if text is not None:
            # The literal '翻译' asks for the subtitle itself to be read out.
            if text == '翻译':
                narratage_text.append(subtitle[i])
                narratage_start_time.append(float(start_time[i]) + 0.1)
            else:
                # Multi-paragraph narration is split on newlines and the
                # pieces spaced ~0.5s apart, estimating 4.5 characters per
                # second of speech scaled by the requested speed.
                text_split = text.split('\n')
                cur_start = float(end_time[i - 1]) + 0.1 if i > 0 else 0
                for x in text_split:
                    narratage_text.append(x)
                    narratage_start_time.append(cur_start)
                    # NOTE(review): relies on the module-level ``args`` that
                    # only exists when this file runs as a script; importing
                    # this function (as the GUI does) raises NameError here —
                    # consider passing the speed in as a parameter.
                    cur_start = cur_start + len(x) / (4.5 * args.speed) + 0.5
    return narratage_text, narratage_start_time
def second_to_str(seconds):
    """
    Format a second count as an SRT timestamp 'HH:MM:SS,mmm'.

    :param seconds: number (or numeric string) of seconds
    :return: str, zero-padded timestamp with millisecond precision
    """
    seconds = float(seconds)
    hour, remainder = divmod(seconds, 3600)
    minute, remainder = divmod(remainder, 60)
    second = int(remainder)
    ms = int((remainder - second) * 1000)
    return "%02d:%02d:%02d,%03d" % (int(hour), int(minute), second, ms)
def export_caption(sheet_content, caption_file):
    """
    Write the user-corrected subtitles out as an SRT caption file.

    :param sheet_content: dict, corrected sheet content from read_sheet()
    :param caption_file: str, destination path of the .srt file
    :return: None
    """
    subtitles = sheet_content["字幕"]
    starts = sheet_content['起始时间']
    ends = sheet_content['终止时间']
    counter = 0
    with open(caption_file, "w", encoding="utf-8") as srt:
        for idx, line in enumerate(subtitles):
            # Rows without subtitle text (narration markers) are skipped.
            if line is None:
                continue
            counter += 1
            srt.write(str(counter) + "\n")
            srt.write(second_to_str(starts[idx]) + " --> " + second_to_str(ends[idx]) + "\n")
            srt.write(line + "\n\n")
def ss_and_export(output_dir=None, sheet_path=None, caption_file=None, speed=None):
    """
    Synthesise every narration line from the sheet and export captions.

    Each argument falls back to the corresponding command-line option in
    the module-level ``args`` when omitted, keeping the original
    zero-argument CLI usage working while letting callers (such as the
    GUI) pass values directly — the old zero-parameter version raised
    TypeError whenever it was invoked with positional arguments.

    :param output_dir: str, directory that receives the .wav files
    :param sheet_path: str, path of the narration sheet (.xlsx)
    :param caption_file: str, path of the exported .srt caption file
    :param speed: speech speed forwarded to the synthesiser
    :return: None
    """
    book_path = sheet_path if sheet_path is not None else args.sheet_path
    root_path = output_dir if output_dir is not None else args.output_dir
    speed = speed if speed is not None else args.speed
    caption_file = caption_file if caption_file is not None else args.caption_file
    # Create the output directory when needed.
    if not os.path.exists(root_path):
        os.mkdir(root_path)
    # Read the sheet; derive narration lines and their start times.
    sheet_content = read_sheet(book_path)
    narratages, start_timepoint = get_narratage_text(sheet_content)
    export_caption(sheet_content, caption_file)
    # Synthesise one audio file per narration line, named by start time.
    for i, text in enumerate(narratages):
        wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
        speech_synthesis(text, wav_path, speed)
if __name__ == '__main__':
    # Define the command-line interface.
    parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
    parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
    parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
    parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
    parser.add_argument("--speed", type=float, default=1.0, help="设置语速,默认为1.0")
    args = parser.parse_args()
    # ss_and_export() reads the parsed options from the module-level
    # ``args``; the old call passed four positional arguments to a
    # function declared with none, which always raised TypeError.
    ss_and_export()
# -*- coding:utf-8 -*-
import threading
from mttkinter import mtTkinter as tk
from tkinter import filedialog, ttk, messagebox, Frame, Canvas
import os
import ffmpeg
window = tk.Tk()
window.title('无障碍电影辅助工具')  # window title
window.geometry('600x400')  # window size (width x height)
def open_video_file():
    """
    Ask the user for a video file and initialise the time fields.

    Stores the chosen path in ``inputFilePath`` and pre-fills ``endTime``
    with the video duration probed via ffmpeg.
    """
    video_path = filedialog.askopenfilename(title=u'选择文件', initialdir=(os.path.expanduser('D:/heelo')))
    if len(video_path) != 0 and is_video(video_path):
        print('打开文件:', video_path)
        inputFilePath.set(video_path)
        # Probe the container for the video stream and its duration.
        info = ffmpeg.probe(video_path)
        vs = next(c for c in info['streams'] if c['codec_type'] == 'video')
        try:
            duration = int(float(vs['duration']))
            hours = int(duration / 3600)
            minutes = int(duration / 60 - 60 * hours)
            seconds = int(duration - 60 * minutes - 3600 * hours)
            endTime.set("%02d:%02d:%02d" % (hours, minutes, seconds))
        except (KeyError, ValueError, TypeError):
            # Some containers (e.g. mkv) expose the duration as a tag
            # instead of a stream field. Narrowed from a bare except so
            # real bugs and KeyboardInterrupt are not swallowed.
            endTime.set(vs['tags']['DURATION'])
    if len(video_path) != 0 and not is_video(video_path):
        messagebox.showinfo('警告', "请选择正确的视频格式,能够处理的视频格式如下所示:\n'.mkv', '.rmvb', '.mp4', '.avi'")
def open_sheet_file():
    """
    Ask the user for the narration sheet (.xlsx) and store its path.
    """
    sheet_path = filedialog.askopenfilename(title=u'选择文件', initialdir=(os.path.expanduser('D:/heelo')))
    # endswith() is stricter than the old substring test, which also
    # accepted names like 'a.xlsx.bak'.
    if len(sheet_path) != 0 and sheet_path.endswith(".xlsx"):
        print("打开表格", sheet_path)
        narratagePath.set(sheet_path)
def find_save_file():
    """
    Ask the user where to save the output workbook.

    Defaults the file name to the video's base name and guarantees the
    chosen path ends with '.xlsx'.
    """
    video_path = inputFilePath.get()
    defaultName = ""
    if "." in os.path.basename(video_path):
        defaultName = os.path.basename(video_path).split('.')[0]
    # The tkinter option is spelled 'filetypes'; the old 'filetype'
    # keyword is not a documented option of asksaveasfilename.
    book_path = filedialog.asksaveasfilename(title=u'保存文件至',
                                             initialdir=os.path.dirname(video_path),
                                             initialfile=defaultName,
                                             filetypes=[('excel文件', ".xlsx")])
    # Append the extension only when it is not already there.
    if len(book_path) != 0 and not book_path.endswith(".xlsx"):
        book_path += ".xlsx"
    print('保存文件至:', book_path)
    outputFilePath.set(book_path)
def is_video(filename):
    """
    Check whether ``filename`` has a supported video extension.

    :param filename: str, path or file name to test
    :return: bool, True for .mkv/.rmvb/.mp4/.avi (case-insensitive)
    """
    video_tag = ('.mkv', '.rmvb', '.mp4', '.avi')
    # endswith() avoids false positives such as 'movie.mp4.txt', and
    # lower() accepts upper-case extensions from Windows paths.
    return filename.lower().endswith(video_tag)
def start_detect():
    """
    Validate the UI inputs and run the narration-position detection.

    Reads the video path, time range and output workbook path from the
    tk variables, then delegates to narratage_detection.detect().
    """
    # Validate the user-supplied paths first.
    video_path = inputFilePath.get()
    book_path = outputFilePath.get()
    if len(video_path) == 0:
        messagebox.showinfo('警告', "请输入视频文件路径")
        return
    elif not is_video(video_path):
        messagebox.showinfo('警告', "视频文件格式错误,请重新确认")
        return
    if len(book_path) == 0:
        messagebox.showinfo("警告", "请输入表格存放路径")
        return
    # print("视频文件路径:", inputFilePath.get())
    # print("开始时间:", startTime.get())
    # print("结束时间:", endTime.get())
    # print("输出表格文件路径:", outputFilePath.get())
    # Import lazily so the heavy detection stack loads only when needed.
    from narratage_detection import detect
    detect(video_path, startTime.get(), endTime.get(), book_path)
    processState.set("任务已完成")
def find_save_dir():
    """Ask the user for the directory that will hold the synthesised audio."""
    chosen_dir = filedialog.askdirectory(title=u'保存文件至')
    print('保存音频于:', chosen_dir)
    audioDir.set(chosen_dir)
def set_caption_file():
    """
    Ask the user where to save the exported .srt caption file.

    The default name is derived from the narration sheet and the default
    directory from the chosen audio output directory.
    """
    suggested_name = os.path.basename(narratagePath.get()).split('.')[0] + ".srt"
    suggested_dir = audioDir.get()
    chosen = filedialog.asksaveasfilename(title=u'保存文件至',
                                          initialdir=suggested_dir,
                                          initialfile=suggested_name,
                                          filetype=[('字幕文件', ".srt")])
    print(chosen)
    captionPath.set(chosen)
def start_synthesis():
    """
    Collect the synthesis settings from the UI and run ss_and_export.
    """
    audio_dir = audioDir.get()
    sheet_path = narratagePath.get()
    speed = audio_speed.get()
    caption_path = captionPath.get()
    from speech_synthesis import ss_and_export
    # Argument order follows ss_and_export(output_dir, sheet_path,
    # caption_file, speed); the old call swapped the speed and the
    # caption path.
    ss_and_export(audio_dir, sheet_path, caption_path, speed)
def thread_it(func, *args):
    """
    Run ``func(*args)`` on a daemon thread so the UI stays responsive.

    :param func: callable to execute
    :param args: positional arguments forwarded to ``func``
    :return: None
    """
    # daemon=True at construction replaces the deprecated setDaemon().
    t = threading.Thread(target=func, args=args, daemon=True)
    t.start()
def _quit():
    """Stop the Tk event loop, destroy the window and exit the process."""
    window.quit()
    window.destroy()
    exit()
# Create the tab bar, one tab per workflow stage.
tabControl = ttk.Notebook(window)
tab1 = ttk.Frame(tabControl)
tabControl.add(tab1, text="旁白位置推荐")
tab2 = ttk.Frame(tabControl)
tabControl.add(tab2, text="旁白语音合成")
tabControl.pack(expand=1, fill="both")
"""
为”旁白位置推荐“添加按钮、进度条等部件
"""
# --- Widgets for the "narration position recommendation" tab ---
video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
# video_info.grid(column=0, row=0, padx=8, pady=4)
video_info.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.3)
input_label = ttk.Label(video_info, text="视频文件")
input_label.grid(column=0, row=0)
inputFilePath = tk.StringVar()
inputFile = ttk.Entry(video_info, width=30, textvariable=inputFilePath)
inputFile.grid(column=1, row=0)
upload_button = ttk.Button(video_info, text="上传文件", command=open_video_file)
upload_button.grid(column=2, row=0)
startTime_label = ttk.Label(video_info, text="视频实际开始时间")
startTime_label.grid(column=0, row=1)
startTime = tk.StringVar(value="00:00:00")
startTime_entered = ttk.Entry(video_info, width=11, textvariable=startTime)
startTime_entered.grid(column=1, row=1, sticky="W")
endTime_label = ttk.Label(video_info, text="视频实际结束时间")
endTime_label.grid(column=0, row=2)
endTime = tk.StringVar(value="23:59:59")
endTime_entered = ttk.Entry(video_info, width=11, textvariable=endTime)
endTime_entered.grid(column=1, row=2, sticky="W")
# Detection controls: output workbook path plus the "start" button.
detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
detect_command.place(relx=0.1, rely=0.5, relwidth=0.8, relheight=0.3)
output_label = ttk.Label(detect_command, text="输出表格路径")
output_label.grid(column=0, row=0)
outputFilePath = tk.StringVar()
outputFile = ttk.Entry(detect_command, width=30, textvariable=outputFilePath)
outputFile.grid(column=1, row=0)
save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file)
save_button.grid(column=2, row=0)
# Detection runs on a worker thread so the UI stays responsive.
startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect))
startDetection.grid(column=0, row=1)
processState = tk.StringVar()
stateLabel = tk.Label(detect_command, textvariable=processState, fg="green")
stateLabel.grid(column=1, row=1, sticky="W")
"""
为旁白语音合成添加部件
"""
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.3)
narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
narratage_label.grid(column=0, row=0)
narratagePath = tk.StringVar()
narratagePath_input = ttk.Entry(audio_info, width=30, textvariable=narratagePath)
narratagePath_input.grid(column=1, row=0)
upload_button_2 = ttk.Button(audio_info, text="上传文件", command=open_sheet_file)
upload_button_2.grid(column=2, row=0)
speed_label = ttk.Label(audio_info, text="旁白语速")
speed_label.grid(column=0, row=1)
audio_speed = tk.StringVar(value="1.0")
speed_input = ttk.Entry(audio_info, width=30, textvariable=audio_speed)
speed_input.grid(column=1, row=1)
synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
synthesis_command.place(relx=0.1, rely=0.5, relwidth=0.8, relheight=0.3)
caption_label = ttk.Label(synthesis_command, text="输出字幕文件")
caption_label.grid(column=0, row=1)
captionPath = tk.StringVar()
captionPath_input = ttk.Entry(synthesis_command, width=30, textvariable=captionPath)
captionPath_input.grid(column=1, row=1)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=set_caption_file)
save_button_2.grid(column=2, row=1)
audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于")
audioDir_label.grid(column=0, row=0)
audioDir = tk.StringVar()
audioDir_input = ttk.Entry(synthesis_command, width=30, textvariable=audioDir)
audioDir_input.grid(column=1, row=0)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir)
save_button_2.grid(column=2, row=0)
startSynthesis = ttk.Button(synthesis_command, text="开始合成", command=lambda: thread_it(start_synthesis))
startSynthesis.grid(column=0, row=2)
processState_2 = tk.StringVar()
stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
stateLabel_2.grid(column=1, row=2, sticky="W")
# # creating a menu bar
# menu_bar = Menu(window)
# window.config(menu=menu_bar)
#
# # Add menu items
# file_menu = Menu(menu_bar, tearoff=0)
# # file_menu.add_command(label="", command=save_file)
# # file_menu.add_separator()
# file_menu.add_command(label="退出", command=_quit)
#
# # 添加下拉列表
# menu_bar.add_cascade(label="文件", menu=file_menu)
# # 更新进度条函数
# def change_schedule(now_schedule, all_schedule):
# canvas.coords(fill_rec, (5, 5, 6 + (now_schedule / all_schedule) * 100, 25))
# tab2.update()
# x.set(str(round(now_schedule / all_schedule * 100, 2)) + '%')
# if round(now_schedule / all_schedule * 100, 2) == 100.00:
# x.set("完成")
#
#
# # 创建画布
# canvas = Canvas(tab2, width=120, height=30, bg="white")
# canvas.grid(row=0, column=0)
# x = tk.StringVar()
# # 进度条以及完成程度
# out_rec = canvas.create_rectangle(5, 5, 105, 25, outline="blue", width=1)
# fill_rec = canvas.create_rectangle(5, 5, 5, 25, outline="", width=0, fill="blue")
#
# tk.Label(tab2, textvariable=x).grid(row=0, column=1)
#
# '''
# 使用时直接调用函数change_schedule(now_schedule,all_schedule)
# 下面就模拟一下....
# '''
#
# for i in range(100):
# time.sleep(0.1)
# change_schedule(i, 99)
window.mainloop() # 显示
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment