Commit 453f190c authored by 翟艳秋(20软)'s avatar 翟艳秋(20软)

initial project

parents
import shutil
import time
import openpyxl
from openpyxl.styles import PatternFill, Alignment
from split_wav import *
def create_sheet(path, sheet_name, value):
    """
    Create a new workbook initialised with the given header rows.

    :param path: str, file path where the workbook is saved
    :param sheet_name: str, name given to the sheet
    :param value: list of rows; the header is
        ['起始时间','终止时间','字幕','建议','旁边解说脚本']
    :return: None
    """
    book = openpyxl.Workbook()
    sheet = book.active
    sheet.title = sheet_name
    # Widen the subtitle column so long lines stay readable.
    sheet.column_dimensions['C'].width = 50
    for row_idx, row in enumerate(value, start=1):
        for col_idx, cell_value in enumerate(row, start=1):
            sheet.cell(row=row_idx, column=col_idx, value=str(cell_value))
    book.save(path)
def write_to_sheet(path, sheet_name, value):
    """
    Append rows to an existing workbook.

    Cells that are empty or equal to '插入旁白' are highlighted in yellow,
    and the subtitle column (column C) gets word wrapping.

    :param path: str, path of the workbook to update
    :param sheet_name: str, name of the sheet to append to
    :param value: list of rows, each row a list of cell values
    :return: None
    """
    workbook = openpyxl.load_workbook(path)
    # get_sheet_by_name() is deprecated and removed in recent openpyxl;
    # subscription access is the supported spelling.
    sheet = workbook[sheet_name]
    cur_row = sheet.max_row
    for i in range(0, len(value)):
        for j in range(0, len(value[i])):
            sheet.cell(row=cur_row + i + 1, column=j + 1, value=str(value[i][j]))
            # Highlight cells that mark a suggested narration insertion point.
            if value[i][j] == '' or value[i][j] == '插入旁白':
                sheet.cell(row=cur_row + i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00')
            # Column index 2 (column C) holds the subtitle text: wrap it.
            if j == 2:
                sheet.cell(row=cur_row + i + 1, column=j + 1).alignment = Alignment(wrapText=True)
    workbook.save(path)
def trans_to_mono(wav_path):
    """
    Convert an audio file to a single channel (mono) with ffmpeg.

    :param wav_path: str, path of the audio file to convert
    :return: str, path of the newly written mono audio file
    """
    # Derive the output name from the stem so extensions of any length
    # work (the old wav_path[:-4] slicing assumed a 3-letter extension).
    base, _ext = os.path.splitext(wav_path)
    new_wav_path = base + "_1.wav"
    # Quote both paths so spaces in directories do not break the command.
    command = 'ffmpeg -i "{}" -ac 1 -y "{}"'.format(wav_path, new_wav_path)
    os.system(command)
    return new_wav_path
def concat_wav(root):
    """
    Concatenate the per-segment vocal tracks under ``root`` into one file.

    Writes an ffmpeg concat list ('list.txt') referencing every
    ``<segment>/vocal.wav`` and merges them into ``total.wav``.

    :param root: str, directory containing one sub-directory per segment
    :return: str, path of the concatenated audio file
    """
    txt_path = os.path.join(root, 'list.txt')
    with open(txt_path, 'w', encoding='utf-8') as f:
        for file_name in os.listdir(root):
            if os.path.isdir(os.path.join(root, file_name)):
                # Build the path with os.path.join instead of string
                # concatenation so separators stay consistent per platform.
                wav_path = os.path.join(root, file_name, "vocal.wav")
                f.write("file '" + wav_path + "'\n")
    output_file = os.path.join(root, 'total.wav')
    # -safe 0 lets the concat demuxer accept absolute paths in the list;
    # quote the paths so spaces do not split the command.
    command = 'ffmpeg -f concat -safe 0 -i "{}" -y "{}"'.format(txt_path, output_file)
    os.system(command)
    return output_file
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
    """
    Detect narration insertion points with speech recognition (ASR).

    Extracts the audio track from ``video_path``, runs it through the
    DeepSpeech2 model and appends the recognised rows to the workbook at
    ``book_path`` (creating it with a header row when it does not exist).

    :param video_path: str, path of the video to analyse
    :param book_path: str, path of the output .xlsx workbook
    :param start_time: start offset in seconds (default 0)
    :param end_time: end offset in seconds, -1 meaning "to the end"
    :return: None
    """
    # Folder that temporarily stores intermediate artefacts.
    tmp_root = './tmp'
    if not os.path.exists(tmp_root):
        os.mkdir(tmp_root)
    if not os.path.exists(video_path):
        print("你输入的视频地址有误,请仔细检查一下")
        return
    # Extract the audio from the video over the requested range.
    # NOTE(review): extract_audio comes from the `split_wav` star import —
    # confirm its signature against that module.
    audio_path = extract_audio(video_path, tmp_root, start_time, end_time)
    # root = split_audio()
    # extrac_speech()
    #
    # # Concatenate the extracted vocals and convert the audio to mono.
    # total_wav_path = concat_wav(root)
    # audio_path = trans_to_mono(total_wav_path)
    # The sheet inside the workbook is named "旁白插入位置建议".
    book_name_xlsx = book_path
    sheet_name_xlsx = "旁白插入位置建议"
    # Create the output workbook (with header row) when it does not exist.
    if not os.path.exists(book_name_xlsx):
        table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
        create_sheet(book_name_xlsx, sheet_name_xlsx, table_head)
    sys.path.append("./PaddlePaddle_DeepSpeech2")
    from infer_path import predict_long_audio_with_paddle
    table_content = predict_long_audio_with_paddle(audio_path, book_name_xlsx, start_time)
    write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content)
    # Remove intermediate files (disabled, presumably for debugging).
    # shutil.rmtree(tmp_root)
if __name__ == '__main__':
    start_time = time.time()
    # Path of the video to process (hard-coded example run).
    video_path = 'D:/heelo/zhanlang.rmvb'
    detect_with_asr(video_path, "zhanlang.xlsx", 50, 5154)
    print("处理视频 {} 需要时长为{} ".format(os.path.basename(video_path), time.time() - start_time))
import time
import cv2
import numpy as np
from paddleocr import PaddleOCR
import difflib
import openpyxl
from openpyxl.styles import PatternFill
# Upper and lower bounds of the subtitle band (set by get_position()).
up_b, down_b = 0, 0
# Initialise the OCR engine (Chinese model with text-angle classification).
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def get_position(video_path, start_time):
    """
    Sample frames from the video to locate the subtitle band.

    OCR is run on the lower 40% of up to 1000 frames (about 3 per second)
    and the vertical extent of recurring, level, horizontally centred text
    is voted on; the band with the most votes is taken to be the subtitle
    position.

    :param video_path: str, path of the video file
    :param start_time: offset in seconds at which sampling starts
    :return: (top, bottom) bounds of the subtitle band in full-frame
        coordinates
    """
    video = cv2.VideoCapture(video_path)
    subtitle_position = {}
    fps = video.get(cv2.CAP_PROP_FPS)
    start = int(start_time * fps)
    cnt = 0
    txt_cnt = 0
    pre_txt = None
    video.set(cv2.CAP_PROP_POS_FRAMES, start)
    # Only analyse the bottom 40% of the frame, where subtitles live.
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
    while True:
        _, img = video.read()
        cnt += 1
        if img is None or cnt > 1000:
            break
        # Sample roughly three frames per second.
        if cnt % int(fps / 3) != 0:
            continue
        img = img[height:]
        res = ocr.ocr(img, cls=True)
        # Sort in place by the box's top y coordinate; the original called
        # sorted() and discarded its return value.
        res.sort(key=lambda x: x[0][0][1])
        bottom_position = None
        if len(res) == 0:
            continue
        for x in res:
            rect, (txt, confidence) = x
            mid = (rect[0][0] + rect[1][0]) / 2
            gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
            # Candidate subtitle: confident, horizontally centred, level text.
            if confidence > 0.9 and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1] and gradient < 0.1:
                if bottom_position is None:
                    bottom_position = rect[0][1]
                keys = subtitle_position.keys()
                # Same text line as the first candidate of this frame.
                if abs(rect[0][1] - bottom_position) < 10:
                    # Count distinct subtitles so we can stop after three.
                    if pre_txt is None or pre_txt != txt:
                        txt_cnt += 1
                    pre_txt = txt
                    # Vote for this (top, bottom) band. The original looked
                    # up (x0, y2) here while storing (y0, y2) keys, so the
                    # membership test never matched what was incremented
                    # and could raise KeyError.
                    if (rect[0][1], rect[2][1]) in keys:
                        subtitle_position[(rect[0][1], rect[2][1])] += 1
                    else:
                        replace = False
                        for k in keys:
                            # Merge with a near-identical band, widening the
                            # key to the loosest top/bottom seen so far.
                            if abs(rect[0][1] - k[0]) + abs(rect[2][1] - k[1]) < 10:
                                new_k = min(k[0], rect[0][1]), max(k[1], rect[2][1])
                                if new_k != k:
                                    subtitle_position[new_k] = subtitle_position[k]
                                    subtitle_position[new_k] += 1
                                    # Safe despite iterating keys: we break
                                    # out of the loop right after this pop.
                                    subtitle_position.pop(k)
                                else:
                                    subtitle_position[k] += 1
                                replace = True
                                break
                        if not replace:
                            subtitle_position[(rect[0][1], rect[2][1])] = 1
        if txt_cnt == 3:
            break
    print(subtitle_position)
    # The band with the most votes wins.
    up_b, down_b = max(subtitle_position, key=subtitle_position.get)
    return up_b + height, down_b + height
def string_similar(s1, s2):
    """
    Estimate how similar two strings are; used to suppress near-duplicate
    subtitles in the output.

    :param s1: first string
    :param s2: second string
    :return: float in [0, 1], upper-bound similarity ratio
    """
    matcher = difflib.SequenceMatcher(None, s1, s2)
    return matcher.quick_ratio()
def detect_subtitle(img):
    """
    Run OCR on the subtitle band of one frame and return the subtitle text.

    Relies on the module-level ``up_b``/``down_b`` bounds produced by
    get_position() and the shared ``ocr`` engine.

    :param img: one full video frame (BGR array from cv2)
    :return: str subtitle text, or None when no subtitle is found
    """
    subTitle = ''
    # Crop to the detected subtitle band with a 30 px margin each side.
    img = img[int(up_b) - 30:int(down_b) + 30]
    # img = cv2.resize(img, (int(img.shape[1] * 0.5), int(img.shape[0] * 0.5)))
    res = ocr.ocr(img, cls=True)
    # NOTE(review): sorted() discards its result here, so res keeps the
    # OCR engine's own ordering.
    sorted(res, key=lambda x: x[0][0][1])
    bottom_position = None
    if len(res) == 0:
        return None
    log = []
    possible_txt = []
    for x in res:
        rect, (txt, confidence) = x
        font_size = rect[2][1] - rect[0][1]
        mid = (rect[0][0] + rect[1][0]) / 2
        # Slope of the text box; near zero means a level line of text.
        gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
        log.append("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
        # Accept: confidence > 0.7, slope < 0.1, vertical offset <= 25 px,
        # and the text centre within the middle fifth of the frame width.
        if confidence > 0.7 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6 and \
                abs(rect[0][1] - 30) + abs(img.shape[0] - rect[2][1] - 30) <= 25:
            subTitle += txt
        # A subtitle line split into two (or more) dialogue fragments.
        elif confidence > 0.85 and gradient < 0.1:
            if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
                possible_txt.append(txt)
    if len(possible_txt) >= 2:
        subTitle = ''.join(possible_txt)
    if len(subTitle) > 0:
        return subTitle
    return None
def process_video(video_path, begin, end):
    """
    Walk the video, capture subtitles and derive narration suggestions.

    Samples roughly four frames per second. A narration marker row is
    emitted whenever there is a gap of at least one second between
    consecutive subtitles (and once after the final subtitle before
    ``end``).

    :param video_path: str, path of the video to process
    :param begin: actual start of the movie in seconds
    :param end: actual end (before the credits) in seconds
    :return: list of rows: either ['', '', '', '插入旁白'] markers or
        [start, end, subtitle, ''] subtitle rows
    """
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    lastSubTitle = None
    # res collects the raw subtitles seen while walking the video,
    # without any narration analysis mixed in.
    res = []
    # narratage_recommend holds the rows exported to the spreadsheet.
    narratage_recommend = []
    cnt = 0
    start_time = 0
    end_time = 0
    video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
    while True:
        _, frame = video.read()
        if frame is None:
            break
        cnt += 1
        # Sample about four frames per second.
        if cnt % int(fps / 4) == 0:
            subTitle = detect_subtitle(frame)
            # A subtitle appears for the first time.
            if lastSubTitle is None and subTitle is not None:
                start_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
            # The current subtitle disappears.
            elif lastSubTitle is not None and subTitle is None:
                end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
                res.append([start_time, end_time, lastSubTitle])
                # A gap of >= 1s before this subtitle suggests narration.
                if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
                    print('--------------------------------------------------')
                    narratage_recommend.append(['', '', '', '插入旁白'])
                print(start_time, end_time, lastSubTitle)
                narratage_recommend.append([round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
            # Two subtitles back to back, but with different text.
            elif lastSubTitle is not None and subTitle is not None:
                if string_similar(lastSubTitle, subTitle) < 0.7:
                    end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
                    res.append([start_time, end_time, lastSubTitle])
                    if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
                        narratage_recommend.append(['', '', '', '插入旁白'])
                        print('--------------------------------------------------')
                    print(start_time, end_time, lastSubTitle)
                    narratage_recommend.append([round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
                    start_time = end_time
                else:
                    # Same subtitle still showing; keep the longer reading.
                    lastSubTitle = subTitle if len(subTitle) > len(lastSubTitle) else lastSubTitle
                    continue
            # Remember the current subtitle for the next sampled frame.
            lastSubTitle = subTitle
            if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
                # Past the requested end: maybe one final narration slot.
                if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
                    print('--------------------------------------------------')
                    narratage_recommend.append(['', '', '', '插入旁白'])
                break
    return narratage_recommend
def write_excel_xlsx(path, sheet_name, value):
    """
    Dump the narration recommendation rows into a fresh workbook.

    :param path: str, destination path of the workbook
    :param sheet_name: str, name given to the sheet
    :param value: list of rows written to the sheet
    :return: None
    """
    book = openpyxl.Workbook()
    sheet = book.active
    sheet.title = sheet_name
    # Give the subtitle column extra width for readability.
    sheet.column_dimensions['C'].width = 50
    highlight = ('', '插入旁白', '翻译')
    for row_idx, row in enumerate(value, start=1):
        for col_idx, cell_value in enumerate(row, start=1):
            sheet.cell(row=row_idx, column=col_idx, value=str(cell_value))
            # Mark cells that need the editor's attention in yellow.
            if cell_value in highlight:
                sheet.cell(row=row_idx, column=col_idx).fill = PatternFill(fill_type='solid', fgColor='ffff00')
    book.save(path)
def detect_with_ocr(video_path, book_path, start_time, end_time):
    """
    Detect narration insertion points by reading on-screen subtitles.

    Locates the subtitle band, walks the video collecting subtitles and
    gap suggestions, then writes everything to an .xlsx workbook.

    :param video_path: str, path of the video to analyse
    :param book_path: str, path of the output workbook
    :param start_time: actual start of the movie in seconds
    :param end_time: actual end (before credits) in seconds
    :return: None
    """
    sheet_title = "旁白插入位置建议"
    # Publish the band bounds so detect_subtitle() can crop each frame.
    global up_b, down_b
    up_b, down_b = get_position(video_path, start_time)
    # Assemble the header plus the recommendation rows.
    header = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
    rows = header + process_video(video_path, start_time, end_time)
    # Export the recommendations to the spreadsheet.
    write_excel_xlsx(book_path, sheet_title, rows)
if __name__ == '__main__':
    # Example run: analyse the first 300 seconds of the movie.
    video_path = "D:/heelo/hysxm_1.mp4"
    book_path = '何以笙箫默.xlsx'
    detect_with_ocr(video_path, book_path, 0, 300)
import random
import time
import cv2
import numpy as np
from paddleocr import PaddleOCR
from collections import Counter
# Shared OCR engine used by the subtitle-presence check below.
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def random_int_list(start, stop, length):
    """
    Draw ``length`` distinct random integers from [start, stop].

    The bounds may be given in either order. When the interval holds fewer
    than ``length`` integers the whole interval is returned in random
    order — the original rejection loop would spin forever in that case
    (and also when ``length`` was 0).

    :param start: one end of the interval
    :param stop: the other end of the interval
    :param length: number of distinct integers wanted
    :return: list of unique random integers
    """
    start, stop = (int(start), int(stop)) if start <= stop else (int(stop), int(start))
    length = int(abs(length)) if length else 0
    population = range(start, stop + 1)
    # random.sample guarantees uniqueness without rejection sampling.
    return random.sample(population, min(length, len(population)))
def detect_subtitle(frame):
    """
    Decide whether a single frame shows a subtitle line.

    OCR is run on the bottom 30% of the frame; a hit requires confident,
    horizontally centred and (nearly) level text.

    :param frame: one video frame (BGR array from cv2)
    :return: True or False
    """
    frame = frame[int(frame.shape[0] * 0.7):]
    subtitle = ocr.ocr(frame, cls=True)
    print(subtitle)
    for x in subtitle:
        position, (txt, confidence) = x
        height = position[2][1] - position[0][1]
        mid = (position[0][0] + position[1][0]) / 2
        print(height, txt)
        # Slope of the detected text box; near zero means level text.
        gradient = np.arctan(abs((position[1][1] - position[0][1]) / (position[1][0] - position[0][0])))
        print(gradient)
        if confidence > 0.7 and 0.4 * frame.shape[1] < mid < 0.6 * frame.shape[1] \
                and gradient < 0.1:
            return True
        else:
            continue
    return False
def detect_movie(video_path, start, interval):
    """
    Decide whether the movie provides burned-in subtitles.

    Samples up to 50 random frames in each of three consecutive windows
    and majority-votes the per-window results.

    :param video_path: str, path of the video file
    :param start: start (seconds) of the first sampling window
    :param interval: length (seconds) of each sampling window
    :return: True or False (whether the video appears to have subtitles)
    """
    video = cv2.VideoCapture(video_path)
    fps = np.ceil(video.get(cv2.CAP_PROP_FPS))
    # Convert the window parameters from seconds to frame indices.
    start = start * fps
    interval = interval * fps
    random_number = 50
    ans = [False] * 3
    print(ans)
    for i in range(3):
        random_list = random_int_list(start, start + interval, random_number)
        start = start + interval
        for _, random_point in enumerate(random_list):
            video.set(cv2.CAP_PROP_POS_FRAMES, float(random_point))
            if video.isOpened():
                success, frame = video.read()
                if not success:
                    break
                ans[i] = detect_subtitle(frame)
                # One positive frame is enough for this window.
                if ans[i]:
                    print(random_point)
                    break
    video.release()
    print(ans)
    # Majority vote across the three windows.
    return Counter(ans).most_common(1)[0][0]
if __name__ == '__main__':
    # Quick manual test: sample three 2-minute windows starting at 90s.
    video_path = r'D:\heelo\hysxm.mp4'
    start_time = time.time()
    start = 90
    interval = 120
    print(detect_movie(video_path, start, interval))
    print(time.time() - start_time)
# encoding=utf8
import os.path
import argparse
import time
from judge_subtitle import detect_movie
from detect_with_asr import detect_with_asr
from detect_with_ocr import detect_with_ocr
def trans_to_seconds(timepoint):
    """
    Convert an 'hh:mm:ss' (or 'mm:ss', or plain seconds) string to seconds.

    :param timepoint: str, colon-separated time point
    :return: float, the time point expressed in seconds
    """
    total = 0.0
    # Each colon-separated field shifts the running total up one time unit.
    for part in timepoint.split(':'):
        total = total * 60 + float(part)
    return total
def detect(video_path, start_time, end_time, book_path):
    """
    Entry point: pick the OCR or ASR pipeline and run the detection.

    When no workbook path is given, one named after the video is used.
    Videos that show subtitles go through OCR; the rest go through ASR.

    :param video_path: str, path of the video to analyse
    :param start_time: str, start time ('hh:mm:ss' or seconds)
    :param end_time: str, end time ('hh:mm:ss' or seconds)
    :param book_path: str or None, output workbook path
    :return: None
    """
    if book_path is None:
        book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
    begin = trans_to_seconds(start_time)
    finish = trans_to_seconds(end_time)
    # Probe random frames for subtitles over a 60-second window.
    if detect_movie(video_path, begin, 60):
        detect_with_ocr(video_path, book_path, begin, finish)
    else:
        detect_with_asr(video_path, book_path, begin, finish)
if __name__ == '__main__':
    # Define the command-line interface.
    parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
    parser.add_argument("--video_path", required=True, type=str, help="待处理的视频存储路径")
    parser.add_argument("--start_time", required=True, type=str, help="视频中影片除开场动画外的实际开始时间点,格式为'时:分:秒',也可以输入对应的秒数")
    parser.add_argument("--end_time", required=True, type=str, help="视频中影片除演职表外的实际结束时间点,格式为'时:分:秒',也可以输入对应的秒数")
    parser.add_argument("--book_path", type=str, help='旁白解说表格存储路径,包含表格名,如"D:\AddCaption\hysxm.xlsx"')
    args = parser.parse_args()
    detect(args.video_path, args.start_time, args.end_time, args.book_path)
# coding=utf-8
import os
import argparse
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import openpyxl
# Scratch file used when the synthesised audio needs speed adjustment.
tmp_file = 'tmp.wav'
def speech_synthesis(text, output_file, speed):
    """
    Synthesise narration audio for ``text`` and write it to ``output_file``.

    :param text: str, narration text to synthesise
    :param output_file: str, path of the resulting .wav file
    :param speed: playback speed; values other than 1.0 are applied with
        ffmpeg after synthesis
    :return: None
    """
    # Synthesise into the scratch file first when the speed must change.
    if float(speed) != 1.0:
        audio_path = tmp_file
    else:
        audio_path = output_file
    # SECURITY: the subscription key is hard-coded in source; move it to
    # an environment variable or config file before distributing this.
    speech_config = SpeechConfig(subscription="ffa331815f0f4c7fa418bb6c2e1c4e17", region="eastus")
    speech_config.speech_synthesis_language = "zh-CN"
    speech_config.speech_synthesis_voice_name = 'zh-CN-XiaomoNeural'
    audio_config = AudioOutputConfig(filename=audio_path)
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    # .get() blocks until synthesis finishes; without it the output file
    # may still be incomplete when change_speed() reads it.
    synthesizer.speak_text_async(text).get()
    if float(speed) != 1.0:
        change_speed(output_file, speed)
def change_speed(wav_path, speed=1.5):
    """
    Re-encode the scratch audio at a different playback speed.

    Reads the module-level ``tmp_file``, writes the result to ``wav_path``
    and removes the scratch file afterwards.

    :param wav_path: str, destination path of the speed-adjusted audio
    :param speed: target tempo factor (ffmpeg atempo)
    :return: None
    """
    # Quote both paths so spaces do not split the shell command.
    cmd_line = 'ffmpeg -y -i "{}" -filter:a "atempo={}" "{}"'.format(tmp_file, speed, wav_path)
    os.system(cmd_line)
    # Drop the intermediate file.
    os.remove(tmp_file)
def read_sheet(book_path, sheet_name=None):
    """
    Read a whole sheet into a dict keyed by the header row.

    The sheet layout is fixed: the first row is the header
    (起始时间|终止时间|字幕|建议|解说脚本) and every later row is appended
    to the matching column's list.

    :param book_path: str, path of the workbook
    :param sheet_name: str, optional sheet to read; the previous version
        accepted this argument but always read the active sheet
    :return: dict mapping column header -> list of column values
    """
    workbook = openpyxl.load_workbook(book_path)
    # Honour the requested sheet; fall back to the active one.
    sheet = workbook[sheet_name] if sheet_name else workbook.active
    rows = sheet.max_row
    cols = sheet.max_column
    sheet_content = {}
    # Row 1 supplies the keys; rows 2..max_row supply the values.
    for j in range(1, cols + 1):
        header = sheet.cell(1, j).value
        sheet_content[header] = [sheet.cell(i, j).value for i in range(2, rows + 1)]
    return sheet_content
def get_narratage_text(sheet_content):
    """
    Derive the narration lines and their start times from the sheet.

    :param sheet_content: dict, keys=["起始时间","终止时间","字幕","建议","解说脚本"]
    :return: narratage_text: list, narration strings,
             narratage_start_time: list, matching start times in seconds
    """
    narratage = sheet_content['解说脚本']
    subtitle = sheet_content['字幕']
    start_time = sheet_content['起始时间']
    end_time = sheet_content['终止时间']
    narratage_start_time = []
    narratage_text = []
    for i, text in enumerate(narratage):
        if text is not None:
            # The literal '翻译' asks for the subtitle itself to be read out.
            if text == '翻译':
                narratage_text.append(subtitle[i])
                narratage_start_time.append(float(start_time[i]) + 0.1)
            else:
                # Multi-paragraph narration is split on newlines and the
                # pieces spaced ~0.5s apart, estimating 4.5 characters per
                # second of speech scaled by the requested speed.
                text_split = text.split('\n')
                cur_start = float(end_time[i - 1]) + 0.1 if i > 0 else 0
                for x in text_split:
                    narratage_text.append(x)
                    narratage_start_time.append(cur_start)
                    # NOTE(review): relies on the module-level ``args`` that
                    # only exists when this file runs as a script; importing
                    # this function (as the GUI does) raises NameError here —
                    # consider passing the speed in as a parameter.
                    cur_start = cur_start + len(x) / (4.5 * args.speed) + 0.5
    return narratage_text, narratage_start_time
def second_to_str(seconds):
    """
    Format a second count as an SRT timestamp 'HH:MM:SS,mmm'.

    :param seconds: number (or numeric string) of seconds
    :return: str, zero-padded timestamp with millisecond precision
    """
    seconds = float(seconds)
    hour, remainder = divmod(seconds, 3600)
    minute, remainder = divmod(remainder, 60)
    second = int(remainder)
    ms = int((remainder - second) * 1000)
    return "%02d:%02d:%02d,%03d" % (int(hour), int(minute), second, ms)
def export_caption(sheet_content, caption_file):
    """
    Write the user-corrected subtitles out as an SRT caption file.

    :param sheet_content: dict, corrected sheet content from read_sheet()
    :param caption_file: str, destination path of the .srt file
    :return: None
    """
    subtitles = sheet_content["字幕"]
    starts = sheet_content['起始时间']
    ends = sheet_content['终止时间']
    counter = 0
    with open(caption_file, "w", encoding="utf-8") as srt:
        for idx, line in enumerate(subtitles):
            # Rows without subtitle text (narration markers) are skipped.
            if line is None:
                continue
            counter += 1
            srt.write(str(counter) + "\n")
            srt.write(second_to_str(starts[idx]) + " --> " + second_to_str(ends[idx]) + "\n")
            srt.write(line + "\n\n")
def ss_and_export(output_dir=None, sheet_path=None, caption_file=None, speed=None):
    """
    Synthesise every narration line from the sheet and export captions.

    Each argument falls back to the corresponding command-line option in
    the module-level ``args`` when omitted, keeping the original
    zero-argument CLI usage working while letting callers (such as the
    GUI) pass values directly — the old zero-parameter version raised
    TypeError whenever it was invoked with positional arguments.

    :param output_dir: str, directory that receives the .wav files
    :param sheet_path: str, path of the narration sheet (.xlsx)
    :param caption_file: str, path of the exported .srt caption file
    :param speed: speech speed forwarded to the synthesiser
    :return: None
    """
    book_path = sheet_path if sheet_path is not None else args.sheet_path
    root_path = output_dir if output_dir is not None else args.output_dir
    speed = speed if speed is not None else args.speed
    caption_file = caption_file if caption_file is not None else args.caption_file
    # Create the output directory when needed.
    if not os.path.exists(root_path):
        os.mkdir(root_path)
    # Read the sheet; derive narration lines and their start times.
    sheet_content = read_sheet(book_path)
    narratages, start_timepoint = get_narratage_text(sheet_content)
    export_caption(sheet_content, caption_file)
    # Synthesise one audio file per narration line, named by start time.
    for i, text in enumerate(narratages):
        wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
        speech_synthesis(text, wav_path, speed)
if __name__ == '__main__':
    # Define the command-line interface.
    parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
    parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
    parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
    parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
    parser.add_argument("--speed", type=float, default=1.0, help="设置语速,默认为1.0")
    args = parser.parse_args()
    # ss_and_export() reads the parsed options from the module-level
    # ``args``; the old call passed four positional arguments to a
    # function declared with none, which always raised TypeError.
    ss_and_export()
# -*- coding:utf-8 -*-
import threading
from mttkinter import mtTkinter as tk
from tkinter import filedialog, ttk, messagebox, Frame, Canvas
import os
import ffmpeg
window = tk.Tk()
window.title('无障碍电影辅助工具')  # window title
window.geometry('600x400')  # window size (width x height)
def open_video_file():
    """
    Ask the user for a video file and initialise the time fields.

    Stores the chosen path in ``inputFilePath`` and pre-fills ``endTime``
    with the video duration probed via ffmpeg.
    """
    video_path = filedialog.askopenfilename(title=u'选择文件', initialdir=(os.path.expanduser('D:/heelo')))
    if len(video_path) != 0 and is_video(video_path):
        print('打开文件:', video_path)
        inputFilePath.set(video_path)
        # Probe the container for the video stream and its duration.
        info = ffmpeg.probe(video_path)
        vs = next(c for c in info['streams'] if c['codec_type'] == 'video')
        try:
            duration = int(float(vs['duration']))
            hours = int(duration / 3600)
            minutes = int(duration / 60 - 60 * hours)
            seconds = int(duration - 60 * minutes - 3600 * hours)
            endTime.set("%02d:%02d:%02d" % (hours, minutes, seconds))
        except (KeyError, ValueError, TypeError):
            # Some containers (e.g. mkv) expose the duration as a tag
            # instead of a stream field. Narrowed from a bare except so
            # real bugs and KeyboardInterrupt are not swallowed.
            endTime.set(vs['tags']['DURATION'])
    if len(video_path) != 0 and not is_video(video_path):
        messagebox.showinfo('警告', "请选择正确的视频格式,能够处理的视频格式如下所示:\n'.mkv', '.rmvb', '.mp4', '.avi'")
def open_sheet_file():
    """
    Ask the user for the narration sheet (.xlsx) and store its path.
    """
    sheet_path = filedialog.askopenfilename(title=u'选择文件', initialdir=(os.path.expanduser('D:/heelo')))
    # endswith() is stricter than the old substring test, which also
    # accepted names like 'a.xlsx.bak'.
    if len(sheet_path) != 0 and sheet_path.endswith(".xlsx"):
        print("打开表格", sheet_path)
        narratagePath.set(sheet_path)
def find_save_file():
    """
    Ask the user where to save the output workbook.

    Defaults the file name to the video's base name and guarantees the
    chosen path ends with '.xlsx'.
    """
    video_path = inputFilePath.get()
    defaultName = ""
    if "." in os.path.basename(video_path):
        defaultName = os.path.basename(video_path).split('.')[0]
    # The tkinter option is spelled 'filetypes'; the old 'filetype'
    # keyword is not a documented option of asksaveasfilename.
    book_path = filedialog.asksaveasfilename(title=u'保存文件至',
                                             initialdir=os.path.dirname(video_path),
                                             initialfile=defaultName,
                                             filetypes=[('excel文件', ".xlsx")])
    # Append the extension only when it is not already there.
    if len(book_path) != 0 and not book_path.endswith(".xlsx"):
        book_path += ".xlsx"
    print('保存文件至:', book_path)
    outputFilePath.set(book_path)
def is_video(filename):
    """
    Check whether ``filename`` has a supported video extension.

    :param filename: str, path or file name to test
    :return: bool, True for .mkv/.rmvb/.mp4/.avi (case-insensitive)
    """
    video_tag = ('.mkv', '.rmvb', '.mp4', '.avi')
    # endswith() avoids false positives such as 'movie.mp4.txt', and
    # lower() accepts upper-case extensions from Windows paths.
    return filename.lower().endswith(video_tag)
def start_detect():
    """
    Validate the UI inputs and run the narration-position detection.

    Reads the video path, time range and output workbook path from the
    tk variables, then delegates to narratage_detection.detect().
    """
    # Validate the user-supplied paths first.
    video_path = inputFilePath.get()
    book_path = outputFilePath.get()
    if len(video_path) == 0:
        messagebox.showinfo('警告', "请输入视频文件路径")
        return
    elif not is_video(video_path):
        messagebox.showinfo('警告', "视频文件格式错误,请重新确认")
        return
    if len(book_path) == 0:
        messagebox.showinfo("警告", "请输入表格存放路径")
        return
    # print("视频文件路径:", inputFilePath.get())
    # print("开始时间:", startTime.get())
    # print("结束时间:", endTime.get())
    # print("输出表格文件路径:", outputFilePath.get())
    # Import lazily so the heavy detection stack loads only when needed.
    from narratage_detection import detect
    detect(video_path, startTime.get(), endTime.get(), book_path)
    processState.set("任务已完成")
def find_save_dir():
    """Ask the user for the directory that will hold the synthesised audio."""
    chosen_dir = filedialog.askdirectory(title=u'保存文件至')
    print('保存音频于:', chosen_dir)
    audioDir.set(chosen_dir)
def set_caption_file():
    """
    Ask the user where to save the exported .srt caption file.

    The default name is derived from the narration sheet and the default
    directory from the chosen audio output directory.
    """
    suggested_name = os.path.basename(narratagePath.get()).split('.')[0] + ".srt"
    suggested_dir = audioDir.get()
    chosen = filedialog.asksaveasfilename(title=u'保存文件至',
                                          initialdir=suggested_dir,
                                          initialfile=suggested_name,
                                          filetype=[('字幕文件', ".srt")])
    print(chosen)
    captionPath.set(chosen)
def start_synthesis():
    """
    Collect the synthesis settings from the UI and run ss_and_export.
    """
    audio_dir = audioDir.get()
    sheet_path = narratagePath.get()
    speed = audio_speed.get()
    caption_path = captionPath.get()
    from speech_synthesis import ss_and_export
    # Argument order follows ss_and_export(output_dir, sheet_path,
    # caption_file, speed); the old call swapped the speed and the
    # caption path.
    ss_and_export(audio_dir, sheet_path, caption_path, speed)
def thread_it(func, *args):
    """
    Run ``func(*args)`` on a daemon thread so the UI stays responsive.

    :param func: callable to execute
    :param args: positional arguments forwarded to ``func``
    :return: None
    """
    # daemon=True at construction replaces the deprecated setDaemon().
    t = threading.Thread(target=func, args=args, daemon=True)
    t.start()
def _quit():
    """Stop the Tk event loop, destroy the window and exit the process."""
    window.quit()
    window.destroy()
    exit()
# Create the tab bar, one tab per workflow stage.
tabControl = ttk.Notebook(window)
tab1 = ttk.Frame(tabControl)
tabControl.add(tab1, text="旁白位置推荐")
tab2 = ttk.Frame(tabControl)
tabControl.add(tab2, text="旁白语音合成")
tabControl.pack(expand=1, fill="both")
"""
为”旁白位置推荐“添加按钮、进度条等部件
"""
# --- Widgets for the "narration position recommendation" tab ---
video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
# video_info.grid(column=0, row=0, padx=8, pady=4)
video_info.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.3)
input_label = ttk.Label(video_info, text="视频文件")
input_label.grid(column=0, row=0)
inputFilePath = tk.StringVar()
inputFile = ttk.Entry(video_info, width=30, textvariable=inputFilePath)
inputFile.grid(column=1, row=0)
upload_button = ttk.Button(video_info, text="上传文件", command=open_video_file)
upload_button.grid(column=2, row=0)
startTime_label = ttk.Label(video_info, text="视频实际开始时间")
startTime_label.grid(column=0, row=1)
startTime = tk.StringVar(value="00:00:00")
startTime_entered = ttk.Entry(video_info, width=11, textvariable=startTime)
startTime_entered.grid(column=1, row=1, sticky="W")
endTime_label = ttk.Label(video_info, text="视频实际结束时间")
endTime_label.grid(column=0, row=2)
endTime = tk.StringVar(value="23:59:59")
endTime_entered = ttk.Entry(video_info, width=11, textvariable=endTime)
endTime_entered.grid(column=1, row=2, sticky="W")
# Detection controls: output workbook path plus the "start" button.
detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
detect_command.place(relx=0.1, rely=0.5, relwidth=0.8, relheight=0.3)
output_label = ttk.Label(detect_command, text="输出表格路径")
output_label.grid(column=0, row=0)
outputFilePath = tk.StringVar()
outputFile = ttk.Entry(detect_command, width=30, textvariable=outputFilePath)
outputFile.grid(column=1, row=0)
save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file)
save_button.grid(column=2, row=0)
# Detection runs on a worker thread so the UI stays responsive.
startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect))
startDetection.grid(column=0, row=1)
processState = tk.StringVar()
stateLabel = tk.Label(detect_command, textvariable=processState, fg="green")
stateLabel.grid(column=1, row=1, sticky="W")
"""
为旁白语音合成添加部件
"""
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.1, rely=0.1, relwidth=0.8, relheight=0.3)
narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
narratage_label.grid(column=0, row=0)
narratagePath = tk.StringVar()
narratagePath_input = ttk.Entry(audio_info, width=30, textvariable=narratagePath)
narratagePath_input.grid(column=1, row=0)
upload_button_2 = ttk.Button(audio_info, text="上传文件", command=open_sheet_file)
upload_button_2.grid(column=2, row=0)
speed_label = ttk.Label(audio_info, text="旁白语速")
speed_label.grid(column=0, row=1)
audio_speed = tk.StringVar(value="1.0")
speed_input = ttk.Entry(audio_info, width=30, textvariable=audio_speed)
speed_input.grid(column=1, row=1)
synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
synthesis_command.place(relx=0.1, rely=0.5, relwidth=0.8, relheight=0.3)
caption_label = ttk.Label(synthesis_command, text="输出字幕文件")
caption_label.grid(column=0, row=1)
captionPath = tk.StringVar()
captionPath_input = ttk.Entry(synthesis_command, width=30, textvariable=captionPath)
captionPath_input.grid(column=1, row=1)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=set_caption_file)
save_button_2.grid(column=2, row=1)
audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于")
audioDir_label.grid(column=0, row=0)
audioDir = tk.StringVar()
audioDir_input = ttk.Entry(synthesis_command, width=30, textvariable=audioDir)
audioDir_input.grid(column=1, row=0)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir)
save_button_2.grid(column=2, row=0)
startSynthesis = ttk.Button(synthesis_command, text="开始合成", command=lambda: thread_it(start_synthesis))
startSynthesis.grid(column=0, row=2)
processState_2 = tk.StringVar()
stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
stateLabel_2.grid(column=1, row=2, sticky="W")
# # creating a menu bar
# menu_bar = Menu(window)
# window.config(menu=menu_bar)
#
# # Add menu items
# file_menu = Menu(menu_bar, tearoff=0)
# # file_menu.add_command(label="", command=save_file)
# # file_menu.add_separator()
# file_menu.add_command(label="退出", command=_quit)
#
# # 添加下拉列表
# menu_bar.add_cascade(label="文件", menu=file_menu)
# # 更新进度条函数
# def change_schedule(now_schedule, all_schedule):
# canvas.coords(fill_rec, (5, 5, 6 + (now_schedule / all_schedule) * 100, 25))
# tab2.update()
# x.set(str(round(now_schedule / all_schedule * 100, 2)) + '%')
# if round(now_schedule / all_schedule * 100, 2) == 100.00:
# x.set("完成")
#
#
# # 创建画布
# canvas = Canvas(tab2, width=120, height=30, bg="white")
# canvas.grid(row=0, column=0)
# x = tk.StringVar()
# # 进度条以及完成程度
# out_rec = canvas.create_rectangle(5, 5, 105, 25, outline="blue", width=1)
# fill_rec = canvas.create_rectangle(5, 5, 5, 25, outline="", width=0, fill="blue")
#
# tk.Label(tab2, textvariable=x).grid(row=0, column=1)
#
# '''
# 使用时直接调用函数change_schedule(now_schedule,all_schedule)
# 下面就模拟一下....
# '''
#
# for i in range(100):
# time.sleep(0.1)
# change_schedule(i, 99)
window.mainloop() # 显示
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment