Commit d3fcd34f authored by smile2019

多行字幕检测,提取字幕颜色,台标水印排除

parent c035e726
...@@ -51,7 +51,38 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float] ...@@ -51,7 +51,38 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
start = int(start_time * fps) start = int(start_time * fps)
video.set(cv2.CAP_PROP_POS_FRAMES, start) video.set(cv2.CAP_PROP_POS_FRAMES, start)
frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT) frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT)
time1 = time.time()
# 一共有60*60*60*3 均匀取2000帧
gap = math.floor(frame_num/2000) # 前一帧与后一帧的跨度
# #读取方案1:跳转读写头到指定帧
# for i in range(2000):
# frame_index = i*gap + 1
# video.set(cv2.CAP_PROP_POS_FRAMES,frame_index)
# _, frame = video.read()
# cv2.imwrite(str.format('./extrated_imgs/{}.png',i),frame)
# 读写方案2:连续读取,读到指定帧保存
# i=1
# while i<frame_num:
# _, frame = video.read()
# if i%gap == 0:
# print(i,gap)
# cv2.imwrite(str.format('./extrated_imgs_2/{}.png',i),frame)
# i = i + 1
# 主要工作:
paddle_dir = "res/.paddleocr/2.3.0.1/ocr/"
cur_cls_model_dir = paddle_dir + "cls/ch_ppocr_mobile_v2.0_cls_infer"
cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
cur_rec_model_dir = paddle_dir + "rec/ch/ch_PP-OCRv2_rec_infer"
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cls_model_dir=cur_cls_model_dir, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
position_map = {} position_map = {}
R = []
G = []
B = []
# 统计位置(高度)
# 分成五等份,1/5 2/5 3/5 4/5 # 分成五等份,1/5 2/5 3/5 4/5
for i in range(4): for i in range(4):
frame_index = math.floor((i+1)*frame_num/5) frame_index = math.floor((i+1)*frame_num/5)
...@@ -59,9 +90,10 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float] ...@@ -59,9 +90,10 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
for j in range(10):#每个阶段取10帧 for j in range(10):#每个阶段取10帧
for k in range(120):#隔120帧取一帧 for k in range(120):#隔120帧取一帧
_,frame = video.read() _,frame = video.read()
_,frame = video.read()
if (j+1)*120+frame_index>frame_num: if (j+1)*120+frame_index>frame_num:
break break
_,frame = video.read() # cv2.imwrite(str.format('./imgs_2/{}_{}.png',i+1,j),frame)
res = ocr.ocr(frame, cls=True) res = ocr.ocr(frame, cls=True)
for result_item in res: for result_item in res:
[x1,y1],[x2,y2],[x3,y3],[x4,y4] = result_item[0] [x1,y1],[x2,y2],[x3,y3],[x4,y4] = result_item[0]
...@@ -69,22 +101,87 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float] ...@@ -69,22 +101,87 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
english_text = ''.join(re.findall(r'[A-Za-z]',text)) english_text = ''.join(re.findall(r'[A-Za-z]',text))
# 跳过英文字幕 # 跳过英文字幕
if(len(english_text)/len(text)<0.1): if(len(english_text)/len(text)<0.1):
# 提取ocr区域像素点颜色
subtitle_img = frame[int(y1):int(y3),int(x1):int(x2)]
r = subtitle_img[:,:,0].copy()
r = r.reshape(-1)
g = subtitle_img[:,:,1].copy()
g = g.reshape(-1)
b = subtitle_img[:,:,2].copy()
b = b.reshape(-1)
R = np.append(R,r)
G = np.append(G,g)
B = np.append(B,b)
print(i+1,j,text) print(i+1,j,text)
# 分析文字位置
if len(position_map) == 0: if len(position_map) == 0:
position_map[(y1,y3)]=1 position_map[(y1,y3)]=1
else: else:
# 在列表中查找
keys = list(position_map.keys()) keys = list(position_map.keys())
flag = False
for key in keys: for key in keys:
if abs(y1-key[0])<2 and abs(y3-key[1])<2: height = y3-y1
threshold = int((y3-y1)*0.3)
# print(threshold)
if abs(y1-key[0])<threshold or abs(y3-key[1])<threshold:
position_map[key]+=1 position_map[key]+=1
# # 绘制字幕横线到图片上:
# frame1 = cv2.line(frame,(0,int(key[0])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[0])),(255,0,0),2)
# frame1 = cv2.line(frame,(0,int(key[1])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[1])),(255,0,0),2)
# file_name = './mytest('+str(key[0])+','+str(key[1])+').png'
# cv2.imwrite(file_name,frame1)
flag = True# 如果能归为某一类则flag置为True
break break
else: if flag == False:# 如果找不到,则向列表添加
position_map[(y1,y3)]=1 position_map[(y1,y3)]=1
print(sorted( position_map.items(), key = lambda kv:(kv[1], kv[0]))) # # 绘制字幕横线到图片上:
y1,y2 = sorted( position_map.items(), key = lambda kv:(kv[1], kv[0]))[-1][0] # frame1 = cv2.line(frame,(0,int(y1)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y1)),(255,0,0),2)
y1 = y1/video.get(cv2.CAP_PROP_FRAME_HEIGHT) # frame1 = cv2.line(frame,(0,int(y3)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y3)),(255,0,0),2)
y2 = y2/video.get(cv2.CAP_PROP_FRAME_HEIGHT) # file_name = './mytest('+str(y1)+','+str(y3)+').png'
return y1,y2 # cv2.imwrite(file_name,frame1)
# 给出字幕行数
position_rank_list = sorted( position_map.items(), key = lambda kv:(kv[1], kv[0]))
print(position_rank_list)
line_num = 0
num = position_rank_list[-1][1]
y1,y2 = position_rank_list[-1][0]
height = abs(y2-y1)
result = []
item_result = []
for item in position_rank_list:
if item[1]>num*0.3 and item[0][0]>0.5*video.get(cv2.CAP_PROP_FRAME_HEIGHT) and item[0][1]>0.5*video.get(cv2.CAP_PROP_FRAME_HEIGHT):#出现次数达到rank1的30%以上,判定为新的一行字幕
line_num += 1
y1,y2 = item[0]
y1 = y1/video.get(cv2.CAP_PROP_FRAME_HEIGHT)
y2 = y2/video.get(cv2.CAP_PROP_FRAME_HEIGHT)
item_result.append([y1,y2])
print([line_num,height,item_result])
# 给出文字和背景的颜色
import matplotlib.pyplot as plt
from sklearn import cluster
plt.hist(R)
plt.savefig('R-hist-all')
plt.close()
# 尝试聚成两类(k-means算法)
estimator = cluster.KMeans(n_clusters=2)
estimator.fit(R.reshape(-1,1))
# print(R[0].shape)
r_centers = estimator.cluster_centers_.reshape(-1)
estimator = cluster.KMeans(n_clusters=2)
estimator.fit(G.reshape(-1,1))
g_centers = estimator.cluster_centers_.reshape(-1)
estimator = cluster.KMeans(n_clusters=2)
estimator.fit(B.reshape(-1,1))
b_centers = estimator.cluster_centers_.reshape(-1)
r,g,b = max(r_centers),max(g_centers),max(b_centers)
print('颜色结果:',r,g,b)
# 返回值:字幕行数 字幕高度 字幕位置(比例形式) 字幕颜色(r,g,b)
# [2, 109.0, [[0.8637892376681614, 0.9243273542600897], [0.9304932735426009, 0.9915919282511211]],(r,g,b)]
return item_result[0][0],item_result[0][1]
return [line_num,height,item_result,(r,g,b)]
def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]: def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
# return (885.0, 989.0) # return (885.0, 989.0)
...@@ -118,6 +215,11 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f ...@@ -118,6 +215,11 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73) # down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
print(up) print(up)
# print(down) # print(down)
up_rate,down_rate = evaluate_position(video_path,0)
up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)*up_rate)
down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)*down_rate)
return int(up), int(down) return int(up), int(down)
# TODO 现阶段是主窗体设定字幕的开始位置和结束位置,传入该函数。现在希望做成自动检测的? # TODO 现阶段是主窗体设定字幕的开始位置和结束位置,传入该函数。现在希望做成自动检测的?
# while True: # while True:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment