Unverified Commit b41893c1 authored by NingMa, committed by GitHub

initial code submission

parent 61d30a2e
[environment]
pytorch=1.7
python=3.8.10
tqdm=4.54.1
pynvml=8.0.4
[dataset]
Because the dataset exceeds the maximum upload limit of 100M, please download the public NTU RGB+D 120 dataset, or use our subset once the work is accepted.
[run]
# Run train.py with default parameters (DAST w/ RankMax, 5-way-1-shot, STGCN, on NTU RGB+D 120).
python train.py --SA 0 --reg 0.1
# Run train.py with spatial activation (DAST w/ SA, 5-way-1-shot, STGCN, on NTU RGB+D 120).
python train.py --SA 1 --reg 0
# Run train.py with the full model (DAST (full), 5-way-1-shot, STGCN, on NTU RGB+D 120).
python train.py --SA 1 --reg 0.1
import torch
from torch import nn
import math
class LayerNorm(nn.Module):
def __init__(self, hidden_size, eps=1e-12):
"""Construct a layernorm module in the TF style (epsilon inside the square root).
"""
super(LayerNorm, self).__init__()
self.weight = nn.Parameter(torch.ones(hidden_size))
self.bias = nn.Parameter(torch.zeros(hidden_size))
self.variance_epsilon = eps
def forward(self, x):
u = x.mean(-1, keepdim=True)
s = (x - u).pow(2).mean(-1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.variance_epsilon)
return self.weight * x + self.bias
class CrossAttention(nn.Module):
def __init__(self, num_attention_heads, input_size, hidden_size, hidden_dropout_prob):
super(CrossAttention, self).__init__()
if hidden_size % num_attention_heads != 0:
raise ValueError(
"The hidden size (%d) is not a multiple of the number of attention "
"heads (%d)" % (hidden_size, num_attention_heads))
self.num_attention_heads = num_attention_heads
self.attention_head_size = int(hidden_size / num_attention_heads)
self.all_head_size = hidden_size
self.query = nn.Linear(input_size, self.all_head_size)
self.key = nn.Linear(input_size, self.all_head_size)
self.value = nn.Linear(input_size, self.all_head_size)
attention_probs_dropout_prob = 0.2
self.attn_dropout = nn.Dropout(attention_probs_dropout_prob)
        # After self-attention, apply a feed-forward dense layer, dropout, and LayerNorm to produce the output
self.dense = nn.Linear(hidden_size, hidden_size)
self.LayerNorm = LayerNorm(hidden_size, eps=1e-12)
self.out_dropout = nn.Dropout(hidden_dropout_prob)
def transpose_for_scores(self, x):
new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
x = x.view(*new_x_shape)
return x.permute(0, 2, 1, 3)
def forward(self, x, y):
mixed_query_layer = self.query(x)
mixed_key_layer = self.key(y)
mixed_value_layer = self.value(x)
query_layer = self.transpose_for_scores(mixed_query_layer)
key_layer = self.transpose_for_scores(mixed_key_layer)
value_layer = self.transpose_for_scores(mixed_value_layer)
# Take the dot product between "query" and "key" to get the raw attention scores.
attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        # Apply the attention mask (precomputed for all layers in the BertModel forward() function)
# [batch_size heads seq_len seq_len] scores
# [batch_size 1 1 seq_len]
# attention_scores = attention_scores + attention_mask
# Normalize the attention scores to probabilities.
attention_probs = nn.Softmax(dim=-1)(attention_scores)
# This is actually dropping out entire tokens to attend to, which might
# seem a bit unusual, but is taken from the original Transformer paper.
# Fixme
attention_probs = self.attn_dropout(attention_probs)
context_layer = torch.matmul(attention_probs, value_layer)
context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
context_layer = context_layer.view(*new_context_layer_shape)
hidden_states = self.dense(context_layer)
hidden_states = self.out_dropout(hidden_states)
hidden_states = self.LayerNorm(hidden_states + x) # residual
return hidden_states
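# Illustrative sanity check (a sketch, not part of the original pipeline):
# the TF-style LayerNorm matches torch.nn.LayerNorm, and CrossAttention
# returns a tensor shaped like its first argument thanks to the residual
# connection. All sizes below are arbitrary.
if __name__ == '__main__':
    x = torch.randn(4, 25, 256)
    y = torch.randn(4, 25, 256)
    assert torch.allclose(LayerNorm(256)(x),
                          nn.LayerNorm(256, eps=1e-12)(x), atol=1e-5)
    attn = CrossAttention(num_attention_heads=1, input_size=256,
                          hidden_size=256, hidden_dropout_prob=0.2)
    attn.eval()  # disable dropout for a deterministic check
    print(attn(x, y).shape)  # torch.Size([4, 25, 256])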
import numpy as np
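# Mutable global state shared across modules. R_ and D_ are debug buffers for
# the most recent soft-DTW alignment and distance matrices (see soft_dtw.py);
# the random arrays below are only placeholders.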
epoch=0
device='cuda:5'
experiment_root='../output'
debug=False
local_match=0
reg_rate=0
threshold=3
gamma=0.1
iter=0
R_=np.random.randn(250, 15, 15)
D_=np.random.randn(250, 15, 15)
mod='train'
backbone='st_gcn'
dataset='ntu120'
SA=0
from . import tools
from . import ntu_rgb_d
from . import kinetics
import sys
sys.path.insert(0, '')
sys.path.extend(['../'])
import numpy as np
from graph import tools
# Joint index:
# {0, "Nose"}
# {1, "Neck"},
# {2, "RShoulder"},
# {3, "RElbow"},
# {4, "RWrist"},
# {5, "LShoulder"},
# {6, "LElbow"},
# {7, "LWrist"},
# {8, "RHip"},
# {9, "RKnee"},
# {10, "RAnkle"},
# {11, "LHip"},
# {12, "LKnee"},
# {13, "LAnkle"},
# {14, "REye"},
# {15, "LEye"},
# {16, "REar"},
# {17, "LEar"},
num_node = 18
self_link = [(i, i) for i in range(num_node)]
inward = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8),
(11, 5), (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15),
(16, 14)]
outward = [(j, i) for (i, j) in inward]
neighbor = inward + outward
class AdjMatrixGraph:
def __init__(self, *args, **kwargs):
self.num_nodes = num_node
self.edges = neighbor
self.self_loops = [(i, i) for i in range(self.num_nodes)]
self.A_binary = tools.get_adjacency_matrix(self.edges, self.num_nodes)
self.A_binary_with_I = tools.get_adjacency_matrix(self.edges + self.self_loops, self.num_nodes)
if __name__ == '__main__':
graph = AdjMatrixGraph()
A_binary = graph.A_binary
import matplotlib.pyplot as plt
print(A_binary)
plt.matshow(A_binary)
plt.show()
import sys
sys.path.insert(0, '')
sys.path.extend(['../'])
import numpy as np
from graph import tools
num_node = 25
self_link = [(i, i) for i in range(num_node)]
inward_ori_index = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6),
(8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1),
(14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18),
(20, 19), (22, 23), (23, 8), (24, 25), (25, 12)]
inward = [(i - 1, j - 1) for (i, j) in inward_ori_index]
outward = [(j, i) for (i, j) in inward]
neighbor = inward + outward
class AdjMatrixGraph:
def __init__(self, *args, **kwargs):
self.edges = neighbor
self.num_nodes = num_node
self.self_loops = [(i, i) for i in range(self.num_nodes)]
self.A_binary = tools.get_adjacency_matrix(self.edges, self.num_nodes)
self.A_binary_with_I = tools.get_adjacency_matrix(self.edges + self.self_loops, self.num_nodes)
self.A = tools.normalize_adjacency_matrix(self.A_binary)
if __name__ == '__main__':
import matplotlib.pyplot as plt
graph = AdjMatrixGraph()
A, A_binary, A_binary_with_I = graph.A, graph.A_binary, graph.A_binary_with_I
f, ax = plt.subplots(1, 3)
ax[0].imshow(A_binary_with_I, cmap='gray')
ax[1].imshow(A_binary, cmap='gray')
ax[2].imshow(A, cmap='gray')
plt.show()
print(A_binary_with_I.shape, A_binary.shape, A.shape)
import numpy as np
def edge2mat(link, num_node):
A = np.zeros((num_node, num_node))
for i, j in link:
A[j, i] = 1
return A
def normalize_digraph(A):
Dl = np.sum(A, 0)
h, w = A.shape
Dn = np.zeros((w, w))
for i in range(w):
if Dl[i] > 0:
Dn[i, i] = Dl[i] ** (-1)
AD = np.dot(A, Dn)
return AD
def get_spatial_graph(num_node, self_link, inward, outward):
I = edge2mat(self_link, num_node)
In = normalize_digraph(edge2mat(inward, num_node))
Out = normalize_digraph(edge2mat(outward, num_node))
A = np.stack((I, In, Out))
return A
def k_adjacency(A, k, with_self=False, self_factor=1):
assert isinstance(A, np.ndarray)
I = np.eye(len(A), dtype=A.dtype)
if k == 0:
return I
Ak = np.minimum(np.linalg.matrix_power(A + I, k), 1) \
- np.minimum(np.linalg.matrix_power(A + I, k - 1), 1)
if with_self:
Ak += (self_factor * I)
return Ak
def normalize_adjacency_matrix(A):
node_degrees = A.sum(-1)
degs_inv_sqrt = np.power(node_degrees, -0.5)
norm_degs_matrix = np.eye(len(node_degrees)) * degs_inv_sqrt
return (norm_degs_matrix @ A @ norm_degs_matrix).astype(np.float32)
def get_adjacency_matrix(edges, num_nodes):
A = np.zeros((num_nodes, num_nodes), dtype=np.float32)
for edge in edges:
A[edge] = 1.
return A
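# Illustrative usage (a sketch): build the three ST-GCN partitions for a toy
# 4-node chain graph. The node count and edges below are synthetic.
if __name__ == '__main__':
    num_node = 4
    self_link = [(i, i) for i in range(num_node)]
    inward = [(1, 0), (2, 1), (3, 2)]
    outward = [(j, i) for (i, j) in inward]
    A = get_spatial_graph(num_node, self_link, inward, outward)
    print(A.shape)  # (3, 4, 4): identity, inward and outward partitions
    # k_adjacency keeps only the nodes reachable in exactly k hops
    print(k_adjacency(get_adjacency_matrix(inward + outward, num_node), 2))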
from .st_gcn_aaai18 import ST_GCN_18
# from .hrnet import HRNet
# # ------------------------------------------------------------------------------
# # Copyright (c) Microsoft
# # Licensed under the MIT License.
# # Written by Bin Xiao (Bin.Xiao@microsoft.com)
# # ------------------------------------------------------------------------------
# import logging
#
# import torch.nn as nn
# from mmcv.cnn import constant_init, kaiming_init
# from mmcv.runner import load_checkpoint
# from torch.nn.modules.batchnorm import _BatchNorm
#
# BN_MOMENTUM = 0.1
#
# def conv3x3(in_planes, out_planes, stride=1):
# """3x3 convolution with padding"""
# return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
# padding=1, bias=False)
#
#
# class BasicBlock(nn.Module):
# expansion = 1
#
# def __init__(self, inplanes, planes, stride=1, downsample=None):
# super(BasicBlock, self).__init__()
# self.conv1 = conv3x3(inplanes, planes, stride)
# self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
# self.relu = nn.ReLU(inplace=True)
# self.conv2 = conv3x3(planes, planes)
# self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
# self.downsample = downsample
# self.stride = stride
#
# def forward(self, x):
# residual = x
#
# out = self.conv1(x)
# out = self.bn1(out)
# out = self.relu(out)
#
# out = self.conv2(out)
# out = self.bn2(out)
#
# if self.downsample is not None:
# residual = self.downsample(x)
#
# out += residual
# out = self.relu(out)
#
# return out
#
#
# class Bottleneck(nn.Module):
# expansion = 4
#
# def __init__(self, inplanes, planes, stride=1, downsample=None):
# super(Bottleneck, self).__init__()
# self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
# self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
# self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
# padding=1, bias=False)
# self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
# self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
# bias=False)
# self.bn3 = nn.BatchNorm2d(planes * self.expansion,
# momentum=BN_MOMENTUM)
# self.relu = nn.ReLU(inplace=True)
# self.downsample = downsample
# self.stride = stride
#
# def forward(self, x):
# residual = x
#
# out = self.conv1(x)
# out = self.bn1(out)
# out = self.relu(out)
#
# out = self.conv2(out)
# out = self.bn2(out)
# out = self.relu(out)
#
# out = self.conv3(out)
# out = self.bn3(out)
#
# if self.downsample is not None:
# residual = self.downsample(x)
#
# out += residual
# out = self.relu(out)
#
# return out
#
# class HRModule(nn.Module):
# def __init__(self, num_branches, blocks, num_blocks, num_inchannels,
# num_channels, fuse_method, multi_scale_output=True):
# super(HRModule, self).__init__()
# self._check_branches(
# num_branches, num_blocks, num_inchannels, num_channels)
#
# self.num_inchannels = num_inchannels
# self.fuse_method = fuse_method
# self.num_branches = num_branches
#
# self.multi_scale_output = multi_scale_output
#
# self.branches = self._make_branches(
# num_branches, blocks, num_blocks, num_channels)
# self.fuse_layers = self._make_fuse_layers()
# self.relu = nn.ReLU(True)
#
# def _check_branches(self, num_branches, num_blocks,
# num_inchannels, num_channels):
# if num_branches != len(num_blocks):
# error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format(
# num_branches, len(num_blocks))
# raise ValueError(error_msg)
#
# if num_branches != len(num_channels):
# error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format(
# num_branches, len(num_channels))
# raise ValueError(error_msg)
#
# if num_branches != len(num_inchannels):
# error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(
# num_branches, len(num_inchannels))
# raise ValueError(error_msg)
#
# def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
# stride=1):
# downsample = None
# if stride != 1 or \
# self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(
# self.num_inchannels[branch_index],
# num_channels[branch_index] * block.expansion,
# kernel_size=1, stride=stride, bias=False
# ),
# nn.BatchNorm2d(
# num_channels[branch_index] * block.expansion,
# momentum=BN_MOMENTUM
# ),
# )
#
# layers = []
# layers.append(
# block(
# self.num_inchannels[branch_index],
# num_channels[branch_index],
# stride,
# downsample
# )
# )
# self.num_inchannels[branch_index] = \
# num_channels[branch_index] * block.expansion
# for i in range(1, num_blocks[branch_index]):
# layers.append(
# block(
# self.num_inchannels[branch_index],
# num_channels[branch_index]
# )
# )
#
# return nn.Sequential(*layers)
#
# def _make_branches(self, num_branches, block, num_blocks, num_channels):
# branches = []
#
# for i in range(num_branches):
# branches.append(
# self._make_one_branch(i, block, num_blocks, num_channels)
# )
#
# return nn.ModuleList(branches)
#
# def _make_fuse_layers(self):
# if self.num_branches == 1:
# return None
#
# num_branches = self.num_branches
# num_inchannels = self.num_inchannels
# fuse_layers = []
# for i in range(num_branches if self.multi_scale_output else 1):
# fuse_layer = []
# for j in range(num_branches):
# if j > i:
# fuse_layer.append(
# nn.Sequential(
# nn.Conv2d(
# num_inchannels[j],
# num_inchannels[i],
# 1, 1, 0, bias=False
# ),
# nn.BatchNorm2d(num_inchannels[i]),
# nn.Upsample(scale_factor=2**(j-i), mode='nearest')
# )
# )
# elif j == i:
# fuse_layer.append(None)
# else:
# conv3x3s = []
# for k in range(i-j):
# if k == i - j - 1:
# num_outchannels_conv3x3 = num_inchannels[i]
# conv3x3s.append(
# nn.Sequential(
# nn.Conv2d(
# num_inchannels[j],
# num_outchannels_conv3x3,
# 3, 2, 1, bias=False
# ),
# nn.BatchNorm2d(num_outchannels_conv3x3)
# )
# )
# else:
# num_outchannels_conv3x3 = num_inchannels[j]
# conv3x3s.append(
# nn.Sequential(
# nn.Conv2d(
# num_inchannels[j],
# num_outchannels_conv3x3,
# 3, 2, 1, bias=False
# ),
# nn.BatchNorm2d(num_outchannels_conv3x3),
# nn.ReLU(True)
# )
# )
# fuse_layer.append(nn.Sequential(*conv3x3s))
# fuse_layers.append(nn.ModuleList(fuse_layer))
#
# return nn.ModuleList(fuse_layers)
#
# def get_num_inchannels(self):
# return self.num_inchannels
#
# def forward(self, x):
# if self.num_branches == 1:
# return [self.branches[0](x[0])]
#
# for i in range(self.num_branches):
# x[i] = self.branches[i](x[i])
#
# x_fuse = []
#
# for i in range(len(self.fuse_layers)):
# y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
# for j in range(1, self.num_branches):
# if i == j:
# y = y + x[j]
# else:
# y = y + self.fuse_layers[i][j](x[j])
# x_fuse.append(self.relu(y))
#
# return x_fuse
#
# class HRNet(nn.Module):
# blocks_dict = {
# 'BASIC' : BasicBlock,
# 'BOTTLENECK':Bottleneck
# }
#
# def __init__(self, extra, **kwargs):
# self.inplanes = 64
# self.extra = extra
# super(HRNet, self).__init__()
#
# # stem net
# self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1,
# bias=False)
# self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
# self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1,
# bias=False)
# self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
# self.relu = nn.ReLU(inplace=True)
# # stage1
# self.stage1_cfg = self.extra['stage1']
# num_channels = self.stage1_cfg['num_channels'][0]
# block_type = self.stage1_cfg['block']
# num_blocks = self.stage1_cfg['num_blocks'][0]
#
# block = self.blocks_dict[block_type]
# stage1_out_channels = num_channels * block.expansion
# self.layer1 = self._make_layer(block,
# num_channels,
# num_blocks
# )
#
# self.stage2_cfg = self.extra['stage2']
# num_channels = self.stage2_cfg['num_channels']
# block_type = self.stage2_cfg['block']
#
# block = self.blocks_dict[block_type]
# num_channels = [channel * block.expansion for channel in num_channels]
# self.transition1 = self._make_transition_layer([stage1_out_channels],
# num_channels)
# self.stage2, pre_stage_channels = self._make_stage(
# self.stage2_cfg, num_channels)
#
# # stage 3
#
# self.stage3_cfg = self.extra['stage3']
# num_channels = self.stage3_cfg['num_channels']
# block_type = self.stage3_cfg['block']
#
# block = self.blocks_dict[block_type]
# num_channels = [channel * block.expansion for channel in num_channels]
# self.transition2 = self._make_transition_layer(pre_stage_channels,
# num_channels)
# self.stage3, pre_stage_channels = self._make_stage(
# self.stage3_cfg, num_channels)
#
# # stage 4
# self.stage4_cfg = self.extra['stage4']
# num_channels = self.stage4_cfg['num_channels']
# block_type = self.stage4_cfg['block']
#
# block = self.blocks_dict[block_type]
# num_channels = [channel * block.expansion for channel in num_channels]
# self.transition3 = self._make_transition_layer(pre_stage_channels,
# num_channels)
# self.stage4, pre_stage_channels = self._make_stage(
# self.stage4_cfg, num_channels)
# self.init_weights()
#
#
# def _make_transition_layer(
# self,
# num_channels_pre_layer,
# num_channels_cur_layer):
# num_branches_cur = len(num_channels_cur_layer)
# num_branches_pre = len(num_channels_pre_layer)
#
# transition_layers = []
# for i in range(num_branches_cur):
# if i < num_branches_pre:
# if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
# transition_layers.append(
# nn.Sequential(
# nn.Conv2d(
# num_channels_pre_layer[i],
# num_channels_cur_layer[i],
# 3, 1, 1, bias=False
# ),
# nn.BatchNorm2d(num_channels_cur_layer[i]),
# nn.ReLU(inplace=True)
# )
# )
# else:
# transition_layers.append(None)
# else:
# conv3x3s = []
# for j in range(i+1-num_branches_pre):
# inchannels = num_channels_pre_layer[-1]
# outchannels = num_channels_cur_layer[i] \
# if j == i-num_branches_pre else inchannels
# conv3x3s.append(
# nn.Sequential(
# nn.Conv2d(
# inchannels, outchannels, 3, 2, 1, bias=False
# ),
# nn.BatchNorm2d(outchannels),
# nn.ReLU(inplace=True)
# )
# )
# transition_layers.append(nn.Sequential(*conv3x3s))
#
# return nn.ModuleList(transition_layers)
#
# def _make_layer(self,
# block,
# planes,
# blocks,
# stride=1):
# downsample = None
# if stride != 1 or self.inplanes != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(
# self.inplanes, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False
# ),
# nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
# )
#
# layers = []
# layers.append(block(self.inplanes, planes, stride, downsample))
# self.inplanes = planes * block.expansion
# for i in range(1, blocks):
# layers.append(block(self.inplanes, planes))
#
# return nn.Sequential(*layers)
#
# def _make_stage(self, layer_config, num_inchannels,
# multi_scale_output=True):
# num_modules = layer_config['num_modules']
# num_branches = layer_config['num_branches']
# num_blocks = layer_config['num_blocks']
# num_channels = layer_config['num_channels']
# block = self.blocks_dict[layer_config['block']]
# fuse_method = layer_config['fuse_method']
#
# modules = []
# for i in range(num_modules):
# # multi_scale_output is only used last module
# if not multi_scale_output and i == num_modules - 1:
# reset_multi_scale_output = False
# else:
# reset_multi_scale_output = True
#
# modules.append(
# HRModule(
# num_branches,
# block,
# num_blocks,
# num_inchannels,
# num_channels,
# fuse_method,
# reset_multi_scale_output
# )
# )
# num_inchannels = modules[-1].get_num_inchannels()
#
# return nn.Sequential(*modules), num_inchannels
#
# def forward(self, x):
# x = self.conv1(x)
# x = self.bn1(x)
# x = self.relu(x)
# x = self.conv2(x)
# x = self.bn2(x)
# x = self.relu(x)
# x = self.layer1(x)
#
# x_list = []
# for i in range(self.stage2_cfg['num_branches']):
# if self.transition1[i] is not None:
# x_list.append(self.transition1[i](x))
# else:
# x_list.append(x)
# y_list = self.stage2(x_list)
#
# x_list = []
# for i in range(self.stage3_cfg['num_branches']):
# if self.transition2[i] is not None:
# x_list.append(self.transition2[i](y_list[-1]))
# else:
# x_list.append(y_list[i])
# y_list = self.stage3(x_list)
#
# x_list = []
# for i in range(self.stage4_cfg['num_branches']):
# if self.transition3[i] is not None:
# x_list.append(self.transition3[i](y_list[-1]))
# else:
# x_list.append(y_list[i])
# y_list = self.stage4(x_list)
# return y_list
#
# def init_weights(self, pretrained=None):
# if isinstance(pretrained, str):
# logger = logging.getLogger()
# load_checkpoint(self, pretrained, strict=False, logger=logger)
# elif pretrained is None:
# for m in self.modules():
# if isinstance(m, nn.Conv2d):
# kaiming_init(m)
# elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
# constant_init(m, 1)
from .gconv_origin import ConvTemporalGraphical
from .graph import Graph
# The basic unit of graph convolutional networks.
import torch
import torch.nn as nn
class GraphConvND(nn.Module):
    def __init__(self, N, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, groups, bias, padding_mode):
        super().__init__()  # required before registering self.conv below
        # Coerce ints to per-dimension lists so the list concatenations below
        # work with the int/tuple defaults of the 1D/2D/3D subclasses.
        def _to_list(v):
            return [v] * N if isinstance(v, int) else list(v)
        kernel_size = _to_list(kernel_size)
        stride = _to_list(stride)
        padding = _to_list(padding)
        dilation = _to_list(dilation)
        graph_kernel_size = kernel_size[0]
        graph_stride = stride[0]
        graph_padding = padding[0]
        graph_dilation = dilation[0]
        if graph_stride != 1 or graph_padding != 0 or graph_dilation != 1:
            raise NotImplementedError
if N == 1:
conv_type = nn.Conv1d
self.einsum_func = 'nkcv,kvw->ncw'
elif N == 2:
conv_type = nn.Conv2d
self.einsum_func = 'nkcvx,kvw->ncwx'
elif N == 3:
conv_type = nn.Conv3d
self.einsum_func = 'nkcvxy,kvw->ncwxy'
self.out_channels = out_channels
self.graph_kernel_size = graph_kernel_size
self.conv = conv_type(in_channels,
out_channels * graph_kernel_size,
kernel_size=[1] + kernel_size[1:],
stride=[1] + stride[1:],
padding=[0] + padding[1:],
dilation=[1] + dilation[1:],
groups=groups,
bias=bias,
padding_mode=padding_mode)
def forward(self, x, graph):
# graph is an adjacency matrix
if graph.dim() == 2:
A, out_graph = self.normalize_adjacency_matrix(graph)
# graph is a weight matrix
elif graph.dim() == 3:
A, out_graph = graph, None
else:
raise ValueError('input[1].dim() should be 2 or 3.')
x = self.conv(x)
x = x.view((x.size(0), self.graph_kernel_size, self.out_channels) +
x.size()[2:])
x = torch.einsum(self.einsum_func, (x, A))
return x.contiguous(), out_graph
    def normalize_adjacency_matrix(self, graph):
        # Hook for subclasses: turn a 2D adjacency matrix into a normalized
        # (K, V, V) weight tensor.
        raise NotImplementedError
class GraphConv(GraphConvND):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
padding_mode='zeros'):
super().__init__(1, in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias, padding_mode)
class GraphConv2D(GraphConvND):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=(1, 1),
padding=(0, 0),
dilation=(1, 1),
groups=1,
bias=True,
padding_mode='zeros'):
super().__init__(2, in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias, padding_mode)
class GraphConv3D(GraphConvND):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=(1, 1, 1),
padding=(0, 0, 0),
dilation=(1, 1, 1),
groups=1,
bias=True,
padding_mode='zeros'):
super().__init__(3, in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias, padding_mode)
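# Illustrative shape check (a sketch; all sizes synthetic): a 3-partition
# graph convolution over V=25 nodes and T=30 frames with a precomputed
# (K, V, V) weight tensor, following the 'nkcvx,kvw->ncwx' einsum layout.
if __name__ == '__main__':
    conv = GraphConv2D(3, 16, kernel_size=(3, 1))
    x = torch.randn(2, 3, 25, 30)  # (N, C, V, T)
    A = torch.rand(3, 25, 25)      # (K, V, V)
    out, _ = conv(x, A)
    print(out.shape)               # torch.Size([2, 16, 25, 30])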
# The basic unit of graph convolutional networks.
# This is the original implementation from the ST-GCN paper.
import numpy as np
import torch
import torch.nn as nn
class ConvTemporalGraphical(nn.Module):
r"""The basic module for applying a graph convolution.
Args:
in_channels (int): Number of channels in the input sequence data
out_channels (int): Number of channels produced by the convolution
kernel_size (int): Size of the graph convolving kernel
t_kernel_size (int): Size of the temporal convolving kernel
t_stride (int, optional): Stride of the temporal convolution. Default: 1
t_padding (int, optional): Temporal zero-padding added to both sides of
the input. Default: 0
t_dilation (int, optional): Spacing between temporal kernel elements.
Default: 1
bias (bool, optional): If ``True``, adds a learnable bias to the output.
Default: ``True``
Shape:
- Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
- Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
- Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format
- Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
where
:math:`N` is a batch size,
:math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
:math:`T_{in}/T_{out}` is a length of input/output sequence,
:math:`V` is the number of graph nodes.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
t_kernel_size=1,
t_stride=1,
t_padding=0,
t_dilation=1,
bias=True):
super().__init__()
self.kernel_size = kernel_size
self.conv = nn.Conv2d(in_channels,
out_channels * kernel_size,
kernel_size=(t_kernel_size, 1),
padding=(t_padding, 0),
stride=(t_stride, 1),
dilation=(t_dilation, 1),
bias=bias)
self.out_channels = out_channels
def forward(self, x, A):
assert A.size(0) == self.kernel_size
x = self.conv(x)
n, kc, t, v = x.size()
x = x.view(n, self.kernel_size, kc // self.kernel_size, t, v)
x = torch.einsum('nkctv,kvw->nctw', (x, A))
return x.contiguous(), A
class Gconv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        if isinstance(kernel_size, int):
            gcn_kernel_size = kernel_size
            feature_dim = 0
        elif isinstance(kernel_size, (list, tuple)):
            gcn_kernel_size = kernel_size[0]
            cnn_kernel_size = [1] + list(kernel_size[1:])
            feature_dim = len(kernel_size) - 1
        else:
            raise ValueError(
                'The type of kernel_size should be int, list or tuple.')
if feature_dim == 1:
self.conv = nn.Conv1d(in_channels,
out_channels * gcn_kernel_size,
kernel_size=cnn_kernel_size)
elif feature_dim == 2:
pass
elif feature_dim == 3:
pass
elif feature_dim == 0:
pass
else:
raise ValueError(
'The length of kernel_size should be 1, 2, 3, or 4')
    def forward(self, X, A):
        raise NotImplementedError  # stub: only the 1D branch is wired up above
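# Illustrative shape check (a sketch; all sizes synthetic): spatial graph
# convolution with K=3 partitions over V=25 joints and T=30 frames.
if __name__ == '__main__':
    gcn = ConvTemporalGraphical(in_channels=3, out_channels=16, kernel_size=3)
    x = torch.randn(2, 3, 30, 25)  # (N, C, T, V)
    A = torch.rand(3, 25, 25)      # (K, V, V)
    out, _ = gcn(x, A)
    print(out.shape)               # torch.Size([2, 16, 30, 25])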
import numpy as np
class Graph():
""" The Graph to model the skeletons extracted by the openpose
Args:
strategy (string): must be one of the follow candidates
- uniform: Uniform Labeling
- distance: Distance Partitioning
- spatial: Spatial Configuration
For more information, please refer to the section 'Partition Strategies'
in our paper (https://arxiv.org/abs/1801.07455).
layout (string): must be one of the follow candidates
- openpose: Is consists of 18 joints. For more information, please
refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose#output
- ntu-rgb+d: Is consists of 25 joints. For more information, please
refer to https://github.com/shahroudy/NTURGB-D
max_hop (int): the maximal distance between two connected nodes
dilation (int): controls the spacing between the kernel points
"""
def __init__(self,
layout='openpose',
strategy='uniform',
max_hop=1,
dilation=1):
self.max_hop = max_hop
self.dilation = dilation
self.get_edge(layout)
        # hop distance: self = 0, direct edge = 1, unreachable = inf
self.hop_dis = get_hop_distance(self.num_node,
self.edge,
max_hop=max_hop)
self.get_adjacency(strategy)
    def __str__(self):
        return str(self.A)
    def get_edge(self, layout):
        # edge is a list of [child, parent] pairs
if layout == 'openpose':
self.num_node = 18
self_link = [(i, i) for i in range(self.num_node)]
neighbor_link = [(4, 3), (3, 2), (7, 6), (6, 5),
(13, 12), (12, 11), (10, 9), (9, 8), (11, 5),
(8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0),
(17, 15), (16, 14)]
self.edge = self_link + neighbor_link
self.center = 1
elif layout == 'ntu-rgb+d':
self.num_node = 25
self_link = [(i, i) for i in range(self.num_node)]
neighbor_1base = [(1, 2), (2, 21), (3, 21),
(4, 3), (5, 21), (6, 5), (7, 6), (8, 7), (9, 21),
(10, 9), (11, 10), (12, 11), (13, 1), (14, 13),
(15, 14), (16, 15), (17, 1), (18, 17), (19, 18),
(20, 19), (22, 23), (23, 8), (24, 25), (25, 12)]
neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base]
self.edge = self_link + neighbor_link
self.center = 21 - 1
elif layout == 'ntu_edge':
self.num_node = 24
self_link = [(i, i) for i in range(self.num_node)]
neighbor_1base = [(1, 2), (3, 2), (4, 3), (5, 2), (6, 5), (7, 6),
(8, 7), (9, 2), (10, 9), (11, 10), (12, 11),
(13, 1), (14, 13), (15, 14), (16, 15), (17, 1),
(18, 17), (19, 18), (20, 19), (21, 22), (22, 8),
(23, 24), (24, 12)]
neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base]
self.edge = self_link + neighbor_link
self.center = 2
elif layout == 'coco':
self.num_node = 17
self_link = [(i, i) for i in range(self.num_node)]
neighbor_1base = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13],
[6, 12], [7, 13], [6, 7], [8, 6], [9, 7],
[10, 8], [11, 9], [2, 3], [2, 1], [3, 1], [4, 2],
[5, 3], [4, 6], [5, 7]]
neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base]
self.edge = self_link + neighbor_link
self.center = 0
# elif layout=='customer settings'
# pass
else:
raise ValueError("Do Not Exist This Layout.")
def get_adjacency(self, strategy):
valid_hop = range(0, self.max_hop + 1, self.dilation)
adjacency = np.zeros((self.num_node, self.num_node))
for hop in valid_hop:
            adjacency[self.hop_dis == hop] = 1  # self-loops and direct neighbors = 1
normalize_adjacency = normalize_digraph(adjacency)
if strategy == 'uniform':
A = np.zeros((1, self.num_node, self.num_node))
A[0] = normalize_adjacency
self.A = A
elif strategy == 'distance':
A = np.zeros((len(valid_hop), self.num_node, self.num_node))
for i, hop in enumerate(valid_hop):
A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis ==
hop]
self.A = A
elif strategy == 'spatial':
A = []
for hop in valid_hop:
a_root = np.zeros((self.num_node, self.num_node))
a_close = np.zeros((self.num_node, self.num_node))
a_further = np.zeros((self.num_node, self.num_node))
for i in range(self.num_node):
for j in range(self.num_node):
if self.hop_dis[j, i] == hop:
if self.hop_dis[j, self.center] == self.hop_dis[
i, self.center]:
a_root[j, i] = normalize_adjacency[j, i]
elif self.hop_dis[j, self.center] > self.hop_dis[
i, self.center]:
a_close[j, i] = normalize_adjacency[j, i]
else:
a_further[j, i] = normalize_adjacency[j, i]
if hop == 0:
A.append(a_root)
else:
A.append(a_root + a_close)
A.append(a_further)
A = np.stack(A)
self.A = A
else:
raise ValueError("Do Not Exist This Strategy")
def get_hop_distance(num_node, edge, max_hop=1):
A = np.zeros((num_node, num_node))
for i, j in edge:
A[j, i] = 1
A[i, j] = 1
# compute hop steps
hop_dis = np.zeros((num_node, num_node)) + np.inf
transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
arrive_mat = (np.stack(transfer_mat) > 0)
for d in range(max_hop, -1, -1):
hop_dis[arrive_mat[d]] = d
return hop_dis
def normalize_digraph(A):
Dl = np.sum(A, 0)
num_node = A.shape[0]
Dn = np.zeros((num_node, num_node))
for i in range(num_node):
if Dl[i] > 0:
Dn[i, i] = Dl[i]**(-1)
AD = np.dot(A, Dn)
return AD
def normalize_undigraph(A):
Dl = np.sum(A, 0)
num_node = A.shape[0]
Dn = np.zeros((num_node, num_node))
for i in range(num_node):
if Dl[i] > 0:
Dn[i, i] = Dl[i]**(-0.5)
DAD = np.dot(np.dot(Dn, A), Dn)
return DAD
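# Illustrative usage (a sketch): the spatial partitioning strategy on the
# NTU RGB+D layout yields a (3, 25, 25) stack of root/close/further matrices.
if __name__ == '__main__':
    graph = Graph(layout='ntu-rgb+d', strategy='spatial')
    print(graph.A.shape)  # (3, 25, 25)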
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from mmskl.st_gcn import ConvTemporalGraphical, Graph
import numpy as np
def zero(x):
return 0
def iden(x):
return x
class ST_GCN_18(nn.Module):
r"""Spatial temporal graph convolutional networks.
Args:
in_channels (int): Number of channels in the input data
num_class (int): Number of classes for the classification task
graph_cfg (dict): The arguments for building the graph
edge_importance_weighting (bool): If ``True``, adds a learnable
importance weighting to the edges of the graph
**kwargs (optional): Other parameters for graph convolution units
Shape:
- Input: :math:`(N, in_channels, T_{in}, V_{in}, M_{in})`
- Output: :math:`(N, num_class)` where
:math:`N` is a batch size,
:math:`T_{in}` is a length of input sequence,
:math:`V_{in}` is the number of graph nodes,
        :math:`M_{in}` is the number of instances in a frame.
"""
def __init__(self,
in_channels,
num_class,
graph_cfg,
edge_importance_weighting=True,
data_bn=True,
**kwargs):
super().__init__()
# load graph
self.graph = Graph(**graph_cfg)
A = torch.tensor(self.graph.A,
dtype=torch.float32,
requires_grad=False)
self.register_buffer('A', A)
# build networks
spatial_kernel_size = A.size(0)
temporal_kernel_size = 9
kernel_size = (temporal_kernel_size, spatial_kernel_size)
self.data_bn = nn.BatchNorm1d(in_channels *
A.size(1)) if data_bn else iden
kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}
# self.st_gcn_networks = nn.ModuleList((
# st_gcn_block(in_channels,
# 64,
# kernel_size,
# 1,
# residual=False,
# **kwargs0),
# st_gcn_block(64, 64, kernel_size, 2, **kwargs),
# st_gcn_block(64, 128, kernel_size, 2, **kwargs),
# st_gcn_block(128, 128, kernel_size, 1, **kwargs),
# st_gcn_block(128, 256, kernel_size, 2, **kwargs),
# st_gcn_block(256, 256, kernel_size, 2, **kwargs),
# st_gcn_block(256, 64, kernel_size, 1, **kwargs),
# ))
# self.st_gcn_networks = nn.ModuleList((
# st_gcn_block(in_channels,
# 64,
# kernel_size,
# 1,
# residual=False,
# **kwargs0),
# st_gcn_block(64, 64, kernel_size, 1, **kwargs),
# st_gcn_block(64, 128, kernel_size, 2, **kwargs),
# st_gcn_block(128, 128, kernel_size, 1, **kwargs),
# st_gcn_block(128, 256, kernel_size, 2, **kwargs),
# st_gcn_block(256, 256, kernel_size, 1, **kwargs),
# st_gcn_block(256, 64, kernel_size, 1, **kwargs),
# ))
self.st_gcn_networks = nn.ModuleList((
st_gcn_block(in_channels,
64,
kernel_size,
1,
residual=False,
**kwargs0),
st_gcn_block(64, 64, kernel_size, 1, **kwargs),
st_gcn_block(64, 64, kernel_size, 1, **kwargs),
st_gcn_block(64, 64, kernel_size, 1, **kwargs),
st_gcn_block(64, 128, kernel_size, 2, **kwargs),
st_gcn_block(128, 128, kernel_size, 1, **kwargs),
st_gcn_block(128, 128, kernel_size, 1, **kwargs),
st_gcn_block(128, 256, kernel_size, 2, **kwargs),
st_gcn_block(256, 256, kernel_size, 1, **kwargs),
st_gcn_block(256, 256, kernel_size, 1, **kwargs),
))
# initialize parameters for edge importance weighting
if edge_importance_weighting:
self.edge_importance = nn.ParameterList([
nn.Parameter(torch.ones(self.A.size()))
for i in self.st_gcn_networks
])
else:
self.edge_importance = [1] * len(self.st_gcn_networks)
# fcn for prediction
# self.fcn = nn.Conv2d(256, num_class, kernel_size=1)
def forward(self, x):
# data normalization
N, C, T, V, M = x.size()
x = x.permute(0, 4, 3, 1, 2).contiguous()
x = x.view(N * M, V * C, T)
x = self.data_bn(x)
x = x.view(N, M, V, C, T)
x = x.permute(0, 1, 3, 4, 2).contiguous()
x = x.view(N * M, C, T, V)
# forward
for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
x, _ = gcn(x, self.A * importance)
        # x: (N*M, C, T', V) after the ST-GCN blocks
# global pooling x.size()[2:] = (300, 25)
# x = F.avg_pool2d(x, x.size()[2:])
NM, C, T, V = x.size()
x = x.view(N, M, -1, T, V).mean(dim=1)
# prediction
# x = self.fcn(x)
# x = x.view(x.size(0), -1)
return x
def extract_feature(self, x):
# data normalization
N, C, T, V, M = x.size()
x = x.permute(0, 4, 3, 1, 2).contiguous()
x = x.view(N * M, V * C, T)
x = self.data_bn(x)
x = x.view(N, M, V, C, T)
x = x.permute(0, 1, 3, 4, 2).contiguous()
x = x.view(N * M, C, T, V)
        # forward
for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
x, _ = gcn(x, self.A * importance)
_, c, t, v = x.size()
feature = x.view(N, M, c, t, v).permute(0, 2, 3, 4, 1)
# prediction
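        # NOTE: self.fcn is commented out in __init__ above; re-enable it
        # before calling extract_feature.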
x = self.fcn(x)
output = x.view(N, M, -1, t, v).permute(0, 2, 3, 4, 1)
return output, feature
class st_gcn_block(nn.Module):
r"""Applies a spatial temporal graph convolution over an input graph sequence.
Args:
in_channels (int): Number of channels in the input sequence data
out_channels (int): Number of channels produced by the convolution
kernel_size (tuple): Size of the temporal convolving kernel and graph convolving kernel
stride (int, optional): Stride of the temporal convolution. Default: 1
dropout (int, optional): Dropout rate of the final output. Default: 0
residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True``
Shape:
- Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
- Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
        - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format
- Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
where
:math:`N` is a batch size,
:math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
:math:`T_{in}/T_{out}` is a length of input/output sequence,
:math:`V` is the number of graph nodes.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
dropout=0,
residual=True):
super().__init__()
assert len(kernel_size) == 2
assert kernel_size[0] % 2 == 1
padding = ((kernel_size[0] - 1) // 2, 0)
self.gcn = ConvTemporalGraphical(in_channels, out_channels,
kernel_size[1])
self.tcn = nn.Sequential(
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(
out_channels,
out_channels,
(kernel_size[0], 1),
(stride, 1),
padding,
),
nn.BatchNorm2d(out_channels),
nn.Dropout(dropout, inplace=True),
)
if not residual:
self.residual = zero
elif (in_channels == out_channels) and (stride == 1):
self.residual = iden
else:
self.residual = nn.Sequential(
nn.Conv2d(in_channels,
out_channels,
kernel_size=1,
stride=(stride, 1)),
nn.BatchNorm2d(out_channels),
)
self.relu = nn.ReLU(inplace=True)
self.out_channels = out_channels
self.in_channels = in_channels
def forward(self, x, A):
res = self.residual(x)
x, A = self.gcn(x, A)
x = self.tcn(x) + res
return self.relu(x), A
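# Illustrative shape check (a sketch; batch and sequence sizes synthetic):
# with T=30 input frames and two stride-2 blocks, the temporal length drops
# 30 -> 15 -> 8 while the channels grow to 256.
if __name__ == '__main__':
    model = ST_GCN_18(in_channels=3,
                      num_class=60,
                      dropout=0.1,
                      edge_importance_weighting=False,
                      graph_cfg={'layout': 'ntu-rgb+d', 'strategy': 'spatial'})
    x = torch.randn(2, 3, 30, 25, 2)  # (N, C, T, V, M)
    print(model(x).shape)             # torch.Size([2, 256, 8, 25])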
# coding=utf-8
from __future__ import print_function
import torch.utils.data as data
from PIL import Image
import numpy as np
import shutil
import errno
import torch
import os
import pickle
import random
import gl
'''
Inspired by https://github.com/pytorch/vision/pull/46
'''
IMG_CACHE = {}
class NTU_RGBD_Dataset(data.Dataset):
def __init__(self, mode='train', data_list=None, debug=False, extract_frame=1, transform=None, target_transform=None):
        '''
        Each item is a (skeleton tensor, label) pair.
        Args:
        - mode: which split to load ('train', 'val' or 'test')
        - debug: if True, keep only 10% of the samples
        - extract_frame: if 1, subsample each clip to a fixed number of frames
        - transform: how to transform the input
        - target_transform: how to transform the target
        '''
super(NTU_RGBD_Dataset, self).__init__()
self.transform = transform
self.target_transform = target_transform
if gl.dataset == 'ntu120_30':
path="********************************to be specified********************************"
segment = 30
print('data_path :{}'.format(path))
if mode == 'train':
data_path = os.path.join(path, 'train_data.npy')
label_path = os.path.join(path, 'train_label.npy')
num_frame = os.path.join(path, 'train_frame.npy')
elif mode == 'val':
data_path = os.path.join(path, 'val_data.npy')
label_path = os.path.join(path, 'val_label.npy')
num_frame = os.path.join(path, 'val_frame.npy')
else:
data_path = os.path.join(path, 'test_data.npy')
label_path = os.path.join(path, 'test_label.npy')
num_frame = os.path.join(path, 'test_frame.npy')
self.data, self.label, self.num_frame = np.load(data_path), np.load(label_path), np.load(num_frame)
# print('min = ', np.min(self.data), ' max = ', np.max(self.data))
if debug:
data_len = len(self.label)
data_len = int(0.1 * data_len)
self.label = self.label[0:data_len]
self.data = self.data[0:data_len]
self.num_frame = self.num_frame[0:data_len]
if extract_frame == 1:
self.data = self.extract_frame(self.data, self.num_frame, segment)
print('sample_num in {}'.format(mode), len(self.label))
n_classes = len(np.unique(self.label))
print('n_class', n_classes)
def __getitem__(self, idx):
x = self.data[idx]
if self.transform:
x = self.transform(x)
return x, self.label[idx]
def __len__(self):
return len(self.label)
def extract_frame(self, x, num_frame, segment):
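        # Uniform temporal subsampling: split each clip's frames into
        # `segment` bins and draw one random frame per bin; clips shorter
        # than `segment` frames take their first `segment` slots instead.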
n, c, t, v, m = x.shape
assert n == len(num_frame)
num_frame = np.array(num_frame)
step = num_frame // segment
new_x = []
for i in range(n):
if num_frame[i] < segment:
new_x.append(np.expand_dims(x[i, :, 0:segment, :, :], 0).reshape(1, c, segment, v, m))
continue
idx = [random.randint(j * step[i], (j + 1) * step[i] - 1) for j in range(segment)]
new_x.append(np.expand_dims(x[i, :, idx, :, :], 0).reshape(1, c, segment, v, m))
new_x = np.concatenate(new_x, 0)
return new_x
# coding=utf-8
import os
import argparse
def get_parser():
parser = argparse.ArgumentParser()
parser.add_argument('-root', '--dataset',
type=str,
help='type of dataset',
default='ntu120_30')
parser.add_argument('-mode', '--mode',
type=str,
help='mode',
default='train')
    parser.add_argument('-reg', '--reg_rate',
                        type=float,
                        help='RankMax regularization rate',
                        default=0.1)
parser.add_argument('-reg_thred', '--thred',
type=int,
help='threshold',
default=3)
    parser.add_argument('-gama', '--gamma',
                        type=float,
                        help='gamma for soft-DTW',
                        default=0.01)
    parser.add_argument('-dbg', '--debug',
                        type=int,
                        help='debug mode: save x and sim_tensor',
                        default=0)
    parser.add_argument('-model', '--model',
                        type=int,
                        help='whether to use the best model',
                        default=0)
parser.add_argument('-backbone', '--backbone',
type=str,
help='backbone type st_gcn, 2s_AGCN, ms_g3d',
default='stgcn')
    parser.add_argument('-extrf', '--extract_frame',
                        type=int,
                        help='whether to subsample frames from each clip',
                        default=1)
parser.add_argument('-exp', '--experiment_root',
type=str,
help='root where to store models, losses and accuracies',
default='test')
parser.add_argument('-d', '--device',
type=int,
help='GPU device',
default=0)
    parser.add_argument('-dt', '--dtw',
                        type=int,
                        help='whether to use DTW',
                        default=1)
parser.add_argument('-nep', '--epochs',
type=int,
help='number of epochs to train for',
default=100)
parser.add_argument('-lr', '--learning_rate',
type=float,
help='learning rate for the model, default=0.001',
default=0.001)
parser.add_argument('-lrf', '--lr_flag',
type=str,
help='lr_scheduler type',
default='reduceLR')
parser.add_argument('-lrS', '--lr_scheduler_step',
type=int,
help='StepLR learning rate scheduler step, default=20',
default=20)
parser.add_argument('-lrG', '--lr_scheduler_gamma',
type=float,
help='StepLR learning rate scheduler gamma, default=0.5',
default=0.5)
    parser.add_argument('-its', '--train_iterations',
                        type=int,
                        help='number of episodes per epoch, default=1000',
                        default=1000)
    parser.add_argument('-cTr', '--classes_per_it_tr',
                        type=int,
                        help='number of random classes per episode for training, default=5',
                        default=5)
    parser.add_argument('-nsTr', '--num_support_tr',
                        type=int,
                        help='number of samples per class to use as support for training, default=1',
                        default=1)
    parser.add_argument('-nqTr', '--num_query_tr',
                        type=int,
                        help='number of samples per class to use as query for training, default=10',
                        default=10)
    parser.add_argument('-test_its', '--test_iterations',
                        type=int,
                        help='number of episodes per epoch, default=500',
                        default=500)
    parser.add_argument('-cVa', '--classes_per_it_val',
                        type=int,
                        help='number of random classes per episode for validation, default=5',
                        default=5)
    parser.add_argument('-nsVa', '--num_support_val',
                        type=int,
                        help='number of samples per class to use as support for validation, default=1',
                        default=1)
    parser.add_argument('-nqVa', '--num_query_val',
                        type=int,
                        help='number of samples per class to use as query for validation, default=10',
                        default=10)
parser.add_argument('-seed', '--manual_seed',
type=int,
help='input for the manual seeds initializations',
default=7)
parser.add_argument('--cuda',
action='store_true',
help='enables cuda')
    parser.add_argument('--SA',
                        type=int,
                        help='enable spatial activation (1) or not (0)',
                        default=0)
return parser
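# Quick sanity check (a sketch): parse the flags used by the run commands in
# the README and print the fields they map to.
if __name__ == '__main__':
    opt = get_parser().parse_args(['--SA', '1', '--reg', '0.1'])
    print(opt.SA, opt.reg_rate)  # 1 0.1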
import time
import torch.nn as nn
import torch
import numpy as np
from mmskl.st_gcn_aaai18 import ST_GCN_18
from utils import get_support_query_data, extract_k_segement, compute_similarity, euclidean_dist, euclidean_distance
from torch.nn import functional as F
import gl
from soft_dtw import SoftDTW
from cross_attention import CrossAttention
class ProtoNet(nn.Module):
def __init__(self, opt):
super(ProtoNet, self).__init__()
        if 'ntu' in gl.dataset:
            node = 25
            ms_graph = 'graph.ntu_rgb_d.AdjMatrixGraph'
            sh_graph = 'shift_gcn_graph.ntu_rgb_d.Graph'
            st_graph = {'layout': 'ntu-rgb+d', 'strategy': 'spatial'}
        elif gl.dataset == 'kinetics':
            node = 18
            ms_graph = 'graph.kinetics.AdjMatrixGraph'
            sh_graph = 'shift_gcn_graph.kinetics.Graph'
            st_graph = {'layout': 'openpose', 'strategy': 'spatial'}
        else:
            ms_graph = None
            sh_graph = None
            st_graph = None
            node = 0
self.model = ST_GCN_18(
in_channels=3,
num_class=60,
dropout=0.1,
edge_importance_weighting=False,
graph_cfg=st_graph
)
self.out_channel = 256
if gl.SA == 1:
self.attention_x = CrossAttention(num_attention_heads=1, input_size=self.out_channel, hidden_size=self.out_channel, hidden_dropout_prob=0.2)
self.attention_y = CrossAttention(num_attention_heads=1, input_size=self.out_channel, hidden_size=self.out_channel, hidden_dropout_prob=0.2)
else:
self.attention_x = None
self.attention_y = None
def loss(self, input, target, n_support, dtw):
        # input has already been encoded by ST_GCN
n, c, t, v = input.size()
def supp_idxs(cc):
# FIXME when torch will support where as np
return torch.nonzero(target.eq(cc))[:n_support].squeeze(1)
# FIXME when torch.unique will be available on cuda too
classes = torch.unique(target)
n_class = len(classes)
# FIXME when torch will support where as np
# assuming n_query, n_target constants
n_query = target.eq(classes[0].item()).sum().item() - n_support
support_idxs = list(map(supp_idxs, classes))
z_proto = torch.stack([input[idx_list] for idx_list in support_idxs]).view(-1, c, t, v)
# FIXME when torch will support where as np
query_idxs = torch.stack(list(map(lambda c: torch.nonzero(target.eq(c))[n_support:], classes))).view(-1)
zq = input[query_idxs.long()]
z_proto = z_proto.view(n_class, n_support, c, t, v).mean(1) # n, c, t, v
if dtw > 0:
dist, reg_loss = self.dtw_loss(zq, z_proto)
else:
#zq, z_proto = F.avg_pool2d(zq, zq.size()[2:]).view(n_class * n_query, c), F.avg_pool2d(z_proto, z_proto.size()[2:]).view(n_class, c)
zq = zq.view(n_class * n_query, -1)
z_proto = z_proto.view(n_class, -1)
dist = euclidean_dist(zq, z_proto)
reg_loss = torch.tensor(0).float().to(gl.device)
log_p_y = F.log_softmax(-dist, dim=1).view(n_class, n_query, -1)
target_inds = torch.arange(0, n_class).to(gl.device)
target_inds = target_inds.view(n_class, 1, 1)
target_inds = target_inds.expand(n_class, n_query, 1).long()
loss_val = -log_p_y.gather(2, target_inds).squeeze().view(-1).mean()
_, y_hat = log_p_y.max(2)
acc_val = y_hat.eq(target_inds.squeeze()).float().mean()
if gl.reg_rate > 0:
loss_val += reg_loss
return loss_val, acc_val, reg_loss
def dtw_loss(self, zq, z_proto):
        if self.attention_x is not None:
zq = zq.permute(0, 2, 3, 1).contiguous() # n, t, v, c
z_proto = z_proto.permute(0, 2, 3, 1).contiguous()
dist = self.attention_dtw_dist(zq, z_proto)
else:
z_proto = z_proto.permute(0, 2, 3, 1).contiguous()
zq = zq.permute(0, 2, 3, 1).contiguous()
dist = self.dtw_dist(zq, z_proto)
reg_loss = torch.tensor(0).float().to(gl.device)
if gl.reg_rate > 0:
reg_loss = self.svd_reg_spatial(z_proto) + self.svd_reg_spatial(zq)
rate = gl.reg_rate
reg_loss = reg_loss * rate
return dist, reg_loss
def attention_dtw_dist(self, x, y):
        '''
        :param x: [n, t, v, c] z_query
        :param y: [m, t, v, c] z_proto
        :return: [n, m]
        '''
n, t, v, c = x.size()
m, _, _, _ = y.size()
x = x.unsqueeze(1).expand(n, m, t, v, c).reshape(n * m, t, v, c)
y = y.unsqueeze(0).expand(n, m, t, v, c).reshape(n * m, t, v, c)
sdtw = SoftDTW(gamma=gl.gamma, normalize=False, attention=self.attention_x, attention_y=self.attention_y)
loss = sdtw(x, y)
return loss.view(n, m)
def dtw_dist(self, x, y):
if len(x.size()) == 4:
n, t, v, c = x.size()
x = x.view(n, t, v * c)
y = y.view(-1, t, v * c)
n, t, c = x.size()
m, _, _ = y.size()
x = x.unsqueeze(1).expand(n, m, t, c).reshape(n * m, t, c)
y = y.unsqueeze(0).expand(n, m, t, c).reshape(n * m, t, c)
sdtw = SoftDTW(gamma=gl.gamma, normalize=False, attention=self.attention_x, attention_y=self.attention_y)
loss = sdtw(x, y)
return loss.view(n, m)
def svd_reg_spatial(self, x):
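        # This appears to be a fast Batch Nuclear-norm Maximization style
        # term: the sorted column L2-norms of the softmaxed (v, c) feature
        # matrix approximate its singular values, so the negative mean of the
        # top min(v, c) of them rewards higher-rank (more diverse) features.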
if len(x.size()) == 4:
n, t, v, c = x.size()
            x = x.view(-1, v, c)
loss = torch.tensor(0).float().to(gl.device)
for i in range(x.size()[0]):
transpose_X = x[i]
# fast version
softmax_tgt = torch.softmax((transpose_X - torch.max(transpose_X)), dim=1)
list_svd, _ = torch.sort(torch.sqrt(torch.sum(torch.pow(softmax_tgt, 2), dim=0)), descending=True)
method_loss = -torch.mean(list_svd[:min(softmax_tgt.shape[0], softmax_tgt.shape[1])])
loss += method_loss
return loss / x.size()[0]
def idm_reg(self, x):
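        # NOTE: relies on self.get_W, which is not defined in this file; this
        # regularizer appears unused by the current loss path.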
n, t, c = x.size()
reg_loss = torch.tensor(0).float().to(gl.device)
thred = 5
margin = 2
weight, inverse_weight = self.get_W(x, thred)
for i in range(n):
dist = euclidean_dist(x[i, :, :], x[i, :, :]) # t * t
inverse_dist = torch.max(torch.zeros(t, t).to(gl.device), margin - dist).to(gl.device)
reg_loss += (inverse_dist * inverse_weight + dist * weight).sum()
return reg_loss / n
def forward(self, x):
x = self.model(x)
return x
# coding=utf-8
import numpy as np
import torch
class PrototypicalBatchSampler(object):
'''
PrototypicalBatchSampler: yield a batch of indexes at each iteration.
Indexes are calculated by keeping in account 'classes_per_it' and 'num_samples',
In fact at every iteration the batch indexes will refer to 'num_support' + 'num_query' samples
for 'classes_per_it' random classes.
__len__ returns the number of episodes per epoch (same as 'self.iterations').
'''
def __init__(self, labels, classes_per_it, num_samples, iterations):
'''
Initialize the PrototypicalBatchSampler object
Args:
- labels: an iterable containing all the labels for the current dataset
samples indexes will be infered from this iterable.
- classes_per_it: number of random classes for each iteration
- num_samples: number of samples for each iteration for each class (support + query)
- iterations: number of iterations (episodes) per epoch
'''
super(PrototypicalBatchSampler, self).__init__()
self.labels = labels
# print(labels,len(labels))
self.classes_per_it = classes_per_it
self.sample_per_class = num_samples
self.iterations = iterations
self.classes, self.counts = np.unique(self.labels, return_counts=True)
self.classes = torch.LongTensor(self.classes)
# create a matrix, indexes, of dim: classes X max(elements per class)
# fill it with nans
# for every class c, fill the relative row with the indices samples belonging to c
# in numel_per_class we store the number of samples for each class/row
self.idxs = range(len(self.labels))
        self.indexes = np.full((len(self.classes), max(self.counts)), np.nan)
self.indexes = torch.Tensor(self.indexes)
self.numel_per_class = torch.zeros_like(self.classes)
for idx, label in enumerate(self.labels):
# print((self.classes == label).numpy().astype(int))
label_idx = np.argwhere((self.classes == label).numpy().astype(int)).item()
# print(label_idx)
self.indexes[label_idx, np.where(np.isnan(self.indexes[label_idx]))[0][0]] = idx
self.numel_per_class[label_idx] += 1
def __iter__(self):
'''
yield a batch of indexes
'''
spc = self.sample_per_class
cpi = self.classes_per_it
for it in range(self.iterations):
batch_size = spc * cpi
batch = torch.LongTensor(batch_size)
c_idxs = torch.randperm(len(self.classes))[:cpi]
for i, c in enumerate(self.classes[c_idxs]):
s = slice(i * spc, (i + 1) * spc)
# FIXME when torch.argwhere will exists
label_idx = torch.arange(len(self.classes)).long()[self.classes == c].item()
sample_idxs = torch.randperm(self.numel_per_class[label_idx])[:spc]
batch[s] = self.indexes[label_idx][sample_idxs]
batch = batch[torch.randperm(len(batch))]
yield batch
def __len__(self):
'''
returns the number of iterations (episodes) per epoch
'''
return self.iterations
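# Illustrative run (a sketch; labels synthetic): 2-way episodes with
# 2 samples per class (support + query) drawn from 5 classes.
if __name__ == '__main__':
    labels = [i % 5 for i in range(10)]
    sampler = PrototypicalBatchSampler(labels, classes_per_it=2,
                                       num_samples=2, iterations=3)
    for batch in sampler:
        print(batch)  # 4 dataset indices per episode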
# coding=utf-8
import torch
from torch.nn import functional as F
from torch.nn.modules import Module
class PrototypicalLoss(Module):
'''
Loss class deriving from Module for the prototypical loss function defined below
'''
def __init__(self, n_support):
super(PrototypicalLoss, self).__init__()
self.n_support = n_support
def forward(self, input, target):
return prototypical_loss(input, target, self.n_support)
def euclidean_dist(x, y):
'''
Compute euclidean distance between two tensors
'''
# x: N x D
# y: M x D
n = x.size(0)
m = y.size(0)
d = x.size(1)
    if d != y.size(1):
        raise ValueError('x and y must have the same feature dimension')
x = x.unsqueeze(1).expand(n, m, d)
y = y.unsqueeze(0).expand(n, m, d)
return torch.pow(x - y, 2).sum(2)
def prototypical_loss(input, target, n_support):
'''
Inspired by https://github.com/jakesnell/prototypical-networks/blob/master/protonets/models/few_shot.py
Compute the barycentres by averaging the features of n_support
samples for each class in target, computes then the distances from each
samples' features to each one of the barycentres, computes the
log_probability for each n_query samples for each one of the current
classes, of appartaining to a class c, loss and accuracy are then computed
and returned
Args:
- input: the model output for a batch of samples
- target: ground truth for the above batch of samples
- n_support: number of samples to keep in account when computing
barycentres, for each one of the current classes
'''
target_cpu = target.to('cpu')
input_cpu = input.to('cpu')
def supp_idxs(c):
# FIXME when torch will support where as np
return torch.nonzero(target_cpu.eq(c), as_tuple=False)[:n_support].squeeze(1)
# FIXME when torch.unique will be available on cuda too
classes = torch.unique(target_cpu)
n_classes = len(classes)
# FIXME when torch will support where as np
# assuming n_query, n_target constants
n_query = target_cpu.eq(classes[0].item()).sum().item() - n_support
support_idxs = list(map(supp_idxs, classes))
prototypes = torch.stack([input_cpu[idx_list].mean(0) for idx_list in support_idxs])
# FIXME when torch will support where as np
query_idxs = torch.stack(list(map(lambda c: torch.nonzero(target_cpu.eq(c), as_tuple=False)[n_support:], classes))).view(-1)
query_samples = input.to('cpu')[query_idxs]
dists = euclidean_dist(query_samples, prototypes)
log_p_y = F.log_softmax(-dists, dim=1).view(n_classes, n_query, -1)
target_inds = torch.arange(0, n_classes)
target_inds = target_inds.view(n_classes, 1, 1)
target_inds = target_inds.expand(n_classes, n_query, 1).long()
loss_val = -log_p_y.gather(2, target_inds).squeeze().view(-1).mean()
_, y_hat = log_p_y.max(2)
acc_val = y_hat.eq(target_inds.squeeze()).float().mean()
return loss_val, acc_val
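# Illustrative episode (a sketch; embeddings synthetic): 3 classes with
# 2 support + 3 query samples each, 16-dimensional features.
if __name__ == '__main__':
    torch.manual_seed(0)
    target = torch.arange(3).repeat_interleave(5)  # 5 samples per class
    emb = torch.randn(15, 16)
    loss, acc = prototypical_loss(emb, target, n_support=2)
    print(loss.item(), acc.item())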
import numpy as np
import torch
from numba import jit
from torch.autograd import Function
import gl
@jit(nopython=True)
def compute_softdtw(D, gamma):
B = D.shape[0]
N = D.shape[1]
M = D.shape[2]
R = np.ones((B, N + 2, M + 2)) * np.inf
R[:, 0, 0] = 0
for k in range(B):
for j in range(1, M + 1):
for i in range(1, N + 1):
r0 = -R[k, i - 1, j - 1] / gamma
r1 = -R[k, i - 1, j] / gamma
r2 = -R[k, i, j - 1] / gamma
rmax = max(max(r0, r1), r2)
rsum = np.exp(r0 - rmax) + np.exp(r1 - rmax) + np.exp(r2 - rmax)
softmin = - gamma * (np.log(rsum) + rmax)
R[k, i, j] = D[k, i - 1, j - 1] + softmin
return R
@jit(nopython=True)
def compute_softdtw_backward(D_, R, gamma):
B = D_.shape[0]
N = D_.shape[1]
M = D_.shape[2]
D = np.zeros((B, N + 2, M + 2))
E = np.zeros((B, N + 2, M + 2))
D[:, 1:N + 1, 1:M + 1] = D_
E[:, -1, -1] = 1
R[:, : , -1] = -np.inf
R[:, -1, :] = -np.inf
R[:, -1, -1] = R[:, -2, -2]
for k in range(B):
for j in range(M, 0, -1):
for i in range(N, 0, -1):
a0 = (R[k, i + 1, j] - R[k, i, j] - D[k, i + 1, j]) / gamma
b0 = (R[k, i, j + 1] - R[k, i, j] - D[k, i, j + 1]) / gamma
c0 = (R[k, i + 1, j + 1] - R[k, i, j] - D[k, i + 1, j + 1]) / gamma
a = np.exp(a0)
b = np.exp(b0)
c = np.exp(c0)
E[k, i, j] = E[k, i + 1, j] * a + E[k, i, j + 1] * b + E[k, i + 1, j + 1] * c
return E[:, 1:N + 1, 1:M + 1]
class _SoftDTW(Function):
@staticmethod
def forward(ctx, D, gamma):
dev = D.device
dtype = D.dtype
gamma = torch.Tensor([gamma]).to(dev).type(dtype) # dtype fixed
D_ = D.detach().cpu().numpy()
gl.D_ = D_
g_ = gamma.item()
R = torch.Tensor(compute_softdtw(D_, g_)).to(dev).type(dtype)
gl.R_ = R.detach().cpu().numpy()
ctx.save_for_backward(D, R, gamma)
return R[:, -2, -2]
@staticmethod
def backward(ctx, grad_output):
dev = grad_output.device
dtype = grad_output.dtype
D, R, gamma = ctx.saved_tensors
D_ = D.detach().cpu().numpy()
R_ = R.detach().cpu().numpy()
g_ = gamma.item()
E = torch.Tensor(compute_softdtw_backward(D_, R_, g_)).to(dev).type(dtype)
return grad_output.view(-1, 1, 1).expand_as(E) * E, None
class SoftDTW(torch.nn.Module):
def __init__(self, gamma=1.0, normalize=False, attention=None, attention_y=None):
super(SoftDTW, self).__init__()
self.normalize = normalize
self.gamma = gamma
self.func_dtw = _SoftDTW.apply
self.attention = attention
self.attention_y = attention_y
        if attention is not None:
self.calc_matrix_func = self.attention_calc_distance_matrix
else:
self.calc_matrix_func = self.calc_distance_matrix
    def attention_calc_distance_matrix(self, x, y):
        """Cross-attend x and y frame-wise, then measure cosine distance on the
        attended features."""
        n, t, v, c = x.size()
        x = x.view(n * t, v, c)
        y = y.view(n * t, v, c)
        attention_x = self.attention(x, y)
        attention_y = self.attention_y(y, x)
        attention_x = attention_x.view(n, t, -1)
        attention_y = attention_y.view(n, t, -1)
        return self.calc_distance_matrix(attention_x, attention_y)
    def calc_distance_matrix(self, x, y):
        """Pairwise cosine distance between time steps: x (B, n, d) and
        y (B, m, d) -> (B, n, m), with values in [0, 2]."""
        n = x.size(1)
        m = y.size(1)
        d = x.size(2)
        x = x.unsqueeze(2).expand(-1, n, m, d)
        y = y.unsqueeze(1).expand(-1, n, m, d)
        x = x.reshape(-1, d)
        y = y.reshape(-1, d)
        # L2-normalize so the dot product equals the cosine similarity
        x = x / (x.norm(dim=1, keepdim=True) + 1e-8)
        y = y / (y.norm(dim=1, keepdim=True) + 1e-8)
        e_cos = (x * y).sum(1).view(-1, n, m)
        return 1 - e_cos  # cosine distance
def forward(self, x, y):
assert len(x.shape) == len(y.shape)
squeeze = False
if len(x.shape) < 3:
x = x.unsqueeze(0)
y = y.unsqueeze(0)
squeeze = True
if self.normalize:
D_xx = self.calc_matrix_func(x, x)
out_xx = self.func_dtw(D_xx, self.gamma)
D_yy = self.calc_matrix_func(y, y)
out_yy = self.func_dtw(D_yy, self.gamma)
D_xy = self.calc_matrix_func(x, y)
out_xy = self.func_dtw(D_xy, self.gamma)
result = out_xy - 1/2 * (out_xx + out_yy) # distance
else:
D_xy = self.calc_matrix_func(x, y)
out_xy = self.func_dtw(D_xy, self.gamma)
result = out_xy # discrepancy
        gl.iter += 1
        # (debug) snapshots of gl.R_ / gl.D_ can be dumped here with np.save when needed
return result.squeeze(0) if squeeze else result
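# Hedged end-to-end sketch of the SoftDTW module on random sequences (feature
# dimension and lengths are arbitrary assumptions). With normalize=True the
# output is the discrepancy out_xy - (out_xx + out_yy) / 2.
# Note: SoftDTW.forward bumps gl.iter, which train.py initializes beforehand.
def _demo_softdtw_module():
    sdtw = SoftDTW(gamma=1.0, normalize=True)
    x = torch.rand(2, 10, 32)  # (batch, time, feature)
    y = torch.rand(2, 12, 32)
    print(sdtw(x, y).shape)  # torch.Size([2])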
# coding=utf-8
import random
from prototypical_batch_sampler import PrototypicalBatchSampler
from prototypical_loss import prototypical_loss as loss_fn
from nturgbd_dataset import NTU_RGBD_Dataset
from protonet import ProtoNet
from parser_util import get_parser
from utils import load_data, get_para_num, setup_seed, getAvaliableDevice
from tqdm import tqdm
import numpy as np
import torch
import pickle
import os
import time
import gl
import warnings
from utils import *
def init_seed(opt):
    '''
    Disable cudnn to maximize reproducibility
    '''
    torch.backends.cudnn.enabled = False
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)
    torch.cuda.manual_seed(opt.manual_seed)
def init_dataset(opt, data_list, mode):
# print('not extract frame')
# opt.extract_frame = 0
debug = False
dataset = NTU_RGBD_Dataset(mode=mode, data_list=data_list, debug=debug, extract_frame=opt.extract_frame)
n_classes = len(np.unique(dataset.label))
if n_classes < opt.classes_per_it_tr or n_classes < opt.classes_per_it_val:
        raise Exception('There are not enough classes in the dataset to '
                        'satisfy the chosen classes_per_it. Decrease the '
                        'classes_per_it_{tr/val} option and try again.')
return dataset
def init_sampler(opt, labels, mode):
if 'train' in mode:
classes_per_it = opt.classes_per_it_tr
num_samples = opt.num_support_tr + opt.num_query_tr
iters = opt.train_iterations
else:
classes_per_it = opt.classes_per_it_val
num_samples = opt.num_support_val + opt.num_query_val
iters = opt.test_iterations
return PrototypicalBatchSampler(labels=labels,
classes_per_it=classes_per_it,
num_samples=num_samples,
iterations=iters)
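# Hedged note: each episode drawn by this sampler contains
# classes_per_it * num_samples items; e.g. with the 5-way 1-shot setting and an
# assumed 5 queries per class, that is 5 * (1 + 5) = 30 samples per iteration.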
def init_dataloader(opt, data_list, mode):
dataset = init_dataset(opt, data_list, mode)
sampler = init_sampler(opt, dataset.label, mode)
dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, num_workers=4)
return dataloader
def init_protonet(opt):
'''
Initialize the ProtoNet
'''
model = ProtoNet(opt).to(gl.device)
if opt.model == 1:
model_path = os.path.join(opt.experiment_root, 'best_model.pth')
# print('model_path', model_path)
model.load_state_dict(torch.load(model_path))
# print(get_para_num(model))
return model
def init_optim(opt, model):
'''
Initialize optimizer
'''
# optimizer = torch.optim.SGD(model.parameters(), lr=opt.learning_rate, momentum=0.9, weight_decay=5e-4, nesterov=True)
optimizer = torch.optim.Adam(params=model.parameters(), lr=opt.learning_rate, weight_decay=5e-4)
return optimizer
def init_lr_scheduler(opt, optim):
    '''
    Initialize the learning rate scheduler
    '''
    if opt.lr_flag == 'reduceLR':
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, mode='min', factor=0.5, patience=10, verbose=True, min_lr=1e-5)
    elif opt.lr_flag == 'stepLR':
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optim, gamma=opt.lr_scheduler_gamma,
                                                       step_size=opt.lr_scheduler_step)
    else:
        raise ValueError('unknown lr_flag: {}'.format(opt.lr_flag))
    return lr_scheduler
def save_list_to_file(path, thelist):
with open(path, 'w') as f:
for item in thelist:
f.write("%s\n" % item)
def cosine(x, mode):
    '''
    Dump frame-to-frame cosine-similarity matrices of the first sample in the
    batch to text files under the experiment root.
    '''
    if mode == 'not_encoder':
        t_path = os.path.join(gl.experiment_root, 'origin_t')
        n, c, t, v, m = x.size()
        x = x.mean(4)
    else:
        t_path = os.path.join(gl.experiment_root, 't')
        n, c, t, v = x.size()
for i in range(t - 1):
if not os.path.exists(t_path):
os.mkdir(t_path)
f_path = os.path.join(t_path, '{}_{}.txt'.format(i, i + 1))
t1, t2 = torch.transpose(x[0, :, i, :], 1, 0), torch.transpose(x[0, :, i + 1, :], 1, 0)
t1 = t1 / (t1.norm(dim=1, keepdim=True) + 1e-8)
t2 = t2 / (t2.norm(dim=1, keepdim=True) + 1e-8)
cos = torch.mm(t1, torch.transpose(t2, 1, 0))
# print(cos)
np.savetxt(f_path, cos.cpu().detach().numpy(), fmt='%.2f')
# print('--------------------')
t1, t2 = torch.transpose(x[0, :, 0, :], 1, 0), torch.transpose(x[0, :, t - 1, :], 1, 0)
t1 = t1 / (t1.norm(dim=1, keepdim=True) + 1e-8)
t2 = t2 / (t2.norm(dim=1, keepdim=True) + 1e-8)
cos = torch.mm(t1, torch.transpose(t2, 1, 0))
# print(cos)
f_path = os.path.join(t_path, '{}_{}.txt'.format(0, t - 1))
np.savetxt(f_path, cos.cpu().detach().numpy(), fmt='%.2f')
def train(opt, tr_dataloader, model, optim, lr_scheduler, val_dataloader=None, test_dataloader=None):
'''
Train the model with the prototypical learning algorithm
'''
import json
with open(os.path.join(opt.experiment_root, 'opt.json'), 'w') as f:
j = vars(opt)
json.dump(j, f)
f.write('\n')
if val_dataloader is None:
best_state = None
best_acc = 0
last_acc = 0
acc_reduce_num = 0
best_model_path = os.path.join(opt.experiment_root, 'best_model.pth')
last_model_path = os.path.join(opt.experiment_root, 'last_model.pth')
trace_file = os.path.join(opt.experiment_root, 'trace.txt')
start_epoch = 0
    patience = 0
for epoch in range(start_epoch, opt.epochs):
gl.epoch = epoch
gl.iter = 0
# print('=== Epoch: {} ==='.format(epoch))
tr_iter = iter(tr_dataloader)
model.train()
lr = opt.learning_rate
train_acc = []
reg_loss = []
train_loss = []
for batch in tqdm(tr_iter):
# for batch in tr_iter:
optim.zero_grad()
gl.mod = 'train'
x, y = batch
x, y = x.to(gl.device).float(), y.to(gl.device)
model_output = model(x)
loss, acc, reg = model.loss(model_output, y, opt.num_support_tr,opt.dtw)
train_loss.append(loss.item())
train_acc.append(acc.item())
reg_loss.append(reg.item())
loss.backward()
optim.step()
avg_loss = np.mean(train_loss)
avg_reg = np.mean(reg_loss)
avg_acc = np.mean(train_acc)
t_loss, t_acc = avg_loss, avg_acc
        string = 'train loss: {}, classifier loss: {}, reg loss: {}, train Acc: {}'.format(avg_loss, avg_loss - avg_reg, avg_reg, avg_acc)
if opt.lr_flag == 'reduceLR':
lr_scheduler.step(avg_loss)
elif opt.lr_flag == 'stepLR':
lr_scheduler.step()
lr = optim.state_dict()['param_groups'][0]['lr']
if val_dataloader is None:
continue
val_iter = iter(val_dataloader)
model.eval()
val_loss = []
val_acc = []
for batch in tqdm(val_iter):
# for batch in val_iter:
x, y = batch
x, y = x.to(gl.device).float(), y.to(gl.device)
gl.mod = 'val'
model_output = model(x)
loss, acc, reg = model.loss(model_output, target=y, n_support=opt.num_support_val,dtw=opt.dtw)
val_loss.append(loss.item())
val_acc.append(acc.item())
avg_loss = np.mean(val_loss)
avg_acc = np.mean(val_acc)
        # early stopping: stop if val accuracy fails to improve 10 times in a row
        if last_acc == 0:
            last_acc = avg_acc
        else:
            if last_acc >= avg_acc:
                acc_reduce_num += 1
            else:
                acc_reduce_num = 0
            last_acc = avg_acc
        if acc_reduce_num >= 10:
            print('validation accuracy has not improved for 10 consecutive evaluations, stopping training...')
            break
v_loss, v_acc = avg_loss, avg_acc
postfix = ' (Best)' if avg_acc >= best_acc else ' (Best: {})'.format(best_acc)
string_val = 'val loss: {}, val acc: {}{} lr:{}'.format(avg_loss, avg_acc, postfix, lr)
print(string + '\t' + string_val)
with open(trace_file, 'a') as f:
f.write(string + '\t' + string_val)
f.write('\n')
        if avg_acc >= best_acc:
            torch.save(model.state_dict(), best_model_path)
            patience = 0
            best_acc = avg_acc
            best_state = model.state_dict()
        else:
            patience += 1
            if patience > 40:
                break
torch.save(model.state_dict(), last_model_path)
return best_state, best_acc
def test(opt, test_dataloader, model):
'''
Test the model trained with the prototypical learning algorithm
'''
print('testing model...')
avg_acc = list()
trace_file = os.path.join(opt.experiment_root, 'test.txt')
n_class_val, n_query_val = opt.classes_per_it_val, opt.num_query_val
for epoch in range(10):
# print('=== Epoch: {} ==='.format(epoch))
model.eval()
gl.epoch = epoch
test_iter = iter(test_dataloader)
for batch in test_iter:
x, y = batch
x, y = x.to(gl.device).float(), y.to(gl.device)
model_output = model(x)
_, acc, _ = model.loss(model_output, target=y, n_support=opt.num_support_val,dtw=opt.dtw)
avg_acc.append(acc.item())
# print('test avg_acc', np.mean(avg_acc))
avg_acc = np.mean(avg_acc)
with open(trace_file, 'a') as f:
f.write('test acc: {}'.format(avg_acc))
f.write('\n')
print('Test Acc: {}'.format(avg_acc))
return avg_acc
def eval(opt):
    '''
    Initialize everything and evaluate the best saved model on the test split
    '''
    options = get_parser().parse_args()
    if torch.cuda.is_available() and not options.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    init_seed(options)
    test_dataloader = init_dataloader(options, [], 'test')
    model = init_protonet(options)
    model_path = os.path.join(opt.experiment_root, 'best_model.pth')
    model.load_state_dict(torch.load(model_path))
    test(opt=options,
         test_dataloader=test_dataloader,
         model=model)
def main():
'''
Initialize everything and train
'''
options = get_parser().parse_args()
    options.experiment_root = os.path.join(options.experiment_root, "seed_" + str(options.manual_seed),
                                           "_dataset" + str(options.dataset), "_back" + str(options.backbone),
                                           "_reg" + str(options.reg_rate) + "_att" + str(options.SA) + "_dtw" + str(options.dtw))
    options.cuda = True
    options.device = str(1)  # NOTE: hardcodes GPU 1 regardless of the command line
if options.debug == 1:
gl.debug = True
device = 'cuda:{}'.format(options.device) if torch.cuda.is_available() and options.cuda else 'cpu'
gl.device = device
# print("device",device)
gl.gamma = options.gamma
options.experiment_root = "../log/"+options.experiment_root
gl.experiment_root=options.experiment_root
gl.reg_rate = options.reg_rate
gl.threshold = options.thred
gl.backbone = options.backbone
gl.dataset = options.dataset
gl.SA = options.SA
if not os.path.exists(gl.experiment_root):
os.makedirs(gl.experiment_root)
if torch.cuda.is_available() and not options.cuda:
print("WARNING: You have a CUDA device, so you should probably run with --cuda")
init_seed(options)
setup_seed(options.manual_seed)
data_list = []
tr_dataloader = init_dataloader(options, data_list, 'train')
val_dataloader = init_dataloader(options, data_list, 'val')
test_dataloader = init_dataloader(options, data_list, 'test')
model = init_protonet(options)
optim = init_optim(options, model)
lr_scheduler = init_lr_scheduler(options, optim)
if options.mode == 'train':
res = train(opt=options,
tr_dataloader=tr_dataloader,
val_dataloader=val_dataloader,
test_dataloader=test_dataloader,
model=model,
optim=optim,
lr_scheduler=lr_scheduler)
best_state, best_acc = res
# print('Testing with last model..')
# test(opt=options,
# test_dataloader=test_dataloader,
# model=model)
        # reload the best checkpoint saved during training
        model_path = os.path.join(options.experiment_root, 'best_model.pth')
        model.load_state_dict(torch.load(model_path))
print('Testing with best model..')
test(opt=options,
test_dataloader=test_dataloader,
model=model)
    elif options.mode == 'test':
        print('Testing with best model..')
        test(opt=options,
             test_dataloader=test_dataloader,
             model=model)
if __name__ == '__main__':
main()
import pickle
import csv
import fcntl
import torch
import time
import os
import numpy as np
import random
import gl
from soft_dtw import SoftDTW
import pynvml,time
def getAvaliableDevice(gpu=[0], min_mem=24000, left=False):
    """
    Block until one of the given GPUs has at least min_mem MiB free and under
    10% utilization, then return its (remapped) index.
    :param gpu: list of GPU indices to poll
    :param min_mem: minimum free memory in MiB
    :param left: if True, leave at least one available GPU for other users
    :return: index of an available GPU
    """
    pynvml.nvmlInit()
    t = int(time.strftime("%H", time.localtime()))
    if t >= 23 or t < 8:
        left = False  # at night, do not leave any GPUs idle
    min_num = 3
    dic = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, -1: -1}  # index remapping, specific to the 207 server
ava_gpu = -1
    while ava_gpu == -1:
        avaliable = []
        for i in gpu:
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
            if (meminfo.free / 1024 ** 2) > min_mem and utilization.gpu < 10:
                avaliable.append(dic[i])
        if len(avaliable) == 0 or (left and len(avaliable) <= 1):
            ava_gpu = -1
            time.sleep(20)
            continue
        ava_gpu = avaliable[0]
return ava_gpu
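# Hedged usage sketch (the GPU list and memory floor below are assumptions):
# device_idx = getAvaliableDevice(gpu=[0, 1], min_mem=10000)
# torch.cuda.set_device(device_idx)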
def write_shared_file(file_name, content):
    nowtime = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
    content[0] = nowtime + " " + content[0]
    with open(file_name, 'a+') as f:
        fcntl.flock(f, fcntl.LOCK_EX)  # exclusive lock so concurrent runs do not interleave
        f.writelines(content)
        fcntl.flock(f, fcntl.LOCK_UN)
def write_csv_file(file_name, content):
    nowtime = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
    content["time"] = nowtime
    to_write_head = not os.path.exists(file_name)
    with open(file_name, 'a+') as f:
        writer = csv.DictWriter(f, content.keys())
        fcntl.flock(f, fcntl.LOCK_EX)
        if to_write_head:
            writer.writeheader()
        writer.writerow(content)
        fcntl.flock(f, fcntl.LOCK_UN)
def get_para_num(net):
total_num = sum(p.numel() for p in net.parameters())
trainable_num = sum(p.numel() for p in net.parameters() if p.requires_grad)
return {'Total': total_num, 'Trainable': trainable_num}
def setup_seed(seed=0):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
def euclidean_dist(x, y):
# x: N x D
# y: M x D
n = x.size(0)
m = y.size(0)
d = x.size(1)
assert d == y.size(1)
x = x.unsqueeze(1).expand(n, m, d)
y = y.unsqueeze(0).expand(n, m, d)
return torch.pow(x - y, 2).sum(2)
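# Hedged shape sketch: euclidean_dist broadcasts every query against every
# prototype; the sizes below are illustrative assumptions.
def _demo_euclidean_dist():
    q = torch.rand(15, 64)  # e.g. 5 classes x 3 queries
    p = torch.rand(5, 64)   # 5 class prototypes
    print(euclidean_dist(q, p).shape)  # torch.Size([15, 5])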
def get_support_query_data(support, query, device):
    '''
    Interleave every (query, support) pair for pairwise matching.
    :param support: [n_class, c, v] class prototypes
    :param query: [n_class * n_query, c, v] query samples
    :return: node_features: [n_class * (n_class * n_query) * 2 * v, c]; each query
             sits at an even slot, its candidate support at the following odd slot
    '''
n_class, c, v = support.size()
all_query = query.size(0)
sum_matching_graph = n_class * all_query * 2
node_features = torch.zeros(sum_matching_graph, c, v).to(device)
idx, idx2= torch.arange(0, sum_matching_graph, 2).to(device), torch.arange(1, sum_matching_graph, 2).to(device)
node_features[idx] = query.unsqueeze(1).repeat(1, n_class, 1, 1).reshape(-1, c, v)
node_features[idx2] = support.unsqueeze(0).repeat(all_query, 1, 1, 1).reshape(-1, c, v)
node_features = node_features.permute(0, 2, 1).reshape(sum_matching_graph * v, c)
return node_features
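# Hedged sketch of the interleaving above: queries land at even slots and the
# matching support prototypes at odd slots; sizes are illustrative assumptions.
def _demo_support_query_pairs():
    support = torch.rand(5, 8, 25)  # [n_class, c, v]
    query = torch.rand(15, 8, 25)   # [n_class * n_query, c, v]
    feats = get_support_query_data(support, query, 'cpu')
    print(feats.shape)  # torch.Size([3750, 8]) == (5 * 15 * 2 * 25, 8)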
def euclidean_distance(x, y):
"""This is the squared Euclidean distance."""
return torch.sum((x - y) ** 2, dim=-1)
def compute_similarity(x, y):
"""Compute the distance between x and y vectors.
The distance will be computed based on the training loss type.
Args:
config: a config dict.
x: [n_examples, feature_dim] float tensor.
y: [n_examples, feature_dim] float tensor.
Returns:
dist: [n_examples] float tensor.
Raises:
ValueError: if loss type is not supported.
"""
return -euclidean_distance(x, y)
def extract_k_segement(x, num_frame, segement):
    '''Randomly sample one frame from each of `segement` equal temporal bins of every sequence.'''
    n, c, t, v = x.size()
assert n == len(num_frame)
step = num_frame // segement
new_x = []
for i in range(n):
idx = [ random.randint(j * step[i], (j + 1) * step[i] - 1) for j in range(segement)]
new_x.append(x[i, :, idx, :].unsqueeze(0))
new_x = torch.cat(new_x, dim=0)
return new_x
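# Hedged sketch: one random frame is drawn from each of `segement` equal
# temporal bins; the tensor sizes below are assumptions for illustration.
def _demo_extract_k_segement():
    x = torch.rand(2, 3, 300, 25)     # (n, c, t, v)
    num_frame = np.array([300, 240])  # valid frame counts per sequence
    print(extract_k_segement(x, num_frame, 30).shape)  # torch.Size([2, 3, 30, 25])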
def load_data(path, train_class_name, val_class_name, test_class_name):
data_path = os.path.join(path, 'train_data.npy')
label_path = os.path.join(path, 'train_label.pkl')
# num_frame_path = os.path.join(path, 'train_num_frame.npy')
num_class = np.zeros(125)
try:
with open(label_path) as f:
sample_name, label = pickle.load(f)
except:
# for pickle file from python2
with open(label_path, 'rb') as f:
sample_name, label = pickle.load(f, encoding='latin1')
# load data
data = np.load(data_path)
# num_frame = np.load(num_frame_path)
num_frame = np.ones(len(label)) * 300
train_data, val_data, test_data = [], [], []
train_label, val_label, test_label = [], [], []
train_num_frame, val_num_frame, test_num_frame = [], [], []
for i in range(len(label)):
if label[i] > 120 :
continue
num_class[label[i]] += 1
if label[i] in train_class_name:
if num_class[label[i]] >= 500:
continue
train_data.append(np.expand_dims(data[i], axis=0))
train_label.append(label[i])
train_num_frame.append(num_frame[i])
elif label[i] in val_class_name:
if num_class[label[i]] >= 100:
continue
val_data.append(np.expand_dims(data[i], axis=0))
val_label.append(label[i])
val_num_frame.append(num_frame[i])
elif label[i] in test_class_name:
if num_class[label[i]] >= 100:
continue
test_data.append(np.expand_dims(data[i], axis=0))
test_label.append(label[i])
test_num_frame.append(num_frame[i])
train_data, val_data, test_data = np.concatenate(train_data, 0), np.concatenate(val_data, 0), np.concatenate(test_data, 0)
    save_path = '/mnt/data1/kinetics-skeleton/train_500_val_100'  # NOTE: hardcoded output directory
np.save(os.path.join(save_path, 'train_data.npy'), train_data)
np.save(os.path.join(save_path, 'train_label.npy'), train_label)
np.save(os.path.join(save_path, 'train_frame.npy'), train_num_frame)
np.save(os.path.join(save_path, 'val_data.npy'), val_data)
np.save(os.path.join(save_path, 'val_label.npy'), val_label)
np.save(os.path.join(save_path, 'val_frame.npy'), val_num_frame)
np.save(os.path.join(save_path, 'test_data.npy'), test_data)
np.save(os.path.join(save_path, 'test_label.npy'), test_label)
np.save(os.path.join(save_path, 'test_frame.npy'), test_num_frame)
data_list = [train_data, train_label, np.array(train_num_frame), val_data, val_label, np.array(val_num_frame), test_data, test_label, np.array(test_num_frame)]
return data_list
def import_class(name):
components = name.split('.')
mod = __import__(components[0])
for comp in components[1:]:
mod = getattr(mod, comp)
return mod
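# Hedged example: import_class resolves a dotted path to an attribute.
def _demo_import_class():
    join = import_class('os.path.join')
    assert join is os.path.join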
def count_params(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
if __name__ == "__main__":
a = 0
\ No newline at end of file