Unverified commit 243b5241 authored by NingMa, committed by GitHub

Add backbones for MS-G3D and AGCN

parent 85f97f76
from . import agcn, aagcn
import math
import numpy as np
import torch
import torch.nn as nn
def import_class(name):
components = name.split('.')
mod = __import__(components[0])
for comp in components[1:]:
mod = getattr(mod, comp)
return mod
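# Example usage (sketch): import_class('ms_g3d_graph.ntu_rgb_d.AdjMatrixGraph')
# imports the top-level package and walks the dotted path with getattr,
# returning the AdjMatrixGraph class object.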
def conv_branch_init(conv, branches):
weight = conv.weight
n = weight.size(0)
k1 = weight.size(1)
k2 = weight.size(2)
nn.init.normal_(weight, 0, math.sqrt(2. / (n * k1 * k2 * branches)))
nn.init.constant_(conv.bias, 0)
def conv_init(conv):
nn.init.kaiming_normal_(conv.weight, mode='fan_out')
nn.init.constant_(conv.bias, 0)
def bn_init(bn, scale):
nn.init.constant_(bn.weight, scale)
nn.init.constant_(bn.bias, 0)
class unit_tcn(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
super(unit_tcn, self).__init__()
pad = int((kernel_size - 1) / 2)
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0),
stride=(stride, 1))
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU()
conv_init(self.conv)
bn_init(self.bn, 1)
def forward(self, x):
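        # x: (N, C, T, V); the (kernel_size, 1) conv acts along the temporal axis only.
        # No activation here -- the enclosing TCN_GCN_unit applies the ReLU.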
x = self.bn(self.conv(x))
return x
class unit_gcn(nn.Module):
def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3):
super(unit_gcn, self).__init__()
inter_channels = out_channels // coff_embedding
self.inter_c = inter_channels
self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32)))
nn.init.constant_(self.PA, 1e-6)
        self.A = torch.from_numpy(A.astype(np.float32))  # fixed graph partitions; moved to the input device in forward()
self.num_subset = num_subset
self.conv_a = nn.ModuleList()
self.conv_b = nn.ModuleList()
self.conv_d = nn.ModuleList()
for i in range(self.num_subset):
self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1))
self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1))
self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1))
if in_channels != out_channels:
self.down = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 1),
nn.BatchNorm2d(out_channels)
)
else:
self.down = lambda x: x
self.bn = nn.BatchNorm2d(out_channels)
self.soft = nn.Softmax(-2)
self.relu = nn.ReLU()
for m in self.modules():
if isinstance(m, nn.Conv2d):
conv_init(m)
elif isinstance(m, nn.BatchNorm2d):
bn_init(m, 1)
bn_init(self.bn, 1e-6)
for i in range(self.num_subset):
conv_branch_init(self.conv_d[i], self.num_subset)
def forward(self, x):
N, C, T, V = x.size()
        A = self.A.to(x.device)
A = A + self.PA
y = None
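        # Three adjacency terms per subset: the fixed partition A[i], the learned
        # global offset PA, and a data-dependent attention map computed below from
        # the similarity of joint embeddings (the 2s-AGCN adaptive graph).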
for i in range(self.num_subset):
A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T)
A2 = self.conv_b[i](x).view(N, self.inter_c * T, V)
A1 = self.soft(torch.matmul(A1, A2) / A1.size(-1)) # N V V
A1 = A1 + A[i]
A2 = x.view(N, C * T, V)
z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
y = z + y if y is not None else z
y = self.bn(y)
y += self.down(x)
return self.relu(y)
class TCN_GCN_unit(nn.Module):
def __init__(self, in_channels, out_channels, A, stride=1, residual=True):
super(TCN_GCN_unit, self).__init__()
self.gcn1 = unit_gcn(in_channels, out_channels, A)
self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride)
self.relu = nn.ReLU()
if not residual:
self.residual = lambda x: 0
elif (in_channels == out_channels) and (stride == 1):
self.residual = lambda x: x
else:
self.residual = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride)
def forward(self, x):
x = self.tcn1(self.gcn1(x)) + self.residual(x)
return self.relu(x)
class AGCN_Model(nn.Module):
def __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=dict(), in_channels=3):
super(AGCN_Model, self).__init__()
if graph is None:
            raise ValueError('graph cannot be None; pass a dotted class path such as "AGCN_gcn_graph.ntu_rgb_d.Graph"')
else:
Graph = import_class(graph)
self.graph = Graph(**graph_args)
A = self.graph.A
self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)
        self.l1 = TCN_GCN_unit(in_channels, 64, A, residual=False)
self.l2 = TCN_GCN_unit(64, 64, A)
self.l3 = TCN_GCN_unit(64, 64, A)
self.l4 = TCN_GCN_unit(64, 64, A)
self.l5 = TCN_GCN_unit(64, 128, A, stride=2)
self.l6 = TCN_GCN_unit(128, 128, A)
self.l7 = TCN_GCN_unit(128, 128, A)
self.l8 = TCN_GCN_unit(128, 256, A, stride=2)
self.l9 = TCN_GCN_unit(256, 256, A)
self.l10 = TCN_GCN_unit(256, 256, A)
self.fc = nn.Linear(256, num_class)
nn.init.normal_(self.fc.weight, 0, math.sqrt(2. / num_class))
bn_init(self.data_bn, 1)
def forward(self, x):
N, C, T, V, M = x.size()
x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)
x = self.data_bn(x)
x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V)
x = self.l1(x)
x = self.l2(x)
x = self.l3(x)
x = self.l4(x)
x = self.l5(x)
x = self.l6(x)
x = self.l7(x)
x = self.l8(x)
x = self.l9(x)
x = self.l10(x)
# N*M,C,T,V
c_new = x.size(1)
x = x.view(N, M, c_new, -1, V)
x = x.mean(1)
return x
# return self.fc(x)
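# A minimal smoke test (sketch): assumes the repo's AGCN graph package is
# importable under the dotted path used by the backbone builder below; input is
# a dummy NTU-shaped batch (N, C, T, V, M).
if __name__ == '__main__':
    model = AGCN_Model(num_class=60, num_point=25, num_person=2,
                       graph='AGCN_gcn_graph.ntu_rgb_d.Graph',
                       graph_args={'labeling_mode': 'spatial'})
    x = torch.randn(2, 3, 50, 25, 2)
    out = model(x)
    print(out.shape)  # expected: (2, 256, 13, 25) -- features, no classifier head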
from . import tools
from . import ntu_rgb_d
from . import kinetics
import numpy as np
import sys
sys.path.extend(['../'])
from shift_gcn_graph import tools
# Joint index:
# {0, "Nose"}
# {1, "Neck"},
# {2, "RShoulder"},
# {3, "RElbow"},
# {4, "RWrist"},
# {5, "LShoulder"},
# {6, "LElbow"},
# {7, "LWrist"},
# {8, "RHip"},
# {9, "RKnee"},
# {10, "RAnkle"},
# {11, "LHip"},
# {12, "LKnee"},
# {13, "LAnkle"},
# {14, "REye"},
# {15, "LEye"},
# {16, "REar"},
# {17, "LEar"},
# Edge format: (origin, neighbor)
num_node = 18
self_link = [(i, i) for i in range(num_node)]
inward = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8),
(11, 5), (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15),
(16, 14)]
outward = [(j, i) for (i, j) in inward]
neighbor = inward + outward
class Graph:
def __init__(self, labeling_mode='spatial'):
self.A = self.get_adjacency_matrix(labeling_mode)
self.num_node = num_node
self.self_link = self_link
self.inward = inward
self.outward = outward
self.neighbor = neighbor
def get_adjacency_matrix(self, labeling_mode=None):
if labeling_mode is None:
return self.A
if labeling_mode == 'spatial':
A = tools.get_spatial_graph(num_node, self_link, inward, outward)
else:
            raise ValueError('Unsupported labeling mode: {}'.format(labeling_mode))
return A
if __name__ == '__main__':
A = Graph('spatial').get_adjacency_matrix()
    print(A)
import sys
sys.path.extend(['../'])
from shift_gcn_graph import tools
num_node = 25
self_link = [(i, i) for i in range(num_node)]
inward_ori_index = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6),
(8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1),
(14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18),
(20, 19), (22, 23), (23, 8), (24, 25), (25, 12)]
inward = [(i - 1, j - 1) for (i, j) in inward_ori_index]
outward = [(j, i) for (i, j) in inward]
neighbor = inward + outward
class Graph:
def __init__(self, labeling_mode='spatial'):
self.A = self.get_adjacency_matrix(labeling_mode)
self.num_node = num_node
self.self_link = self_link
self.inward = inward
self.outward = outward
self.neighbor = neighbor
def get_adjacency_matrix(self, labeling_mode=None):
if labeling_mode is None:
return self.A
if labeling_mode == 'spatial':
A = tools.get_spatial_graph(num_node, self_link, inward, outward)
else:
            raise ValueError('Unsupported labeling mode: {}'.format(labeling_mode))
return A
if __name__ == '__main__':
import matplotlib.pyplot as plt
import os
# os.environ['DISPLAY'] = 'localhost:11.0'
A = Graph('spatial').get_adjacency_matrix()
for i in A:
plt.imshow(i, cmap='gray')
plt.show()
print(A)
import numpy as np
def edge2mat(link, num_node):
A = np.zeros((num_node, num_node))
for i, j in link:
A[j, i] = 1
return A
def normalize_digraph(A):  # divide each column by its (in-degree) sum
Dl = np.sum(A, 0)
h, w = A.shape
Dn = np.zeros((w, w))
for i in range(w):
if Dl[i] > 0:
Dn[i, i] = Dl[i] ** (-1)
AD = np.dot(A, Dn)
return AD
def get_spatial_graph(num_node, self_link, inward, outward):
I = edge2mat(self_link, num_node)
In = normalize_digraph(edge2mat(inward, num_node))
Out = normalize_digraph(edge2mat(outward, num_node))
A = np.stack((I, In, Out))
return A
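# Sanity check (sketch, hypothetical toy graph): a 3-node chain with inward
# edges (1, 0) and (2, 1) yields the identity subset plus column-normalized
# in/out subsets.
if __name__ == '__main__':
    toy_self = [(i, i) for i in range(3)]
    toy_in = [(1, 0), (2, 1)]
    toy_out = [(j, i) for (i, j) in toy_in]
    A = get_spatial_graph(3, toy_self, toy_in, toy_out)
    print(A.shape)  # (3, 3, 3): subsets (I, In, Out) stacked along axis 0
    print(A)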
from . import msg3d
import torch
import torch.nn as nn
import torch.nn.functional as F
def activation_factory(name, inplace=True):
if name == 'relu':
return nn.ReLU(inplace=inplace)
elif name == 'leakyrelu':
return nn.LeakyReLU(0.2, inplace=inplace)
elif name == 'tanh':
return nn.Tanh()
elif name == 'linear' or name is None:
return nn.Identity()
else:
        raise ValueError('Unsupported activation: {}'.format(name))
import torch
import torch.nn as nn
import torch.nn.functional as F
from ms_g3d.activation import activation_factory
class MLP(nn.Module):
def __init__(self, in_channels, out_channels, activation='relu', dropout=0):
super().__init__()
channels = [in_channels] + out_channels
self.layers = nn.ModuleList()
for i in range(1, len(channels)):
if dropout > 0.001:
self.layers.append(nn.Dropout(p=dropout))
self.layers.append(nn.Conv2d(channels[i-1], channels[i], kernel_size=1))
self.layers.append(nn.BatchNorm2d(channels[i]))
self.layers.append(activation_factory(activation))
def forward(self, x):
# Input shape: (N,C,T,V)
for layer in self.layers:
x = layer(x)
return x
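# Example usage (sketch): MLP(3, [64, 64]) stacks two 1x1-conv blocks and maps
# an (N, 3, T, V) tensor to (N, 64, T, V); note out_channels is a list of
# per-layer widths, not a single integer.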
import sys
sys.path.insert(0, '')
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from ms_g3d_graph.tools import k_adjacency, normalize_adjacency_matrix
from ms_g3d.mlp import MLP
from ms_g3d.activation import activation_factory
class MultiScale_GraphConv(nn.Module):
def __init__(self,
num_scales,
in_channels,
out_channels,
A_binary,
disentangled_agg=True,
use_mask=True,
dropout=0,
activation='relu'):
super().__init__()
self.num_scales = num_scales
if disentangled_agg:
A_powers = [k_adjacency(A_binary, k, with_self=True) for k in range(num_scales)]
A_powers = np.concatenate([normalize_adjacency_matrix(g) for g in A_powers])
else:
A_powers = [A_binary + np.eye(len(A_binary)) for k in range(num_scales)]
A_powers = [normalize_adjacency_matrix(g) for g in A_powers]
A_powers = [np.linalg.matrix_power(g, k) for k, g in enumerate(A_powers)]
A_powers = np.concatenate(A_powers)
self.A_powers = torch.Tensor(A_powers)
self.use_mask = use_mask
if use_mask:
# NOTE: the inclusion of residual mask appears to slow down training noticeably
self.A_res = nn.init.uniform_(nn.Parameter(torch.Tensor(self.A_powers.shape)), -1e-6, 1e-6)
self.mlp = MLP(in_channels * num_scales, [out_channels], dropout=dropout, activation=activation)
def forward(self, x):
N, C, T, V = x.shape
self.A_powers = self.A_powers.to(x.device)
A = self.A_powers.to(x.dtype)
if self.use_mask:
A = A + self.A_res.to(x.dtype)
support = torch.einsum('vu,nctu->nctv', A, x)
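        # A stacks the num_scales normalized k-hop adjacencies vertically, so it
        # has shape (num_scales*V, V) and the einsum yields (N, C, T, num_scales*V):
        # one aggregation per scale, split back out by the view below.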
support = support.view(N, C, T, self.num_scales, V)
support = support.permute(0,3,1,2,4).contiguous().view(N, self.num_scales*C, T, V)
out = self.mlp(support)
return out
if __name__ == "__main__":
    from ms_g3d_graph.ntu_rgb_d import AdjMatrixGraph
graph = AdjMatrixGraph()
A_binary = graph.A_binary
msgcn = MultiScale_GraphConv(num_scales=15, in_channels=3, out_channels=64, A_binary=A_binary)
msgcn.forward(torch.randn(16,3,30,25))
import sys
sys.path.insert(0, '')
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from ms_g3d.ms_tcn import MultiScale_TemporalConv as MS_TCN
from ms_g3d.mlp import MLP
from ms_g3d.activation import activation_factory
from ms_g3d_graph.tools import k_adjacency, normalize_adjacency_matrix
class UnfoldTemporalWindows(nn.Module):
def __init__(self, window_size, window_stride, window_dilation=1):
super().__init__()
self.window_size = window_size
self.window_stride = window_stride
self.window_dilation = window_dilation
self.padding = (window_size + (window_size-1) * (window_dilation-1) - 1) // 2
self.unfold = nn.Unfold(kernel_size=(self.window_size, 1),
dilation=(self.window_dilation, 1),
stride=(self.window_stride, 1),
padding=(self.padding, 0))
def forward(self, x):
# Input shape: (N,C,T,V), out: (N,C,T,V*window_size)
N, C, T, V = x.shape
x = self.unfold(x)
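        # nn.Unfold flattens each (window_size x 1) patch: e.g. with window_size=3,
        # stride=1, dilation=1 and input (N, C, 100, 25) it returns (N, C*3, 100*25);
        # the reshapes below rearrange this to (N, C, 100, 3*25), concatenating the
        # window's frames along the joint axis.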
# Permute extra channels from window size to the graph dimension; -1 for number of windows
x = x.view(N, C, self.window_size, -1, V).permute(0,1,3,2,4).contiguous()
x = x.view(N, C, -1, self.window_size * V)
return x
class SpatialTemporal_MS_GCN(nn.Module):
def __init__(self,
in_channels,
out_channels,
A_binary,
num_scales,
window_size,
disentangled_agg=True,
use_Ares=True,
residual=False,
dropout=0,
activation='relu'):
super().__init__()
self.num_scales = num_scales
self.window_size = window_size
self.use_Ares = use_Ares
A = self.build_spatial_temporal_graph(A_binary, window_size)
if disentangled_agg:
A_scales = [k_adjacency(A, k, with_self=True) for k in range(num_scales)]
A_scales = np.concatenate([normalize_adjacency_matrix(g) for g in A_scales])
else:
# Self-loops have already been included in A
A_scales = [normalize_adjacency_matrix(A) for k in range(num_scales)]
A_scales = [np.linalg.matrix_power(g, k) for k, g in enumerate(A_scales)]
A_scales = np.concatenate(A_scales)
self.A_scales = torch.Tensor(A_scales)
self.V = len(A_binary)
if use_Ares:
self.A_res = nn.init.uniform_(nn.Parameter(torch.randn(self.A_scales.shape)), -1e-6, 1e-6)
else:
self.A_res = torch.tensor(0)
self.mlp = MLP(in_channels * num_scales, [out_channels], dropout=dropout, activation='linear')
# Residual connection
if not residual:
self.residual = lambda x: 0
elif (in_channels == out_channels):
self.residual = lambda x: x
else:
self.residual = MLP(in_channels, [out_channels], activation='linear')
self.act = activation_factory(activation)
def build_spatial_temporal_graph(self, A_binary, window_size):
assert isinstance(A_binary, np.ndarray), 'A_binary should be of type `np.ndarray`'
V = len(A_binary)
V_large = V * window_size
A_binary_with_I = A_binary + np.eye(len(A_binary), dtype=A_binary.dtype)
# Build spatial-temporal graph
A_large = np.tile(A_binary_with_I, (window_size, window_size)).copy()
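        # A_large is (window_size*V) x (window_size*V): tiling A+I connects every
        # joint to its spatial neighbors (and itself) in all frames of the window,
        # which is what lets the unfolded graph convolution reach across space and
        # time in a single aggregation step.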
return A_large
def forward(self, x):
N, C, T, V = x.shape # T = number of windows
# Build graphs
A = self.A_scales.to(x.dtype).to(x.device) + self.A_res.to(x.dtype).to(x.device)
# Perform Graph Convolution
res = self.residual(x)
agg = torch.einsum('vu,nctu->nctv', A, x)
agg = agg.view(N, C, T, self.num_scales, V)
agg = agg.permute(0,3,1,2,4).contiguous().view(N, self.num_scales*C, T, V)
out = self.mlp(agg)
out += res
return self.act(out)
import sys
sys.path.insert(0, '')
import torch
import torch.nn as nn
from ms_g3d.activation import activation_factory
class TemporalConv(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1):
super(TemporalConv, self).__init__()
pad = (kernel_size + (kernel_size-1) * (dilation-1) - 1) // 2
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size=(kernel_size, 1),
padding=(pad, 0),
stride=(stride, 1),
dilation=(dilation, 1))
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class MultiScale_TemporalConv(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size=3,
stride=1,
dilations=[1,2,3,4],
residual=True,
residual_kernel_size=1,
activation='relu'):
super().__init__()
assert out_channels % (len(dilations) + 2) == 0, '# out channels should be multiples of # branches'
# Multiple branches of temporal convolution
self.num_branches = len(dilations) + 2
branch_channels = out_channels // self.num_branches
# Temporal Convolution branches
self.branches = nn.ModuleList([
nn.Sequential(
nn.Conv2d(
in_channels,
branch_channels,
kernel_size=1,
padding=0),
nn.BatchNorm2d(branch_channels),
activation_factory(activation),
TemporalConv(
branch_channels,
branch_channels,
kernel_size=kernel_size,
stride=stride,
dilation=dilation),
)
for dilation in dilations
])
# Additional Max & 1x1 branch
self.branches.append(nn.Sequential(
nn.Conv2d(in_channels, branch_channels, kernel_size=1, padding=0),
nn.BatchNorm2d(branch_channels),
activation_factory(activation),
nn.MaxPool2d(kernel_size=(3,1), stride=(stride,1), padding=(1,0)),
nn.BatchNorm2d(branch_channels)
))
self.branches.append(nn.Sequential(
nn.Conv2d(in_channels, branch_channels, kernel_size=1, padding=0, stride=(stride,1)),
nn.BatchNorm2d(branch_channels)
))
# Residual connection
if not residual:
self.residual = lambda x: 0
elif (in_channels == out_channels) and (stride == 1):
self.residual = lambda x: x
else:
self.residual = TemporalConv(in_channels, out_channels, kernel_size=residual_kernel_size, stride=stride)
self.act = activation_factory(activation)
def forward(self, x):
# Input dim: (N,C,T,V)
res = self.residual(x)
branch_outs = []
for tempconv in self.branches:
out = tempconv(x)
branch_outs.append(out)
out = torch.cat(branch_outs, dim=1)
out += res
out = self.act(out)
return out
if __name__ == "__main__":
mstcn = MultiScale_TemporalConv(288, 288)
x = torch.randn(32, 288, 100, 20)
mstcn.forward(x)
for name, param in mstcn.named_parameters():
print(f'{name}: {param.numel()}')
print(sum(p.numel() for p in mstcn.parameters() if p.requires_grad))
import sys
sys.path.insert(0, '')
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils import import_class, count_params
from ms_g3d.ms_gcn import MultiScale_GraphConv as MS_GCN
from ms_g3d.ms_tcn import MultiScale_TemporalConv as MS_TCN
from ms_g3d.ms_gtcn import SpatialTemporal_MS_GCN, UnfoldTemporalWindows
from ms_g3d.mlp import MLP
from ms_g3d.activation import activation_factory
class MS_G3D(nn.Module):
def __init__(self,
in_channels,
out_channels,
A_binary,
num_scales,
window_size,
window_stride,
window_dilation,
embed_factor=1,
activation='relu'):
super().__init__()
self.window_size = window_size
self.out_channels = out_channels
self.embed_channels_in = self.embed_channels_out = out_channels // embed_factor
if embed_factor == 1:
self.in1x1 = nn.Identity()
self.embed_channels_in = self.embed_channels_out = in_channels
# The first STGC block changes channels right away; others change at collapse
if in_channels == 3:
self.embed_channels_out = out_channels
else:
self.in1x1 = MLP(in_channels, [self.embed_channels_in])
self.gcn3d = nn.Sequential(
UnfoldTemporalWindows(window_size, window_stride, window_dilation),
SpatialTemporal_MS_GCN(
in_channels=self.embed_channels_in,
out_channels=self.embed_channels_out,
A_binary=A_binary,
num_scales=num_scales,
window_size=window_size,
use_Ares=True
)
)
self.out_conv = nn.Conv3d(self.embed_channels_out, out_channels, kernel_size=(1, self.window_size, 1))
self.out_bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
N, _, T, V = x.shape
x = self.in1x1(x)
# Construct temporal windows and apply MS-GCN
x = self.gcn3d(x)
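        # gcn3d output: (N, embed_channels_out, T', window_size * V); the view
        # below splits the window dimension back out so out_conv can collapse it.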
# Collapse the window dimension
x = x.view(N, self.embed_channels_out, -1, self.window_size, V)
x = self.out_conv(x).squeeze(dim=3)
x = self.out_bn(x)
# no activation
return x
class MultiWindow_MS_G3D(nn.Module):
def __init__(self,
in_channels,
out_channels,
A_binary,
num_scales,
window_sizes=[3,5],
window_stride=1,
window_dilations=[1,1]):
super().__init__()
self.gcn3d = nn.ModuleList([
MS_G3D(
in_channels,
out_channels,
A_binary,
num_scales,
window_size,
window_stride,
window_dilation
)
for window_size, window_dilation in zip(window_sizes, window_dilations)
])
def forward(self, x):
# Input shape: (N, C, T, V)
out_sum = 0
for gcn3d in self.gcn3d:
out_sum += gcn3d(x)
# no activation
return out_sum
class MS_G3D_Model(nn.Module):
def __init__(self,
num_class,
num_point,
num_person,
num_gcn_scales,
num_g3d_scales,
graph,
in_channels=3):
super(MS_G3D_Model, self).__init__()
Graph = import_class(graph)
A_binary = Graph().A_binary
self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)
# channels
c1 = 96
c2 = c1 * 2 # 192
# c3 = c2 * 2 # 384
# r=3 STGC blocks
        self.gcn3d1 = MultiWindow_MS_G3D(in_channels, c1, A_binary, num_g3d_scales, window_stride=2)
self.sgcn1 = nn.Sequential(
            MS_GCN(num_gcn_scales, in_channels, c1, A_binary, disentangled_agg=True),
MS_TCN(c1, c1, stride=2),
MS_TCN(c1, c1))
self.sgcn1[-1].act = nn.Identity()
self.tcn1 = MS_TCN(c1, c1)
self.gcn3d2 = MultiWindow_MS_G3D(c1, c2, A_binary, num_g3d_scales, window_stride=2)
self.sgcn2 = nn.Sequential(
MS_GCN(num_gcn_scales, c1, c1, A_binary, disentangled_agg=True),
MS_TCN(c1, c2, stride=2),
MS_TCN(c2, c2))
self.sgcn2[-1].act = nn.Identity()
self.tcn2 = MS_TCN(c2, c2)
# self.gcn3d3 = MultiWindow_MS_G3D(c2, c3, A_binary, num_g3d_scales, window_stride=2)
# self.sgcn3 = nn.Sequential(
# MS_GCN(num_gcn_scales, c2, c2, A_binary, disentangled_agg=True),
# MS_TCN(c2, c3, stride=2),
# MS_TCN(c3, c3))
# self.sgcn3[-1].act = nn.Identity()
# self.tcn3 = MS_TCN(c3, c3)
# self.fc = nn.Linear(c3, num_class)
def forward(self, x):
N, C, T, V, M = x.size()
x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)
x = self.data_bn(x)
x = x.view(N * M, V, C, T).permute(0,2,3,1).contiguous()
# Apply activation to the sum of the pathways
x = F.relu(self.sgcn1(x) + self.gcn3d1(x), inplace=True)
x = self.tcn1(x)
x = F.relu(self.sgcn2(x) + self.gcn3d2(x), inplace=True)
x = self.tcn2(x)
# x = F.relu(self.sgcn3(x) + self.gcn3d3(x), inplace=True)
# x = self.tcn3(x)
        out = x
        _, c, t, v = out.size()
        out = out.view(N, M, c, t, v)
#out = out.mean(3) # Global Average Pooling (Spatial+Temporal)
out = out.mean(1) # Average pool number of bodies in the sequence
# out = self.fc(out)
return out
if __name__ == "__main__":
# For debugging purposes
import sys
sys.path.append('..')
model = MS_G3D_Model(
num_class=60,
num_point=25,
num_person=2,
num_gcn_scales=13,
num_g3d_scales=6,
graph='graph.ntu_rgb_d.AdjMatrixGraph'
)
N, C, T, V, M = 6, 3, 50, 25, 2
x = torch.randn(N,C,T,V,M)
    print('Input shape:', x.shape)
    x = model(x)
    print('Output shape:', x.shape)
print('Model total # params:', count_params(model))
from . import tools
from . import ntu_rgb_d
from . import kinetics
import sys
sys.path.insert(0, '')
sys.path.extend(['../'])
import numpy as np
from ms_g3d_graph import tools
# Joint index:
# {0, "Nose"}
# {1, "Neck"},
# {2, "RShoulder"},
# {3, "RElbow"},
# {4, "RWrist"},
# {5, "LShoulder"},
# {6, "LElbow"},
# {7, "LWrist"},
# {8, "RHip"},
# {9, "RKnee"},
# {10, "RAnkle"},
# {11, "LHip"},
# {12, "LKnee"},
# {13, "LAnkle"},
# {14, "REye"},
# {15, "LEye"},
# {16, "REar"},
# {17, "LEar"},
num_node = 18
self_link = [(i, i) for i in range(num_node)]
inward = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8),
(11, 5), (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15),
(16, 14)]
outward = [(j, i) for (i, j) in inward]
neighbor = inward + outward
class AdjMatrixGraph:
def __init__(self, *args, **kwargs):
self.num_nodes = num_node
self.edges = neighbor
self.self_loops = [(i, i) for i in range(self.num_nodes)]
self.A_binary = tools.get_adjacency_matrix(self.edges, self.num_nodes)
self.A_binary_with_I = tools.get_adjacency_matrix(self.edges + self.self_loops, self.num_nodes)
if __name__ == '__main__':
graph = AdjMatrixGraph()
A_binary = graph.A_binary
import matplotlib.pyplot as plt
print(A_binary)
plt.matshow(A_binary)
plt.show()
import sys
sys.path.insert(0, '')
sys.path.extend(['../'])
import numpy as np
from ms_g3d_graph import tools
num_node = 25
self_link = [(i, i) for i in range(num_node)]
inward_ori_index = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6),
(8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1),
(14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18),
(20, 19), (22, 23), (23, 8), (24, 25), (25, 12)]
inward = [(i - 1, j - 1) for (i, j) in inward_ori_index]
outward = [(j, i) for (i, j) in inward]
neighbor = inward + outward
class AdjMatrixGraph:
def __init__(self, *args, **kwargs):
self.edges = neighbor
self.num_nodes = num_node
self.self_loops = [(i, i) for i in range(self.num_nodes)]
self.A_binary = tools.get_adjacency_matrix(self.edges, self.num_nodes)
self.A_binary_with_I = tools.get_adjacency_matrix(self.edges + self.self_loops, self.num_nodes)
self.A = tools.normalize_adjacency_matrix(self.A_binary)
if __name__ == '__main__':
import matplotlib.pyplot as plt
graph = AdjMatrixGraph()
A, A_binary, A_binary_with_I = graph.A, graph.A_binary, graph.A_binary_with_I
f, ax = plt.subplots(1, 3)
ax[0].imshow(A_binary_with_I, cmap='gray')
ax[1].imshow(A_binary, cmap='gray')
ax[2].imshow(A, cmap='gray')
plt.show()
print(A_binary_with_I.shape, A_binary.shape, A.shape)
import numpy as np
def edge2mat(link, num_node):
A = np.zeros((num_node, num_node))
for i, j in link:
A[j, i] = 1
return A
def normalize_digraph(A):
Dl = np.sum(A, 0)
h, w = A.shape
Dn = np.zeros((w, w))
for i in range(w):
if Dl[i] > 0:
Dn[i, i] = Dl[i] ** (-1)
AD = np.dot(A, Dn)
return AD
def get_spatial_graph(num_node, self_link, inward, outward):
I = edge2mat(self_link, num_node)
In = normalize_digraph(edge2mat(inward, num_node))
Out = normalize_digraph(edge2mat(outward, num_node))
A = np.stack((I, In, Out))
return A
def k_adjacency(A, k, with_self=False, self_factor=1):
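    # Exactly-k-hop adjacency: nodes reachable within k steps minus those
    # reachable within k-1 steps (MS-G3D's disentangled multi-scale aggregation).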
assert isinstance(A, np.ndarray)
I = np.eye(len(A), dtype=A.dtype)
if k == 0:
return I
Ak = np.minimum(np.linalg.matrix_power(A + I, k), 1) \
- np.minimum(np.linalg.matrix_power(A + I, k - 1), 1)
if with_self:
Ak += (self_factor * I)
return Ak
def normalize_adjacency_matrix(A):
node_degrees = A.sum(-1)
degs_inv_sqrt = np.power(node_degrees, -0.5)
norm_degs_matrix = np.eye(len(node_degrees)) * degs_inv_sqrt
return (norm_degs_matrix @ A @ norm_degs_matrix).astype(np.float32)
def get_adjacency_matrix(edges, num_nodes):
A = np.zeros((num_nodes, num_nodes), dtype=np.float32)
for edge in edges:
A[edge] = 1.
return A
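# Sanity check (sketch, hypothetical toy graph): on a 3-node chain, the 2-hop
# adjacency should link only the two endpoints, and normalization should be the
# symmetric D^-1/2 A D^-1/2 form.
if __name__ == '__main__':
    chain = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=np.float32)
    print(k_adjacency(chain, 2))              # only nodes 0 and 2 connected
    print(normalize_adjacency_matrix(chain))  # off-diagonals become 1/sqrt(2)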
from ms_g3d.msg3d import MS_G3D_Model
from AGCN.agcn import AGCN_Model
backbone = "ms_g3d"
dataset = "ntu_rgb_d"
node = 25  # number of joints in the NTU RGB+D skeleton
if backbone == 'ms_g3d':
graph = 'ms_g3d_graph.ntu_rgb_d.AdjMatrixGraph'
model = MS_G3D_Model(
        num_class=0,  # unused here: MS_G3D_Model's classifier head is commented out
num_point=node,
num_person=2,
num_gcn_scales=13,
num_g3d_scales=6,
graph=graph,
)
in_channel = 192
elif backbone == '2s_AGCN':
graph = 'AGCN_gcn_graph.ntu_rgb_d.Graph'
model = AGCN_Model(
num_class=60,
num_point=node,
num_person=2,
        graph=graph,
graph_args={'labeling_mode': 'spatial'}
)
in_channel = 256
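# Quick shape check (sketch): both backbones consume (N, C, T, V, M) skeleton
# batches and return feature maps whose channel width matches `in_channel`.
import torch
x = torch.randn(2, 3, 50, node, 2)
feat = model(x)
print(backbone, 'feature shape:', feat.shape)  # e.g. (2, 192, 13, 25) for ms_g3d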