Commit 8802dece authored by 刘哲 (Liu Zhe, 2021 Master's student)

code

# Default ignored files
/shelf/
/workspace.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Remote Python 3.5.2 (sftp://liuzhe@10.214.199.205:22/usr/bin/python3.5)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" autoUpload="Always" serverName="liuzhe@10.214.199.205:22 password" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="liuzhe@10.214.199.205:22 password">
<serverdata>
<mappings>
<mapping deploy="/home/liuzhe/Unet" local="$PROJECT_DIR$" />
</mappings>
</serverdata>
</paths>
<paths name="server">
<serverdata>
<mappings>
<mapping deploy="/" local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ALWAYS" />
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N803" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.5.2 (sftp://liuzhe@10.214.199.205:22/usr/bin/python3.5)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/QDlab.iml" filepath="$PROJECT_DIR$/.idea/QDlab.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="WebServers">
<option name="servers">
<webServer id="2cf9e263-2be4-41ec-9190-ed844cb67638" name="server">
<fileTransfer rootFolder="/home/liuzhe/Unet" accessType="SFTP" host="10.214.199.205" port="22" sshConfigId="42d80445-df33-442f-97f6-eee45e44b904" sshConfig="liuzhe@10.214.199.205:22 password">
<advancedOptions>
<advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
</advancedOptions>
</fileTransfer>
</webServer>
</option>
</component>
</project>
\ No newline at end of file
This is QD's private research program
\ No newline at end of file
"""
@Time : 2020/6/1 16:27
@Author : Qin Dian
@Manual :
"""
\ No newline at end of file
"""
@Time : 2020/6/30 11:23
@Author : Qin Dian
@Manual : liver tumor dataset
"""
from torch.utils.data import Dataset
import numpy as np
import os
import torch
import utils.data_utils as du
from glob import glob
LOWER_BOUND = -60
UPPER_BOUND = 140
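# NOTE: utils.data_utils is not part of this commit. As a rough, hypothetical sketch,
# the helpers used below are assumed to behave roughly like:
#   window_standardize(ct, lower, upper): clip the CT to the HU window [lower, upper], then scale to [0, 1]
#   cut_384(arr): center-crop the last two axes from 512x512 to 384x384
#   to_3channel_replica(arr): repeat a single channel three times so the input matches an RGB-shaped network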
class SliceDataset(Dataset):
def __init__(self, load_path, index_path, task='3tumors', series_index=None):
super(SliceDataset, self).__init__()
self.load_path = load_path
self.series_index = series_index
self.task = task
if series_index is not None:
self.tumor_slices = np.load(os.path.join(index_path, f'tumor_slices_s{series_index}.npy'))
def __len__(self):
return len(self.tumor_slices)
def __getitem__(self, item):
# Data loading
f_name = self.tumor_slices[item]
npz = np.load(os.path.join(self.load_path, f_name), allow_pickle=True)
ct = npz.get('ct')
mask = npz.get('mask') >> 1
# Preprocess
if self.task == '3tumors':
mask[(mask == 6) | (mask == 2) | (mask == 3)] = 1
mask[mask > 3] -= 2
if self.series_index is not None:
ct = ct[self.series_index]
mask = mask[self.series_index]
ct = du.window_standardize(ct, LOWER_BOUND, UPPER_BOUND)
# replicate to 3 channels so the single-channel slices match an RGB-style input
ct = du.to_3channel_replica(ct)
mask = du.to_3channel_replica(mask)
# To tensor & cut to 384
ct = torch.from_numpy(du.cut_384(ct.copy())).float()
mask = torch.from_numpy(du.cut_384(mask.copy())).float()
return ct, mask
class CTDataset(Dataset):
def __init__(self, load_path, task='3tumors', series_index=None):
self.files = glob(os.path.join(load_path, 'ct*.npz'))
self.series_index = series_index
self.task = task
def __len__(self):
return len(self.files)
def __getitem__(self, item):
file = np.load(self.files[item], allow_pickle=True)
ct = file.get('arrays', file.get('arr_0'))
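# assumed layout: each npz stores three aligned contrast series stacked along axis 0, with 512x512 slices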
ct = ct.reshape(3, -1, 512, 512)
if self.series_index is not None:
ct = ct[self.series_index]
ct = du.window_standardize(ct, LOWER_BOUND, UPPER_BOUND)
ct = du.cut_384(ct)
ct = du.to_3channel_replica(ct)
ct = torch.from_numpy(ct.copy()).float()
return ct
# class Images_Dataset_folder(torch.utils.data.Dataset):
# """Class for getting individual transformations and data
# Args:
# images_dir = path of input images
# labels_dir = path of labeled images
# transformI = Input Images transformation (default: None)
# transformM = Input Labels transformation (default: None)
# Output:
# tx = Transformed images
# lx = Transformed labels
# """
#
# def __init__(self, images_dir, labels_dir, transformI=None, transformM=None):
# self.images = sorted(os.listdir(images_dir))
# self.labels = sorted(os.listdir(labels_dir))
# self.images_dir = images_dir
# self.labels_dir = labels_dir
# self.transformI = transformI
# self.transformM = transformM
#
# if self.transformI:
# self.tx = self.transformI
# else:
# self.tx = torchvision.transforms.Compose([
# # torchvision.transforms.Resize((128,128)),
# torchvision.transforms.CenterCrop(96),
# torchvision.transforms.RandomRotation((-10, 10)),
# # torchvision.transforms.RandomHorizontalFlip(),
# torchvision.transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
# torchvision.transforms.ToTensor(),
# torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# ])
#
# if self.transformM:
# self.lx = self.transformM
# else:
# self.lx = torchvision.transforms.Compose([
# # torchvision.transforms.Resize((128,128)),
# torchvision.transforms.CenterCrop(96),
# torchvision.transforms.RandomRotation((-10, 10)),
# torchvision.transforms.Grayscale(),
# torchvision.transforms.ToTensor(),
# # torchvision.transforms.Lambda(lambda x: torch.cat([x, 1 - x], dim=0))
# ])
#
# def __len__(self):
#
# return len(self.images)
#
# def __getitem__(self, i):
# i1 = Image.open(self.images_dir + self.images[i])
# l1 = Image.open(self.labels_dir + self.labels[i])
#
# seed = np.random.randint(0, 2 ** 32) # make a seed with numpy generator
#
# # apply this seed to img tranfsorms
# random.seed(seed)
# torch.manual_seed(seed)
# img = self.tx(i1)
#
# # apply this seed to target/label tranfsorms
# random.seed(seed)
# torch.manual_seed(seed)
# label = self.lx(l1)
# return img, label
"""
@Time : 2020/6/2 14:55
@Author : Qin Dian
@Manual :
"""
import os
import torch
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.utils.data import DataLoader
import argparse
from networks.deep_snake import U_Net
from datasets.liver import SliceDataset, CTDataset
import utils.net_utils as nu
import supports.loss_functions as lf
import torch.nn.functional as F
pl.seed_everything(123)
parser = argparse.ArgumentParser('deep snake')
parser.add_argument('--data_path', type=str, default=r'D:\A_UNET\out')
parser.add_argument('--tumor_index_path', type=str, default=r'D:\A_UNET')
parser.add_argument('--checkpoint_path', type=str, default=r'D:\A_UNET\out\checkpoints')
parser.add_argument('--test_path', type=str, default='/data/ts4.0')
parser.add_argument('--series_index', type=int, default=0)
parser.add_argument('--batch_size', type=int, default=2)
parser.add_argument('--max_epochs', type=int, default=50)
parser.add_argument('--gpu', type=int, nargs='+', default=[0])  # type=list would split a CLI string into characters
def dice_loss(prediction, target):
"""Calculating the dice loss
Args:
prediction = predicted image
target = Targeted image
Output:
dice_loss"""
smooth = 1.0
i_flat = prediction.view(-1)
t_flat = target.view(-1)
intersection = (i_flat * t_flat).sum()
return 1 - ((2. * intersection + smooth) / (i_flat.sum() + t_flat.sum() + smooth))
def calc_loss(prediction, target, bce_weight=0.5):
"""Calculating the loss and metrics
Args:
prediction = predicted image
target = Targeted image
metrics = Metrics printed
bce_weight = 0.5 (default)
Output:
loss : dice loss of the epoch """
bce = F.binary_cross_entropy_with_logits(prediction, target)
prediction = torch.sigmoid(prediction)  # torch.sigmoid replaces the deprecated F.sigmoid
dice = dice_loss(prediction, target)
loss = bce * bce_weight + dice * (1 - bce_weight)
return loss
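# Hypothetical sanity check (shapes chosen for illustration only):
#   pred = torch.randn(2, 3, 384, 384)                    # raw logits
#   target = (torch.rand(2, 3, 384, 384) > 0.5).float()   # binary mask
#   calc_loss(pred, target)                               # ~0.5 * BCE + 0.5 * dice, a scalar tensor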
class DeepSnake(pl.LightningModule):
def __init__(self, params):
super(DeepSnake, self).__init__()
self.params = params
self.net = U_Net(in_ch=3, out_ch=3)
def forward(self, x):
return self.net(x)
def training_step(self, batch, batch_idx):
ct, mask = batch
output = self.forward(ct)
lossT = calc_loss(output, mask)  # weighted BCE + dice loss
logs = {'train_loss': lossT.item()}
return {'loss': lossT, 'log': logs}  # Lightning needs the loss as a tensor, not a float, for backprop
def test_step(self, batch, batch_idx):
x = batch[0]
num = int(x.size()[0])
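# run inference in chunks of 8 slices to bound GPU memory; the chunk outputs are
# computed here but never collected or returned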
for i in range(num//8 + 1):
if i < num//8:
out = self(x[i*8:i*8+8])
out = nu.clipped_sigmoid(out['ct_hm'])
else:
out = self(x[-8:])
out = nu.clipped_sigmoid(out['ct_hm'])
def train_dataloader(self):
return DataLoader(SliceDataset(load_path=self.params.data_path, index_path=self.params.tumor_index_path,
series_index=self.params.series_index), batch_size=self.params.batch_size,
num_workers=8, pin_memory=True, shuffle=True)
def test_dataloader(self):
return DataLoader(CTDataset(load_path=self.params.test_path, series_index=2),
batch_size=1, num_workers=2, pin_memory=True)
def configure_optimizers(self):
return torch.optim.Adam(self.parameters(), lr=1e-4, betas=(0.5, 0.999))
def main():
args = parser.parse_args()
model = DeepSnake(args)
# checkpoint
checkpoint_callback = ModelCheckpoint(
filepath=os.path.join(args.checkpoint_path, 'checkpoint_{epoch}')
# save_last=True # always saves the model at the end of the epoch
)
# trainer = Trainer.from_argparse_args(args, gpus=args.gpu, amp_level='O2', precision=16)
trainer = Trainer.from_argparse_args(args, gpus=args.gpu, checkpoint_callback=checkpoint_callback)
trainer.fit(model)
def test():
args = parser.parse_args()
model = DeepSnake.load_from_checkpoint(
'/data/checkpoints/deep_snake/last.ckpt',
)
trainer = Trainer(gpus=[0])
trainer.test(model)
if __name__ == '__main__':
main()
# test()
\ No newline at end of file
"""
@Time : 2020/6/17 13:38
@Author : Qin Dian
@Manual :
"""
\ No newline at end of file
"""
@Time : 2020/6/17 13:38
@Author : Qin Dian
@Manual :
"""
\ No newline at end of file
.vscode
.idea
*.so
*.o
*pyc
_ext
build
DCNv2.egg-info
dist
\ No newline at end of file
BSD 3-Clause License
Copyright (c) 2019, Charles Shang
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
## Deformable Convolutional Networks V2 with PyTorch 1.0
### Build
```bash
./make.sh # build
python test.py # run examples and gradient check
```
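The `make.sh` script is not shown in this snippet; presumably it just runs `python setup.py build develop` against the `setup.py` shipped with the extension.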
### An Example
- deformable conv
```python
import torch
from dcn_v2 import DCN
input = torch.randn(2, 64, 128, 128).cuda()
# wrap all things (offset and mask) in DCN
dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
output = dcn(input)
print(output.shape)
```
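With `kernel_size=(3,3)`, `stride=1`, and `padding=1` the spatial size is preserved, so the printed shape should be `torch.Size([2, 64, 128, 128])`.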
- deformable roi pooling
```python
import torch
from dcn_v2 import DCNPooling
input = torch.randn(2, 32, 64, 64).cuda()
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
# modulated deformable pooling (V2)
# wrap all things (offset and mask) in DCNPooling
dpooling = DCNPooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
dout = dpooling(input, rois)
```
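With `pooled_size=7` and `output_dim=32`, the 20 ROIs above should pool to a `dout` of roughly shape `(20, 32, 7, 7)`.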
### Note
The master branch now targets PyTorch 1.0 (the new ATen API); you can switch back to PyTorch 0.4 with:
```bash
git checkout pytorch_0.4
```
### Known Issues:
- [x] Gradient check w.r.t offset (solved)
- [ ] Backward is not reentrant (minor)
This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
<s>I have run the gradient check many times with the DOUBLE type. Every tensor **except offset** passes.
However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it because of some
non-differentiable points?</s>
Update: all gradient checks pass with double precision.
Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
float, `<1e-15` for double),
so it may not be a serious problem (?)
Please post an issue or PR if you have any comments.
\ No newline at end of file
#!/usr/bin/env python
import os
import glob
import torch
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
from setuptools import find_packages
from setuptools import setup
requirements = ["torch", "torchvision"]
def get_extensions():
this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "src")
main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
sources = main_file + source_cpu
extension = CppExtension
extra_compile_args = {"cxx": []}
define_macros = []
if torch.cuda.is_available() and CUDA_HOME is not None:
extension = CUDAExtension
sources += source_cuda
define_macros += [("WITH_CUDA", None)]
extra_compile_args["nvcc"] = [
"-DCUDA_HAS_FP16=1",
"-D__CUDA_NO_HALF_OPERATORS__",
"-D__CUDA_NO_HALF_CONVERSIONS__",
"-D__CUDA_NO_HALF2_OPERATORS__",
]
else:
raise NotImplementedError('CUDA is not available')
sources = [os.path.join(extensions_dir, s) for s in sources]
include_dirs = [extensions_dir]
ext_modules = [
extension(
"_ext",
sources,
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
)
]
return ext_modules
setup(
name="DCNv2",
version="0.1",
author="charlesshang",
url="https://github.com/charlesshang/DCNv2",
description="deformable convolutional networks",
packages=find_packages(exclude=("configs", "tests",)),
# install_requires=requirements,
ext_modules=get_extensions(),
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
\ No newline at end of file
#include <vector>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
at::Tensor
dcn_v2_cpu_forward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const int kernel_h,
const int kernel_w,
const int stride_h,
const int stride_w,
const int pad_h,
const int pad_w,
const int dilation_h,
const int dilation_w,
const int deformable_group)
{
AT_ERROR("Not implement on cpu");
}
std::vector<at::Tensor>
dcn_v2_cpu_backward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const at::Tensor &grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group)
{
AT_ERROR("Not implement on cpu");
}
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
AT_ERROR("Not implement on cpu");
}
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const at::Tensor &top_count,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
AT_ERROR("Not implement on cpu");
}
\ No newline at end of file
#pragma once
#include <torch/extension.h>
at::Tensor
dcn_v2_cpu_forward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const int kernel_h,
const int kernel_w,
const int stride_h,
const int stride_w,
const int pad_h,
const int pad_w,
const int dilation_h,
const int dilation_w,
const int deformable_group);
std::vector<at::Tensor>
dcn_v2_cpu_backward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const at::Tensor &grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group);
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const at::Tensor &top_count,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
\ No newline at end of file
#include <vector>
#include "cuda/dcn_v2_im2col_cuda.h"
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THC.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCDeviceUtils.cuh>
extern THCState *state;
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
// [batch gemm]
// https://github.com/pytorch/pytorch/blob/master/aten/src/THC/generic/THCTensorMathBlas.cu
__global__ void createBatchGemmBuffer(const float **input_b, float **output_b,
float **columns_b, const float **ones_b,
const float **weight_b, const float **bias_b,
float *input, float *output,
float *columns, float *ones,
float *weight, float *bias,
const int input_stride, const int output_stride,
const int columns_stride, const int ones_stride,
const int num_batches)
{
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_batches)
{
input_b[idx] = input + idx * input_stride;
output_b[idx] = output + idx * output_stride;
columns_b[idx] = columns + idx * columns_stride;
ones_b[idx] = ones + idx * ones_stride;
// share weights and bias within a Mini-Batch
weight_b[idx] = weight;
bias_b[idx] = bias;
}
}
at::Tensor
dcn_v2_cuda_forward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const int kernel_h,
const int kernel_w,
const int stride_h,
const int stride_w,
const int pad_h,
const int pad_w,
const int dilation_h,
const int dilation_w,
const int deformable_group)
{
using scalar_t = float;
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));
AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_out = weight.size(0);
const int channels_kernel = weight.size(1);
const int kernel_h_ = weight.size(2);
const int kernel_w_ = weight.size(3);
// printf("Kernels: %d %d %d %d\n", kernel_h_, kernel_w_, kernel_w, kernel_h);
// printf("Channels: %d %d\n", channels, channels_kernel);
// printf("Channels: %d %d\n", channels_out, channels_kernel);
AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
"Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w_, kernel_h, kernel_w);
AT_ASSERTM(channels == channels_kernel,
"Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel);
const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
auto ones = at::ones({batch, height_out, width_out}, input.options());
auto columns = at::empty({batch, channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
auto output = at::empty({batch, channels_out, height_out, width_out}, input.options());
// prepare for batch-wise computing, which is significantly faster than instance-wise computing
// when batch size is large.
// launch batch threads
int matrices_size = batch * sizeof(float *);
auto input_b = static_cast<const float **>(THCudaMalloc(state, matrices_size));
auto output_b = static_cast<float **>(THCudaMalloc(state, matrices_size));
auto columns_b = static_cast<float **>(THCudaMalloc(state, matrices_size));
auto ones_b = static_cast<const float **>(THCudaMalloc(state, matrices_size));
auto weight_b = static_cast<const float **>(THCudaMalloc(state, matrices_size));
auto bias_b = static_cast<const float **>(THCudaMalloc(state, matrices_size));
const int block = 128;
const int grid = (batch + block - 1) / block;
createBatchGemmBuffer<<<grid, block, 0, THCState_getCurrentStream(state)>>>(
input_b, output_b,
columns_b, ones_b,
weight_b, bias_b,
input.data<scalar_t>(),
output.data<scalar_t>(),
columns.data<scalar_t>(),
ones.data<scalar_t>(),
weight.data<scalar_t>(),
bias.data<scalar_t>(),
channels * width * height,
channels_out * width_out * height_out,
channels * kernel_h * kernel_w * height_out * width_out,
height_out * width_out,
batch);
long m_ = channels_out;
long n_ = height_out * width_out;
long k_ = 1;
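// pre-fill the output with the bias: a rank-1 batched GEMM (ones x bias) writes bias[c]
// to every spatial position of channel c before the convolution GEMM below accumulates onto it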
THCudaBlas_SgemmBatched(state,
't',
'n',
n_,
m_,
k_,
1.0f,
ones_b, k_,
bias_b, k_,
0.0f,
output_b, n_,
batch);
modulated_deformable_im2col_cuda(THCState_getCurrentStream(state),
input.data<scalar_t>(),
offset.data<scalar_t>(),
mask.data<scalar_t>(),
batch, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
deformable_group,
columns.data<scalar_t>());
long m = channels_out;
long n = height_out * width_out;
long k = channels * kernel_h * kernel_w;
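// the deformable convolution itself as a batched GEMM: output += weight (m x k) * columns (k x n),
// with k = channels * kernel_h * kernel_w and n = height_out * width_out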
THCudaBlas_SgemmBatched(state,
'n',
'n',
n,
m,
k,
1.0f,
(const float **)columns_b, n,
weight_b, k,
1.0f,
output_b, n,
batch);
THCudaFree(state, input_b);
THCudaFree(state, output_b);
THCudaFree(state, columns_b);
THCudaFree(state, ones_b);
THCudaFree(state, weight_b);
THCudaFree(state, bias_b);
return output;
}
__global__ void createBatchGemmBufferBackward(
float **grad_output_b,
float **columns_b,
float **ones_b,
float **weight_b,
float **grad_weight_b,
float **grad_bias_b,
float *grad_output,
float *columns,
float *ones,
float *weight,
float *grad_weight,
float *grad_bias,
const int grad_output_stride,
const int columns_stride,
const int ones_stride,
const int num_batches)
{
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_batches)
{
grad_output_b[idx] = grad_output + idx * grad_output_stride;
columns_b[idx] = columns + idx * columns_stride;
ones_b[idx] = ones + idx * ones_stride;
// share weights and bias within a Mini-Batch
weight_b[idx] = weight;
grad_weight_b[idx] = grad_weight;
grad_bias_b[idx] = grad_bias;
}
}
std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const at::Tensor &grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group)
{
THArgCheck(input.is_contiguous(), 1, "input tensor has to be contiguous");
THArgCheck(weight.is_contiguous(), 2, "weight tensor has to be contiguous");
AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_out = weight.size(0);
const int channels_kernel = weight.size(1);
const int kernel_h_ = weight.size(2);
const int kernel_w_ = weight.size(3);
AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w,
"Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h_, kernel_w_, kernel_h, kernel_w);
AT_ASSERTM(channels == channels_kernel,
"Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel);
const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
auto ones = at::ones({height_out, width_out}, input.options());
auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options());
auto output = at::empty({batch, channels_out, height_out, width_out}, input.options());
auto grad_input = at::zeros_like(input);
auto grad_weight = at::zeros_like(weight);
auto grad_bias = at::zeros_like(bias);
auto grad_offset = at::zeros_like(offset);
auto grad_mask = at::zeros_like(mask);
using scalar_t = float;
for (int b = 0; b < batch; b++)
{
auto input_n = input.select(0, b);
auto offset_n = offset.select(0, b);
auto mask_n = mask.select(0, b);
auto grad_output_n = grad_output.select(0, b);
auto grad_input_n = grad_input.select(0, b);
auto grad_offset_n = grad_offset.select(0, b);
auto grad_mask_n = grad_mask.select(0, b);
long m = channels * kernel_h * kernel_w;
long n = height_out * width_out;
long k = channels_out;
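// columns = weight^T * grad_output: map the output gradient back to im2col column space
// before scattering it to the input, offset and mask gradients below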
THCudaBlas_Sgemm(state, 'n', 't', n, m, k, 1.0f,
grad_output_n.data<scalar_t>(), n,
weight.data<scalar_t>(), m, 0.0f,
columns.data<scalar_t>(), n);
// gradient w.r.t. input coordinate data
modulated_deformable_col2im_coord_cuda(THCState_getCurrentStream(state),
columns.data<scalar_t>(),
input_n.data<scalar_t>(),
offset_n.data<scalar_t>(),
mask_n.data<scalar_t>(),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
grad_offset_n.data<scalar_t>(),
grad_mask_n.data<scalar_t>());
// gradient w.r.t. input data
modulated_deformable_col2im_cuda(THCState_getCurrentStream(state),
columns.data<scalar_t>(),
offset_n.data<scalar_t>(),
mask_n.data<scalar_t>(),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
grad_input_n.data<scalar_t>());
// gradient w.r.t. weight, dWeight should accumulate across the batch and group
modulated_deformable_im2col_cuda(THCState_getCurrentStream(state),
input_n.data<scalar_t>(),
offset_n.data<scalar_t>(),
mask_n.data<scalar_t>(),
1, channels, height, width,
height_out, width_out, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, deformable_group,
columns.data<scalar_t>());
long m_ = channels_out;
long n_ = channels * kernel_h * kernel_w;
long k_ = height_out * width_out;
THCudaBlas_Sgemm(state, 't', 'n', n_, m_, k_, 1.0f,
columns.data<scalar_t>(), k_,
grad_output_n.data<scalar_t>(), k_, 1.0f,
grad_weight.data<scalar_t>(), n_);
// gradient w.r.t. bias
// long m_ = channels_out;
// long k__ = height_out * width_out;
THCudaBlas_Sgemv(state,
't',
k_, m_, 1.0f,
grad_output_n.data<scalar_t>(), k_,
ones.data<scalar_t>(), 1, 1.0f,
grad_bias.data<scalar_t>(), 1);
}
return {
grad_input, grad_offset, grad_mask, grad_weight, grad_bias
};
}
\ No newline at end of file
#include "dcn_v2_im2col_cuda.h"
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THC.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCDeviceUtils.cuh>
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * gridDim.x)
const int CUDA_NUM_THREADS = 1024;
inline int GET_BLOCKS(const int N)
{
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
__device__ float dmcn_im2col_bilinear(const float *bottom_data, const int data_width,
const int height, const int width, float h, float w)
{
int h_low = floor(h);
int w_low = floor(w);
int h_high = h_low + 1;
int w_high = w_low + 1;
float lh = h - h_low;
float lw = w - w_low;
float hh = 1 - lh, hw = 1 - lw;
float v1 = 0;
if (h_low >= 0 && w_low >= 0)
v1 = bottom_data[h_low * data_width + w_low];
float v2 = 0;
if (h_low >= 0 && w_high <= width - 1)
v2 = bottom_data[h_low * data_width + w_high];
float v3 = 0;
if (h_high <= height - 1 && w_low >= 0)
v3 = bottom_data[h_high * data_width + w_low];
float v4 = 0;
if (h_high <= height - 1 && w_high <= width - 1)
v4 = bottom_data[h_high * data_width + w_high];
float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
return val;
}
__device__ float dmcn_get_gradient_weight(float argmax_h, float argmax_w,
const int h, const int w, const int height, const int width)
{
if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
{
//empty
return 0;
}
int argmax_h_low = floor(argmax_h);
int argmax_w_low = floor(argmax_w);
int argmax_h_high = argmax_h_low + 1;
int argmax_w_high = argmax_w_low + 1;
float weight = 0;
if (h == argmax_h_low && w == argmax_w_low)
weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
if (h == argmax_h_low && w == argmax_w_high)
weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
if (h == argmax_h_high && w == argmax_w_low)
weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
if (h == argmax_h_high && w == argmax_w_high)
weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
return weight;
}
__device__ float dmcn_get_coordinate_weight(float argmax_h, float argmax_w,
const int height, const int width, const float *im_data,
const int data_width, const int bp_dir)
{
if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
{
//empty
return 0;
}
int argmax_h_low = floor(argmax_h);
int argmax_w_low = floor(argmax_w);
int argmax_h_high = argmax_h_low + 1;
int argmax_w_high = argmax_w_low + 1;
float weight = 0;
if (bp_dir == 0)
{
if (argmax_h_low >= 0 && argmax_w_low >= 0)
weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
}
else if (bp_dir == 1)
{
if (argmax_h_low >= 0 && argmax_w_low >= 0)
weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
}
return weight;
}
__global__ void modulated_deformable_im2col_gpu_kernel(const int n,
const float *data_im, const float *data_offset, const float *data_mask,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int num_channels, const int deformable_group,
const int height_col, const int width_col,
float *data_col)
{
// launch channels * batch_size * height_col * width_col threads
CUDA_KERNEL_LOOP(index, n)
{
// NOTE(CharlesShang): different from Dai Jifeng's MXNet implementation, col_buffer is of shape (c*kw*kh, N, oh, ow)
// here columns is of shape (N, c*kw*kh, oh * ow), need to adapt axis
// index index of output matrix
const int w_col = index % width_col;
const int h_col = (index / width_col) % height_col;
// const int b_col = (index / width_col / height_col) % batch_size;
const int b_col = (index / width_col / height_col / num_channels) % batch_size;
// const int c_im = (index / width_col / height_col) / batch_size;
const int c_im = (index / width_col / height_col) % num_channels;
// const int c_col = c_im * kernel_h * kernel_w;
const int c_col = c_im * kernel_h * kernel_w;
// compute deformable group index
const int deformable_group_index = c_im / channel_per_deformable_group;
const int h_in = h_col * stride_h - pad_h;
const int w_in = w_col * stride_w - pad_w;
// float *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
float *data_col_ptr = data_col + ((b_col * num_channels * kernel_w * kernel_h + c_col) * height_col + h_col) * width_col + w_col;
//const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
const float *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
const float *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const float *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
for (int i = 0; i < kernel_h; ++i)
{
for (int j = 0; j < kernel_w; ++j)
{
const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
const float offset_h = data_offset_ptr[data_offset_h_ptr];
const float offset_w = data_offset_ptr[data_offset_w_ptr];
const float mask = data_mask_ptr[data_mask_hw_ptr];
float val = static_cast<float>(0);
const float h_im = h_in + i * dilation_h + offset_h;
const float w_im = w_in + j * dilation_w + offset_w;
//if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
{
//const float map_h = i * dilation_h + offset_h;
//const float map_w = j * dilation_w + offset_w;
//const int cur_height = height - h_in;
//const int cur_width = width - w_in;
//val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
}
*data_col_ptr = val * mask;
// data_col_ptr += batch_size * height_col * width_col;
data_col_ptr += height_col * width_col;
}
}
}
}
__global__ void modulated_deformable_col2im_gpu_kernel(const int n,
const float *data_col, const float *data_offset, const float *data_mask,
const int channels, const int height, const int width,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int deformable_group,
const int height_col, const int width_col,
float *grad_im)
{
CUDA_KERNEL_LOOP(index, n)
{
const int j = (index / width_col / height_col / batch_size) % kernel_w;
const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
// compute the start and end of the output
const int deformable_group_index = c / channel_per_deformable_group;
int w_out = index % width_col;
int h_out = (index / width_col) % height_col;
int b = (index / width_col / height_col) % batch_size;
int w_in = w_out * stride_w - pad_w;
int h_in = h_out * stride_h - pad_h;
const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
const float offset_h = data_offset_ptr[data_offset_h_ptr];
const float offset_w = data_offset_ptr[data_offset_w_ptr];
const float mask = data_mask_ptr[data_mask_hw_ptr];
const float cur_inv_h_data = h_in + i * dilation_h + offset_h;
const float cur_inv_w_data = w_in + j * dilation_w + offset_w;
const float cur_top_grad = data_col[index] * mask;
const int cur_h = (int)cur_inv_h_data;
const int cur_w = (int)cur_inv_w_data;
for (int dy = -2; dy <= 2; dy++)
{
for (int dx = -2; dx <= 2; dx++)
{
if (cur_h + dy >= 0 && cur_h + dy < height &&
cur_w + dx >= 0 && cur_w + dx < width &&
abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
abs(cur_inv_w_data - (cur_w + dx)) < 1)
{
int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
float weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
}
}
}
}
}
__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n,
const float *data_col, const float *data_im,
const float *data_offset, const float *data_mask,
const int channels, const int height, const int width,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int channel_per_deformable_group,
const int batch_size, const int offset_channels, const int deformable_group,
const int height_col, const int width_col,
float *grad_offset, float *grad_mask)
{
CUDA_KERNEL_LOOP(index, n)
{
float val = 0, mval = 0;
int w = index % width_col;
int h = (index / width_col) % height_col;
int c = (index / width_col / height_col) % offset_channels;
int b = (index / width_col / height_col) / offset_channels;
// compute the start and end of the output
const int deformable_group_index = c / (2 * kernel_h * kernel_w);
const int col_step = kernel_h * kernel_w;
int cnt = 0;
const float *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col;
const float *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;
const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)
{
const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;
const int bp_dir = offset_c % 2;
int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
int w_out = col_pos % width_col;
int h_out = (col_pos / width_col) % height_col;
int w_in = w_out * stride_w - pad_w;
int h_in = h_out * stride_h - pad_h;
const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);
const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
const float offset_h = data_offset_ptr[data_offset_h_ptr];
const float offset_w = data_offset_ptr[data_offset_w_ptr];
const float mask = data_mask_ptr[data_mask_hw_ptr];
float inv_h = h_in + i * dilation_h + offset_h;
float inv_w = w_in + j * dilation_w + offset_w;
if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
{
inv_h = inv_w = -2;
}
else
{
mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);
}
const float weight = dmcn_get_coordinate_weight(
inv_h, inv_w,
height, width, data_im_ptr + cnt * height * width, width, bp_dir);
val += weight * data_col_ptr[col_pos] * mask;
cnt += 1;
}
// KERNEL_ASSIGN(grad_offset[index], offset_req, val);
grad_offset[index] = val;
if (offset_c % 2 == 0)
// KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);
grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;
}
}
void modulated_deformable_im2col_cuda(cudaStream_t stream,
const float* data_im, const float* data_offset, const float* data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, float* data_col) {
// num_axes should be smaller than block size
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * batch_size * height_col * width_col;
modulated_deformable_im2col_gpu_kernel
<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS,
0, stream>>>(
num_kernels, data_im, data_offset, data_mask, height_im, width_im, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
batch_size, channels, deformable_group, height_col, width_col, data_col);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
}
}
void modulated_deformable_col2im_cuda(cudaStream_t stream,
const float* data_col, const float* data_offset, const float* data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, float* grad_im){
const int channel_per_deformable_group = channels / deformable_group;
const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
modulated_deformable_col2im_gpu_kernel
<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS,
0, stream>>>(
num_kernels, data_col, data_offset, data_mask, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
batch_size, deformable_group, height_col, width_col, grad_im);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
}
}
void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
const float* data_col, const float* data_im, const float* data_offset, const float* data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group,
float* grad_offset, float* grad_mask) {
const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
modulated_deformable_col2im_coord_gpu_kernel
<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS,
0, stream>>>(
num_kernels, data_col, data_im, data_offset, data_mask, channels, height_im, width_im,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, channel_per_deformable_group,
batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
grad_offset, grad_mask);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
}
}
\ No newline at end of file
/*!
******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
*
* COPYRIGHT
*
* All contributions by the University of California:
* Copyright (c) 2014-2017 The Regents of the University of California (Regents)
* All rights reserved.
*
* All other contributions:
* Copyright (c) 2014-2017, the respective contributors
* All rights reserved.
*
* Caffe uses a shared copyright model: each contributor holds copyright over
* their contributions to Caffe. The project versioning records all such
* contribution and copyright details. If a contributor wants to further mark
* their specific copyright on a particular contribution, they should indicate
* their copyright solely in the commit message of the change when it is
* committed.
*
* LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* CONTRIBUTION AGREEMENT
*
* By contributing to the BVLC/caffe repository through pull-request, comment,
* or otherwise, the contributor releases their content to the
* license and copyright terms herein.
*
***************** END Caffe Copyright Notice and Disclaimer ********************
*
* Copyright (c) 2018 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file modulated_deformable_im2col.h
* \brief Function definitions of converting an image to
* column matrix based on kernel, padding, dilation, and offset.
* These functions are mainly used in deformable convolution operators.
* \ref: https://arxiv.org/abs/1811.11168
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
*/
/***************** Adapted by Charles Shang *********************/
#ifndef DCN_V2_IM2COL_CUDA
#define DCN_V2_IM2COL_CUDA
#ifdef __cplusplus
extern "C"
{
#endif
void modulated_deformable_im2col_cuda(cudaStream_t stream,
const float *data_im, const float *data_offset, const float *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, float *data_col);
void modulated_deformable_col2im_cuda(cudaStream_t stream,
const float *data_col, const float *data_offset, const float *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group, float *grad_im);
void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
const int batch_size, const int channels, const int height_im, const int width_im,
const int height_col, const int width_col, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group,
float *grad_offset, float *grad_mask);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
/*!
* Copyright (c) 2017 Microsoft
* Licensed under The MIT License [see LICENSE for details]
* \file deformable_psroi_pooling.cu
* \brief
* \author Yi Li, Guodong Zhang, Jifeng Dai
*/
/***************** Adapted by Charles Shang *********************/
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <iostream>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THC.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCDeviceUtils.cuh>
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * gridDim.x)
const int CUDA_NUM_THREADS = 1024;
inline int GET_BLOCKS(const int N)
{
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
}
template <typename T>
__device__ T bilinear_interp(
const T *data,
const T x,
const T y,
const int width,
const int height)
{
int x1 = floor(x);
int x2 = ceil(x);
int y1 = floor(y);
int y2 = ceil(y);
T dist_x = static_cast<T>(x - x1);
T dist_y = static_cast<T>(y - y1);
T value11 = data[y1 * width + x1];
T value12 = data[y2 * width + x1];
T value21 = data[y1 * width + x2];
T value22 = data[y2 * width + x2];
T value = (1 - dist_x) * (1 - dist_y) * value11 +
(1 - dist_x) * dist_y * value12 +
dist_x * (1 - dist_y) * value21 +
dist_x * dist_y * value22;
return value;
}
template <typename T>
__global__ void DeformablePSROIPoolForwardKernel(
const int count,
const T *bottom_data,
const T spatial_scale,
const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
const T *bottom_rois, const T *bottom_trans,
const int no_trans,
const T trans_std,
const int sample_per_part,
const int output_dim,
const int group_size,
const int part_size,
const int num_classes,
const int channels_each_class,
T *top_data,
T *top_count)
{
CUDA_KERNEL_LOOP(index, count)
{
// The output is in order (n, ctop, ph, pw)
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int ctop = (index / pooled_width / pooled_height) % output_dim;
int n = index / pooled_width / pooled_height / output_dim;
// [start, end) interval for spatial sampling
const T *offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
T roi_start_w = static_cast<T>(round(offset_bottom_rois[1])) * spatial_scale - 0.5;
T roi_start_h = static_cast<T>(round(offset_bottom_rois[2])) * spatial_scale - 0.5;
T roi_end_w = static_cast<T>(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;
T roi_end_h = static_cast<T>(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;
// Force too small ROIs to be 1x1
T roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
T roi_height = max(roi_end_h - roi_start_h, 0.1);
// Compute w and h at bottom
T bin_size_h = roi_height / static_cast<T>(pooled_height);
T bin_size_w = roi_width / static_cast<T>(pooled_width);
T sub_bin_size_h = bin_size_h / static_cast<T>(sample_per_part);
T sub_bin_size_w = bin_size_w / static_cast<T>(sample_per_part);
int part_h = floor(static_cast<T>(ph) / pooled_height * part_size);
int part_w = floor(static_cast<T>(pw) / pooled_width * part_size);
int class_id = ctop / channels_each_class;
T trans_x = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;
T trans_y = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;
T wstart = static_cast<T>(pw) * bin_size_w + roi_start_w;
wstart += trans_x * roi_width;
T hstart = static_cast<T>(ph) * bin_size_h + roi_start_h;
hstart += trans_y * roi_height;
T sum = 0;
int count = 0;
int gw = floor(static_cast<T>(pw) * group_size / pooled_width);
int gh = floor(static_cast<T>(ph) * group_size / pooled_height);
gw = min(max(gw, 0), group_size - 1);
gh = min(max(gh, 0), group_size - 1);
const T *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width;
for (int ih = 0; ih < sample_per_part; ih++)
{
for (int iw = 0; iw < sample_per_part; iw++)
{
T w = wstart + iw * sub_bin_size_w;
T h = hstart + ih * sub_bin_size_h;
// bilinear interpolation
if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)
{
continue;
}
w = min(max(w, 0.), width - 1.);
h = min(max(h, 0.), height - 1.);
int c = (ctop * group_size + gh) * group_size + gw;
T val = bilinear_interp(offset_bottom_data + c * height * width, w, h, width, height);
sum += val;
count++;
}
}
top_data[index] = count == 0 ? static_cast<T>(0) : sum / count;
top_count[index] = count;
}
}
template <typename T>
__global__ void DeformablePSROIPoolBackwardAccKernel(
const int count,
const T *top_diff,
const T *top_count,
const int num_rois,
const T spatial_scale,
const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
const int output_dim,
T *bottom_data_diff, T *bottom_trans_diff,
const T *bottom_data,
const T *bottom_rois,
const T *bottom_trans,
const int no_trans,
const T trans_std,
const int sample_per_part,
const int group_size,
const int part_size,
const int num_classes,
const int channels_each_class)
{
CUDA_KERNEL_LOOP(index, count)
{
// The output is in order (n, ctop, ph, pw)
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int ctop = (index / pooled_width / pooled_height) % output_dim;
int n = index / pooled_width / pooled_height / output_dim;
// [start, end) interval for spatial sampling
const T *offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
T roi_start_w = static_cast<T>(round(offset_bottom_rois[1])) * spatial_scale - 0.5;
T roi_start_h = static_cast<T>(round(offset_bottom_rois[2])) * spatial_scale - 0.5;
T roi_end_w = static_cast<T>(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;
T roi_end_h = static_cast<T>(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;
// Force too small ROIs to be 1x1
T roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
T roi_height = max(roi_end_h - roi_start_h, 0.1);
// Compute w and h at bottom
T bin_size_h = roi_height / static_cast<T>(pooled_height);
T bin_size_w = roi_width / static_cast<T>(pooled_width);
T sub_bin_size_h = bin_size_h / static_cast<T>(sample_per_part);
T sub_bin_size_w = bin_size_w / static_cast<T>(sample_per_part);
int part_h = floor(static_cast<T>(ph) / pooled_height * part_size);
int part_w = floor(static_cast<T>(pw) / pooled_width * part_size);
int class_id = ctop / channels_each_class;
T trans_x = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std;
T trans_y = no_trans ? static_cast<T>(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std;
T wstart = static_cast<T>(pw) * bin_size_w + roi_start_w;
wstart += trans_x * roi_width;
T hstart = static_cast<T>(ph) * bin_size_h + roi_start_h;
hstart += trans_y * roi_height;
if (top_count[index] <= 0)
{
continue;
}
T diff_val = top_diff[index] / top_count[index];
const T *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width;
T *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width;
int gw = floor(static_cast<T>(pw) * group_size / pooled_width);
int gh = floor(static_cast<T>(ph) * group_size / pooled_height);
gw = min(max(gw, 0), group_size - 1);
gh = min(max(gh, 0), group_size - 1);
for (int ih = 0; ih < sample_per_part; ih++)
{
for (int iw = 0; iw < sample_per_part; iw++)
{
T w = wstart + iw * sub_bin_size_w;
T h = hstart + ih * sub_bin_size_h;
// bilinear interpolation
if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)
{
continue;
}
w = min(max(w, 0.), width - 1.);
h = min(max(h, 0.), height - 1.);
int c = (ctop * group_size + gh) * group_size + gw;
// backward on feature
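// the q00..q11 weights below split the pooled gradient diff_val among the four integer
// neighbours (x0, y0)..(x1, y1) of the sampled point, mirroring bilinear_interp in the forward pass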
int x0 = floor(w);
int x1 = ceil(w);
int y0 = floor(h);
int y1 = ceil(h);
T dist_x = w - x0, dist_y = h - y0;
T q00 = (1 - dist_x) * (1 - dist_y);
T q01 = (1 - dist_x) * dist_y;
T q10 = dist_x * (1 - dist_y);
T q11 = dist_x * dist_y;
int bottom_index_base = c * height * width;
atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val);
atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val);
if (no_trans)
{
continue;
}
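// gradient w.r.t. the learned offsets: re-read the four neighbours and differentiate the
// bilinear sample in x and y, then scale by trans_std and by the ROI width/height
// (wstart/hstart were shifted by trans_x * roi_width and trans_y * roi_height above)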
T U00 = offset_bottom_data[bottom_index_base + y0 * width + x0];
T U01 = offset_bottom_data[bottom_index_base + y1 * width + x0];
T U10 = offset_bottom_data[bottom_index_base + y0 * width + x1];
T U11 = offset_bottom_data[bottom_index_base + y1 * width + x1];
T diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val;
diff_x *= roi_width;
T diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val;
diff_y *= roi_height;
atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x);
atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y);
}
}
}
}
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
AT_ASSERTM(bbox.type().is_cuda(), "rois must be a CUDA tensor");
AT_ASSERTM(trans.type().is_cuda(), "trans must be a CUDA tensor");
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
AT_ASSERTM(channels == output_dim, "input channels and output channels must be equal");
auto pooled_height = pooled_size;
auto pooled_width = pooled_size;
auto out = at::empty({num_bbox, output_dim, pooled_height, pooled_width}, input.options());
long out_size = num_bbox * output_dim * pooled_height * pooled_width;
auto top_count = at::zeros({num_bbox, output_dim, pooled_height, pooled_width}, input.options());
const int num_classes = no_trans ? 1 : channels_trans / 2;
const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
if (out.numel() == 0)
{
THCudaCheck(cudaGetLastError());
return std::make_tuple(out, top_count);
}
dim3 grid(std::min(THCCeilDiv(out_size, 512L), 4096L));
dim3 block(512);
AT_DISPATCH_FLOATING_TYPES(input.type(), "dcn_v2_psroi_pooling_cuda_forward", [&] {
DeformablePSROIPoolForwardKernel<scalar_t><<<grid, block, 0, stream>>>(
out_size,
input.contiguous().data<scalar_t>(),
spatial_scale,
channels,
height, width,
pooled_height,
pooled_width,
bbox.contiguous().data<scalar_t>(),
trans.contiguous().data<scalar_t>(),
no_trans,
trans_std,
sample_per_part,
output_dim,
group_size,
part_size,
num_classes,
channels_each_class,
out.data<scalar_t>(),
top_count.data<scalar_t>());
});
THCudaCheck(cudaGetLastError());
return std::make_tuple(out, top_count);
}
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,
const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const at::Tensor &top_count,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
AT_ASSERTM(out_grad.type().is_cuda(), "out_grad must be a CUDA tensor");
AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
AT_ASSERTM(bbox.type().is_cuda(), "bbox must be a CUDA tensor");
AT_ASSERTM(trans.type().is_cuda(), "trans must be a CUDA tensor");
AT_ASSERTM(top_count.type().is_cuda(), "top_count must be a CUDA tensor");
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
AT_ASSERTM(channels == output_dim, "input channels and output channels must be equal");
auto pooled_height = pooled_size;
auto pooled_width = pooled_size;
long out_size = num_bbox * output_dim * pooled_height * pooled_width;
const int num_classes = no_trans ? 1 : channels_trans / 2;
const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;
auto input_grad = at::zeros({batch, channels, height, width}, out_grad.options());
auto trans_grad = at::zeros_like(trans);
if (input_grad.numel() == 0)
{
THCudaCheck(cudaGetLastError());
return std::make_tuple(input_grad, trans_grad);
}
dim3 grid(std::min(THCCeilDiv(out_size, 512L), 4096L));
dim3 block(512);
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(out_grad.type(), "dcn_v2_psroi_pooling_cuda_backward", [&] {
DeformablePSROIPoolBackwardAccKernel<scalar_t><<<grid, block, 0, stream>>>(
out_size,
out_grad.contiguous().data<scalar_t>(),
top_count.contiguous().data<scalar_t>(),
num_bbox,
spatial_scale,
channels,
height,
width,
pooled_height,
pooled_width,
output_dim,
input_grad.contiguous().data<scalar_t>(),
trans_grad.contiguous().data<scalar_t>(),
input.contiguous().data<scalar_t>(),
bbox.contiguous().data<scalar_t>(),
trans.contiguous().data<scalar_t>(),
no_trans,
trans_std,
sample_per_part,
group_size,
part_size,
num_classes,
channels_each_class);
});
THCudaCheck(cudaGetLastError());
return std::make_tuple(input_grad, trans_grad);
}
\ No newline at end of file
#pragma once
#include <torch/extension.h>
at::Tensor
dcn_v2_cuda_forward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const int kernel_h,
const int kernel_w,
const int stride_h,
const int stride_w,
const int pad_h,
const int pad_w,
const int dilation_h,
const int dilation_w,
const int deformable_group);
std::vector<at::Tensor>
dcn_v2_cuda_backward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const at::Tensor &grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group);
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,
const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const at::Tensor &top_count,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
\ No newline at end of file
#pragma once
#include "cpu/vision.h"
#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif
at::Tensor
dcn_v2_forward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const int kernel_h,
const int kernel_w,
const int stride_h,
const int stride_w,
const int pad_h,
const int pad_w,
const int dilation_h,
const int dilation_w,
const int deformable_group)
{
if (input.type().is_cuda())
{
#ifdef WITH_CUDA
return dcn_v2_cuda_forward(input, weight, bias, offset, mask,
kernel_h, kernel_w,
stride_h, stride_w,
pad_h, pad_w,
dilation_h, dilation_w,
deformable_group);
#else
AT_ERROR("Not compiled with GPU support");
#endif
}
AT_ERROR("Not implemented on the CPU");
}
std::vector<at::Tensor>
dcn_v2_backward(const at::Tensor &input,
const at::Tensor &weight,
const at::Tensor &bias,
const at::Tensor &offset,
const at::Tensor &mask,
const at::Tensor &grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group)
{
if (input.type().is_cuda())
{
#ifdef WITH_CUDA
return dcn_v2_cuda_backward(input,
weight,
bias,
offset,
mask,
grad_output,
kernel_h, kernel_w,
stride_h, stride_w,
pad_h, pad_w,
dilation_h, dilation_w,
deformable_group);
#else
AT_ERROR("Not compiled with GPU support");
#endif
}
AT_ERROR("Not implemented on the CPU");
}
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_forward(const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
if (input.type().is_cuda())
{
#ifdef WITH_CUDA
return dcn_v2_psroi_pooling_cuda_forward(input,
bbox,
trans,
no_trans,
spatial_scale,
output_dim,
group_size,
pooled_size,
part_size,
sample_per_part,
trans_std);
#else
AT_ERROR("Not compiled with GPU support");
#endif
}
AT_ERROR("Not implemented on the CPU");
}
std::tuple<at::Tensor, at::Tensor>
dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad,
const at::Tensor &input,
const at::Tensor &bbox,
const at::Tensor &trans,
const at::Tensor &top_count,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
if (input.type().is_cuda())
{
#ifdef WITH_CUDA
return dcn_v2_psroi_pooling_cuda_backward(out_grad,
input,
bbox,
trans,
top_count,
no_trans,
spatial_scale,
output_dim,
group_size,
pooled_size,
part_size,
sample_per_part,
trans_std);
#else
AT_ERROR("Not compiled with GPU support");
#endif
}
AT_ERROR("Not implemented on the CPU");
}
\ No newline at end of file
#include "dcn_v2.h"
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward");
m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward");
m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward");
m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward");
}
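# --- illustrative build sketch (not part of the original commit; source file names are assumptions) ---
# The bindings above are usually compiled with torch.utils.cpp_extension into the "_ext" module
# that is later imported as `from lib.csrc.dcn_v2 import _ext`.
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='dcn_v2_ext',
    ext_modules=[
        CUDAExtension(
            name='_ext',
            # placeholder paths; the actual .cpp/.cu file names are not shown in this dump
            sources=['src/vision.cpp', 'src/cuda/dcn_v2_cuda.cu', 'src/cuda/dcn_v2_psroi_pooling_cuda.cu'],
            extra_compile_args={'cxx': ['-O2'], 'nvcc': ['-O2']},
        )
    ],
    cmdclass={'build_ext': BuildExtension},
)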
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import gradcheck
from dcn_v2 import dcn_v2_conv, DCNv2, DCN
from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling
deformable_groups = 1
N, inC, inH, inW = 2, 2, 4, 4
outC = 2
kH, kW = 3, 3
def conv_identify(weight, bias):
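# zero everything, then set the centre tap to 1 wherever the input and output channel
# indices match, so the convolution acts as an identity mapping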
weight.data.zero_()
bias.data.zero_()
o, i, h, w = weight.shape
y = h//2
x = w//2
for p in range(i):
for q in range(o):
if p == q:
weight.data[q, p, y, x] = 1.0
def check_zero_offset():
conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW,
kernel_size=(kH, kW),
stride=(1, 1),
padding=(1, 1),
bias=True).cuda()
conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW,
kernel_size=(kH, kW),
stride=(1, 1),
padding=(1, 1),
bias=True).cuda()
dcn_v2 = DCNv2(inC, outC, (kH, kW),
stride=1, padding=1, dilation=1,
deformable_groups=deformable_groups).cuda()
conv_offset.weight.data.zero_()
conv_offset.bias.data.zero_()
conv_mask.weight.data.zero_()
conv_mask.bias.data.zero_()
conv_identify(dcn_v2.weight, dcn_v2.bias)
input = torch.randn(N, inC, inH, inW).cuda()
offset = conv_offset(input)
mask = conv_mask(input)
mask = torch.sigmoid(mask)
output = dcn_v2(input, offset, mask)
output *= 2
d = (input - output).abs().max()
if d < 1e-10:
print('Zero offset passed')
else:
print('Zero offset failed')
print(input)
print(output)
def check_gradient_dconv():
input = torch.rand(N, inC, inH, inW).cuda() * 0.01
input.requires_grad = True
offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2
# offset.data.zero_()
# offset.data -= 0.5
offset.requires_grad = True
mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda()
# mask.data.zero_()
mask.requires_grad = True
mask = torch.sigmoid(mask)
weight = torch.randn(outC, inC, kH, kW).cuda()
weight.requires_grad = True
bias = torch.rand(outC).cuda()
bias.requires_grad = True
stride = 1
padding = 1
dilation = 1
print('check_gradient_dconv: ',
gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias,
stride, padding, dilation, deformable_groups),
eps=1e-3, atol=1e-4, rtol=1e-2))
def check_pooling_zero_offset():
input = torch.randn(2, 16, 64, 64).cuda().zero_()
input[0, :, 16:26, 16:26] = 1.
input[1, :, 10:20, 20:30] = 2.
rois = torch.tensor([
[0, 65, 65, 103, 103],
[1, 81, 41, 119, 79],
]).cuda().float()
pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=16,
no_trans=True,
group_size=1,
trans_std=0.0).cuda()
out = pooling(input, rois, input.new())
s = ', '.join(['%f' % out[i, :, :, :].mean().item()
for i in range(rois.shape[0])])
print(s)
dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=16,
no_trans=False,
group_size=1,
trans_std=0.0).cuda()
offset = torch.randn(20, 2, 7, 7).cuda().zero_()
dout = dpooling(input, rois, offset)
s = ', '.join(['%f' % dout[i, :, :, :].mean().item()
for i in range(rois.shape[0])])
print(s)
def check_gradient_dpooling():
input = torch.randn(2, 3, 5, 5).cuda() * 0.01
N = 4
batch_inds = torch.randint(2, (N, 1)).cuda().float()
x = torch.rand((N, 1)).cuda().float() * 15
y = torch.rand((N, 1)).cuda().float() * 15
w = torch.rand((N, 1)).cuda().float() * 10
h = torch.rand((N, 1)).cuda().float() * 10
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
offset = torch.randn(N, 2, 3, 3).cuda()
input.requires_grad = True
offset.requires_grad = True
spatial_scale = 1.0 / 4
pooled_size = 3
output_dim = 3
no_trans = 0
group_size = 1
trans_std = 0.0
sample_per_part = 4
part_size = pooled_size
print('check_gradient_dpooling:',
gradcheck(dcn_v2_pooling, (input, rois, offset,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size,
part_size,
sample_per_part,
trans_std),
eps=1e-4))
def example_dconv():
input = torch.randn(2, 64, 128, 128).cuda()
# wrap all things (offset and mask) in DCN
dcn = DCN(64, 64, kernel_size=(3, 3), stride=1,
padding=1, deformable_groups=2).cuda()
# print(dcn.weight.shape, input.shape)
output = dcn(input)
target = output.new(*output.size())
target.data.uniform_(-0.01, 0.01)
error = (target - output).mean()
error.backward()
print(output.shape)
def example_dpooling():
input = torch.randn(2, 32, 64, 64).cuda()
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
offset = torch.randn(20, 2, 7, 7).cuda()
input.requires_grad = True
offset.requires_grad = True
# normal roi_align
pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=True,
group_size=1,
trans_std=0.1).cuda()
# deformable pooling
dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
out = pooling(input, rois, offset)
dout = dpooling(input, rois, offset)
print(out.shape)
print(dout.shape)
target_out = out.new(*out.size())
target_out.data.uniform_(-0.01, 0.01)
target_dout = dout.new(*dout.size())
target_dout.data.uniform_(-0.01, 0.01)
e = (target_out - out).mean()
e.backward()
e = (target_dout - dout).mean()
e.backward()
def example_mdpooling():
input = torch.randn(2, 32, 64, 64).cuda()
input.requires_grad = True
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
# modulated deformable pooling (V2)
dpooling = DCNPooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1,
deform_fc_dim=1024).cuda()
dout = dpooling(input, rois)
target = dout.new(*dout.size())
target.data.uniform_(-0.1, 0.1)
error = (target - dout).mean()
error.backward()
print(dout.shape)
if __name__ == '__main__':
example_dconv()
example_dpooling()
example_mdpooling()
check_pooling_zero_offset()
# zero offset check
if inC == outC:
check_zero_offset()
check_gradient_dpooling()
check_gradient_dconv()
# """
# ****** Note: the "backward is not reentrant" error may not be a serious problem,
# ****** since the max error is less than 1e-7.
# ****** Still looking for what triggers this problem.
# """
"""
@Time : 2020/6/1 16:24
@Author : Qin Dian
@Manual :
"""
\ No newline at end of file
"""
@Time : 2020/6/1 16:31
@Author : Qin Dian
@Manual :
"""
\ No newline at end of file
import math
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from torch.autograd.function import once_differentiable
from lib.csrc.dcn_v2 import _ext as _backend
class _DCNv2(Function):
@staticmethod
def forward(ctx, input, offset, mask, weight, bias,
stride, padding, dilation, deformable_groups):
ctx.stride = _pair(stride)
ctx.padding = _pair(padding)
ctx.dilation = _pair(dilation)
ctx.kernel_size = _pair(weight.shape[2:4])
ctx.deformable_groups = deformable_groups
output = _backend.dcn_v2_forward(input, weight, bias,
offset, mask,
ctx.kernel_size[0], ctx.kernel_size[1],
ctx.stride[0], ctx.stride[1],
ctx.padding[0], ctx.padding[1],
ctx.dilation[0], ctx.dilation[1],
ctx.deformable_groups)
ctx.save_for_backward(input, offset, mask, weight, bias)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input, grad_offset, grad_mask, grad_weight, grad_bias = \
_backend.dcn_v2_backward(input, weight,
bias,
offset, mask,
grad_output,
ctx.kernel_size[0], ctx.kernel_size[1],
ctx.stride[0], ctx.stride[1],
ctx.padding[0], ctx.padding[1],
ctx.dilation[0], ctx.dilation[1],
ctx.deformable_groups)
return grad_input, grad_offset, grad_mask, grad_weight, grad_bias,\
None, None, None, None,
dcn_v2_conv = _DCNv2.apply
class DCNv2(nn.Module):
def __init__(self, in_channels, out_channels,
kernel_size, stride, padding, dilation=1, deformable_groups=1):
super(DCNv2, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = _pair(stride)
self.padding = _pair(padding)
self.dilation = _pair(dilation)
self.deformable_groups = deformable_groups
self.weight = nn.Parameter(torch.Tensor(
out_channels, in_channels, *self.kernel_size))
self.bias = nn.Parameter(torch.Tensor(out_channels))
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
self.bias.data.zero_()
def forward(self, input, offset, mask):
assert 2 * self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \
offset.shape[1]
assert self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \
mask.shape[1]
return dcn_v2_conv(input, offset, mask,
self.weight,
self.bias,
self.stride,
self.padding,
self.dilation,
self.deformable_groups)
class DCN(DCNv2):
def __init__(self, in_channels, out_channels,
kernel_size, stride, padding,
dilation=1, deformable_groups=1):
super(DCN, self).__init__(in_channels, out_channels,
kernel_size, stride, padding, dilation, deformable_groups)
channels_ = self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1]
self.conv_offset_mask = nn.Conv2d(self.in_channels,
channels_,
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset_mask.weight.data.zero_()
self.conv_offset_mask.bias.data.zero_()
def forward(self, input):
out = self.conv_offset_mask(input)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
return dcn_v2_conv(input, offset, mask,
self.weight, self.bias,
self.stride,
self.padding,
self.dilation,
self.deformable_groups)
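# Illustrative usage (a sketch, assumes a CUDA build of the extension): DCN is a drop-in
# replacement for nn.Conv2d that predicts its own offsets and modulation masks from the input.
# dcn = DCN(64, 64, kernel_size=3, stride=1, padding=1, deformable_groups=2).cuda()
# y = dcn(torch.randn(1, 64, 32, 32).cuda())  # y has shape (1, 64, 32, 32)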
class _DCNv2Pooling(Function):
@staticmethod
def forward(ctx, input, rois, offset,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
ctx.spatial_scale = spatial_scale
ctx.no_trans = int(no_trans)
ctx.output_dim = output_dim
ctx.group_size = group_size
ctx.pooled_size = pooled_size
ctx.part_size = pooled_size if part_size is None else part_size
ctx.sample_per_part = sample_per_part
ctx.trans_std = trans_std
output, output_count = \
_backend.dcn_v2_psroi_pooling_forward(input, rois, offset,
ctx.no_trans, ctx.spatial_scale,
ctx.output_dim, ctx.group_size,
ctx.pooled_size, ctx.part_size,
ctx.sample_per_part, ctx.trans_std)
ctx.save_for_backward(input, rois, offset, output_count)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, rois, offset, output_count = ctx.saved_tensors
grad_input, grad_offset = \
_backend.dcn_v2_psroi_pooling_backward(grad_output,
input,
rois,
offset,
output_count,
ctx.no_trans,
ctx.spatial_scale,
ctx.output_dim,
ctx.group_size,
ctx.pooled_size,
ctx.part_size,
ctx.sample_per_part,
ctx.trans_std)
return grad_input, None, grad_offset, \
None, None, None, None, None, None, None, None
dcn_v2_pooling = _DCNv2Pooling.apply
class DCNv2Pooling(nn.Module):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
super(DCNv2Pooling, self).__init__()
self.spatial_scale = spatial_scale
self.pooled_size = pooled_size
self.output_dim = output_dim
self.no_trans = no_trans
self.group_size = group_size
self.part_size = pooled_size if part_size is None else part_size
self.sample_per_part = sample_per_part
self.trans_std = trans_std
def forward(self, input, rois, offset):
assert input.shape[1] == self.output_dim
if self.no_trans:
offset = input.new()
return dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
self.no_trans,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
class DCNPooling(DCNv2Pooling):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0,
deform_fc_dim=1024):
super(DCNPooling, self).__init__(spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size,
part_size,
sample_per_part,
trans_std)
self.deform_fc_dim = deform_fc_dim
if not no_trans:
self.offset_mask_fc = nn.Sequential(
nn.Linear(self.pooled_size * self.pooled_size *
self.output_dim, self.deform_fc_dim),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_dim, self.deform_fc_dim),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_dim, self.pooled_size *
self.pooled_size * 3)
)
self.offset_mask_fc[4].weight.data.zero_()
self.offset_mask_fc[4].bias.data.zero_()
def forward(self, input, rois):
offset = input.new()
if not self.no_trans:
# do roi_align first
n = rois.shape[0]
roi = dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
True, # no trans
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
# build mask and offset
offset_mask = self.offset_mask_fc(roi.view(n, -1))
offset_mask = offset_mask.view(
n, 3, self.pooled_size, self.pooled_size)
o1, o2, mask = torch.chunk(offset_mask, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
# do pooling with offset and mask
return dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
self.no_trans,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std) * mask
# only roi_align
return dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
self.no_trans,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
import math
import logging
import numpy as np
from os.path import join
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from networks.backbones.dcn_v2 import DCN
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 2
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(Bottleneck, self).__init__()
expansion = Bottleneck.expansion
bottle_planes = planes // expansion
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class BottleneckX(nn.Module):
expansion = 2
cardinality = 32
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BottleneckX, self).__init__()
cardinality = BottleneckX.cardinality
# dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))
# bottle_planes = dim * cardinality
bottle_planes = planes * cardinality // 32
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation, bias=False,
dilation=dilation, groups=cardinality)
self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1,
stride=1, bias=False, padding=(kernel_size - 1) // 2)
self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(torch.cat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class Tree(nn.Module):
def __init__(self, levels, block, in_channels, out_channels, stride=1,
level_root=False, root_dim=0, root_kernel_size=1,
dilation=1, root_residual=False):
super(Tree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride,
dilation=dilation)
self.tree2 = block(out_channels, out_channels, 1,
dilation=dilation)
else:
self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
stride, root_dim=0,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
root_dim=root_dim + out_channels,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
if levels == 1:
self.root = Root(root_dim, out_channels, root_kernel_size,
root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.downsample = None
self.project = None
self.levels = levels
if stride > 1:
self.downsample = nn.MaxPool2d(stride, stride=stride)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2d(in_channels, out_channels,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
)
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x) if self.downsample else x
residual = self.project(bottom) if self.project else bottom
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
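# A Tree recursively stacks two sub-trees (single blocks at the last level) and fuses their
# outputs, together with the optionally downsampled input, through a 1x1 Root convolution;
# this hierarchical aggregation is what deep layer aggregation (DLA) refers to.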
class DLA(nn.Module):
def __init__(self, levels, channels, num_classes=1000,
block=BasicBlock, residual_root=False, linear_root=False):
super(DLA, self).__init__()
self.channels = channels
self.num_classes = num_classes
self.base_layer = nn.Sequential(
nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
padding=3, bias=False),
nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),
nn.ReLU(inplace=True))
self.level0 = self._make_conv_level(
channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(
channels[0], channels[1], levels[1], stride=2)
self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
level_root=False,
root_residual=residual_root)
self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
level_root=True, root_residual=residual_root)
self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
level_root=True, root_residual=residual_root)
self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
level_root=True, root_residual=residual_root)
# for m in self.modules():
# if isinstance(m, nn.Conv2d):
# n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
# m.weight.data.normal_(0, math.sqrt(2. / n))
# elif isinstance(m, nn.BatchNorm2d):
# m.weight.data.fill_(1)
# m.bias.data.zero_()
def _make_level(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes:
downsample = nn.Sequential(
nn.MaxPool2d(stride, stride=stride),
nn.Conv2d(inplanes, planes,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(inplanes, planes, stride, downsample=downsample))
for i in range(1, blocks):
layers.append(block(inplanes, planes))
return nn.Sequential(*layers)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias=False, dilation=dilation),
nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)])
inplanes = planes
return nn.Sequential(*modules)
def forward(self, x):
y = []
x = self.base_layer(x)
for i in range(6):
x = getattr(self, 'level{}'.format(i))(x)
y.append(x)
return y
def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
# fc = self.fc
if name.endswith('.pth'):
model_weights = torch.load(data + name)
else:
model_url = get_model_url(data, name, hash)
model_weights = model_zoo.load_url(model_url)
num_classes = len(model_weights[list(model_weights.keys())[-1]])
self.fc = nn.Conv2d(
self.channels[-1], num_classes,
kernel_size=1, stride=1, padding=0, bias=True)
self.load_state_dict(model_weights)
# self.fc = fc
def dla34(pretrained=True, **kwargs): # DLA-34
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 128, 256, 512],
block=BasicBlock, **kwargs)
if pretrained:
model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
return model
class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
if m.bias is not None:
nn.init.constant_(m.bias, 0)
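# fill_up_weights initialises a grouped ConvTranspose2d with the classic bilinear-upsampling
# kernel (FCN-style), so each upsampling layer starts out as plain bilinear interpolation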
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
class DeformConv(nn.Module):
def __init__(self, chi, cho):
super(DeformConv, self).__init__()
self.actf = nn.Sequential(
nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)
)
self.conv = DCN(chi, cho, kernel_size=(3, 3), stride=1, padding=1, dilation=1, deformable_groups=1)
def forward(self, x):
x = self.conv(x)
x = self.actf(x)
return x
class IDAUp(nn.Module):
def __init__(self, o, channels, up_f):
super(IDAUp, self).__init__()
for i in range(1, len(channels)):
c = channels[i]
f = int(up_f[i])
proj = DeformConv(c, o)
node = DeformConv(o, o)
up = nn.ConvTranspose2d(o, o, f * 2, stride=f,
padding=f // 2, output_padding=0,
groups=o, bias=False)
fill_up_weights(up)
setattr(self, 'proj_' + str(i), proj)
setattr(self, 'up_' + str(i), up)
setattr(self, 'node_' + str(i), node)
def forward(self, layers, startp, endp):
for i in range(startp + 1, endp):
upsample = getattr(self, 'up_' + str(i - startp))
project = getattr(self, 'proj_' + str(i - startp))
layers[i] = upsample(project(layers[i]))
node = getattr(self, 'node_' + str(i - startp))
layers[i] = node(layers[i] + layers[i - 1])
class DLAUp(nn.Module):
def __init__(self, startp, channels, scales, in_channels=None):
super(DLAUp, self).__init__()
self.startp = startp
if in_channels is None:
in_channels = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(self, 'ida_{}'.format(i),
IDAUp(channels[j], in_channels[j:],
scales[j:] // scales[j]))
scales[j + 1:] = scales[j]
in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, layers):
out = [layers[-1]] # start with 32
for i in range(len(layers) - self.startp - 1):
ida = getattr(self, 'ida_{}'.format(i))
ida(layers, len(layers) - i - 2, len(layers))
out.insert(0, layers[-1])
return out
class Interpolate(nn.Module):
def __init__(self, scale, mode):
super(Interpolate, self).__init__()
self.scale = scale
self.mode = mode
def forward(self, x):
x = F.interpolate(x, scale_factor=self.scale, mode=self.mode, align_corners=False)
return x
class DLASeg(nn.Module):
def __init__(self, base_name, heads, pretrained, down_ratio, final_kernel,
last_level, head_conv, out_channel=0):
super(DLASeg, self).__init__()
assert down_ratio in [1, 2, 4, 8, 16]
self.first_level = int(np.log2(down_ratio))
self.last_level = last_level
self.base = globals()[base_name](pretrained=pretrained)
channels = self.base.channels
scales = [2 ** i for i in range(len(channels[self.first_level:]))]
self.dla_up = DLAUp(self.first_level, channels[self.first_level:], scales)
if out_channel == 0:
out_channel = channels[self.first_level]
self.ida_up = IDAUp(out_channel, channels[self.first_level:self.last_level],
[2 ** i for i in range(self.last_level - self.first_level)])
self.heads = heads
for head in self.heads:
classes = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(channels[self.first_level], head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, classes,
kernel_size=final_kernel, stride=1,
padding=final_kernel // 2, bias=True))
if 'hm' in head:
fc[-1].bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
else:
fc = nn.Conv2d(channels[self.first_level], classes,
kernel_size=final_kernel, stride=1,
padding=final_kernel // 2, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
self.__setattr__(head, fc)
def forward(self, x):
x = self.base(x)
x = self.dla_up(x)
y = []
for i in range(self.last_level - self.first_level):
y.append(x[i].clone())
self.ida_up(y, 0, len(y))
z = {}
for head in self.heads:
z[head] = self.__getattr__(head)(y[-1])
return z, y[-1]
"""
@Time : 2020/6/2 14:55
@Author : Qin Dian
@Manual : Deep Snake Network
"""
import torch.nn as nn
# from networks.backbones.dla import DLASeg
# from utils import data_utils, net_utils
import torch
# class Network(nn.Module):
# def __init__(self, num_layers, heads, head_conv=256, down_ratio=1):
# super(Network, self).__init__()
#
# self.dla = DLASeg('dla{}'.format(num_layers), heads,
# pretrained=True,
# down_ratio=down_ratio,
# final_kernel=1,
# last_level=5,
# head_conv=head_conv)
#
# @staticmethod
# def decode_detection(output, h, w):
# ct_hm = output['ct_hm']
# wh = output['hw']
# ct, detection = net_utils.extract_top_k_ctp_dt(torch.sigmoid(ct_hm), wh)
# detection[..., :4] = data_utils.clip_to_image(detection[..., :4], h, w)
# output.update({'ct': ct, 'detection': detection})
# return ct, detection
#
# @staticmethod
# def use_gt_detection(output, batch):
# _, _, height, width = output['ct_hm'].size()
# ct_01 = batch['ct_01'].byte()
#
# ct_ind = batch['ct_ind'][ct_01]
# xs, ys = ct_ind % width, ct_ind // width
# xs, ys = xs[:, None].float(), ys[:, None].float()
# ct = torch.cat([xs, ys], dim=1)
#
# wh = batch['hw'][ct_01]
# bboxes = torch.cat([xs - wh[..., 0:1] / 2,
# ys - wh[..., 1:2] / 2,
# xs + wh[..., 0:1] / 2,
# ys + wh[..., 1:2] / 2], dim=1)
# score = torch.ones([len(bboxes)]).to(bboxes)[:, None]
# ct_cls = batch['ct_cls'][ct_01].float()[:, None]
# detection = torch.cat([bboxes, score, ct_cls], dim=1)
#
# output['ct'] = ct[None]
# output['detection'] = detection[None]
#
# return output
#
# def forward(self, x, batch=None):
# # output holds the object-detection feature maps
# output, cnn_feature = self.dla(x)
# with torch.no_grad():
# # decode the detection features into center points and contours
# self.decode_detection(output, cnn_feature.size(2), cnn_feature.size(3))
# return output
class conv_block(nn.Module):
"""
Convolution Block
"""
def __init__(self, in_ch, out_ch):
super(conv_block, self).__init__()
self.conv = nn.Sequential(
nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
nn.BatchNorm2d(out_ch),
nn.ReLU(inplace=True),
nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
nn.BatchNorm2d(out_ch),
nn.ReLU(inplace=True))
def forward(self, x):
x = self.conv(x)
return x
class up_conv(nn.Module):
"""
Up Convolution Block
"""
def __init__(self, in_ch, out_ch):
super(up_conv, self).__init__()
self.up = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
nn.BatchNorm2d(out_ch),
nn.ReLU(inplace=True)
)
def forward(self, x):
x = self.up(x)
return x
class U_Net(nn.Module):
"""
UNet - Basic Implementation
Paper : https://arxiv.org/abs/1505.04597
"""
def __init__(self, in_ch=3, out_ch=1):
super(U_Net, self).__init__()
n1 = 64
filters = [n1, n1 * 2, n1 * 4, n1 * 8, n1 * 16]
self.Maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.Maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.Maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
self.Maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
self.Conv1 = conv_block(in_ch, filters[0])
self.Conv2 = conv_block(filters[0], filters[1])
self.Conv3 = conv_block(filters[1], filters[2])
self.Conv4 = conv_block(filters[2], filters[3])
self.Conv5 = conv_block(filters[3], filters[4])
self.Up5 = up_conv(filters[4], filters[3])
self.Up_conv5 = conv_block(filters[4], filters[3])
self.Up4 = up_conv(filters[3], filters[2])
self.Up_conv4 = conv_block(filters[3], filters[2])
self.Up3 = up_conv(filters[2], filters[1])
self.Up_conv3 = conv_block(filters[2], filters[1])
self.Up2 = up_conv(filters[1], filters[0])
self.Up_conv2 = conv_block(filters[1], filters[0])
self.Conv = nn.Conv2d(filters[0], out_ch, kernel_size=1, stride=1, padding=0)
# self.active = torch.nn.Sigmoid()
def forward(self, x):
e1 = self.Conv1(x)
e2 = self.Maxpool1(e1)
e2 = self.Conv2(e2)
e3 = self.Maxpool2(e2)
e3 = self.Conv3(e3)
e4 = self.Maxpool3(e3)
e4 = self.Conv4(e4)
e5 = self.Maxpool4(e4)
e5 = self.Conv5(e5)
d5 = self.Up5(e5)
d5 = torch.cat((e4, d5), dim=1)
d5 = self.Up_conv5(d5)
d4 = self.Up4(d5)
d4 = torch.cat((e3, d4), dim=1)
d4 = self.Up_conv4(d4)
d3 = self.Up3(d4)
d3 = torch.cat((e2, d3), dim=1)
d3 = self.Up_conv3(d3)
d2 = self.Up2(d3)
d2 = torch.cat((e1, d2), dim=1)
d2 = self.Up_conv2(d2)
out = self.Conv(d2)
# d1 = self.active(out)
return out
if __name__ == '__main__':
# smoke test for U_Net (the detection Network above is commented out, so it cannot be instantiated here)
net = U_Net(in_ch=3, out_ch=7)
net.eval()
net.cuda()
tmp_ct = torch.randn((2, 3, 512, 512)).cuda()
out = net(tmp_ct)
print(out.size())
\ No newline at end of file
## MNIST
This README gives instructions on how to run the code.
In this case, we remove the datasets from the LightningModule, since you might want to use
the same model with many datasets; the dataloaders are built outside and passed to the trainer
(see the sketch below).
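For example, a minimal sketch of that pattern (mirroring `mnist_trainer.py` below; `args`, `model`,
and the two loaders are assumed to be built as in that script):

```python
model = CoolSystem(hparams=args)
trainer = Trainer.from_argparse_args(args)
trainer.fit(model, train_loader, val_loader)  # loaders are plain torch DataLoaders built outside the module
```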
#### CPU
```bash
python mnist_trainer.py
```
#### Multiple-GPUs
```bash
python mnist_trainer.py --gpus 4
```
or specific GPUs
```bash
python mnist_trainer.py --gpus '0,3'
```
#### On multiple nodes
```bash
python mnist_trainer.py --gpus 4 --nodes 4 --precision 16
```
"""
@Time : 2020/5/28 19:33
@Author : Qin Dian
@Manual :
"""
\ No newline at end of file
"""
This file defines the core research contribution
"""
import os
import torch
from torch.nn import functional as F
from argparse import ArgumentParser
import pytorch_lightning as pl
class CoolSystem(pl.LightningModule):
def __init__(self, hparams):
super(CoolSystem, self).__init__()
# not the best model...
self.hparams = hparams
self.l1 = torch.nn.Linear(28 * 28, 10)
def forward(self, x):
return torch.relu(self.l1(x.view(x.size(0), -1)))
def training_step(self, batch, batch_idx):
# REQUIRED
x, y = batch
y_hat = self.forward(x)
loss = F.cross_entropy(y_hat, y)
tensorboard_logs = {'train_loss': loss}
return {'loss': loss, 'log': tensorboard_logs}
def validation_step(self, batch, batch_idx):
# OPTIONAL
x, y = batch
y_hat = self.forward(x)
return {'val_loss': F.cross_entropy(y_hat, y)}
def validation_epoch_end(self, outputs):
# OPTIONAL
avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
tensorboard_logs = {'avg_val_loss': avg_loss}
return {'val_loss': avg_loss, 'log': tensorboard_logs}
def configure_optimizers(self):
# REQUIRED
# can return multiple optimizers and learning_rate schedulers
return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
@staticmethod
def add_model_specific_args(parent_parser):
"""
Specify the hyperparams for this LightningModule
"""
# MODEL specific
parser = ArgumentParser(parents=[parent_parser], add_help=False)
parser.add_argument('--learning_rate', default=0.02, type=float)
parser.add_argument('--batch_size', default=32, type=int)
# training specific (for this model)
parser.add_argument('--max_nb_epochs', default=2, type=int)
return parser
"""
This file runs the main training/val loop, etc... using Lightning Trainer
"""
import os
from pytorch_lightning import Trainer, seed_everything
from argparse import ArgumentParser
from production_demo.mnist import CoolSystem
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
# sets seeds for numpy, torch, etc...
# must do for DDP to work well
seed_everything(123)
def main(args):
# init module
model = CoolSystem(hparams=args)
train_loader = DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=args.batch_size)
val_loader = DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=args.batch_size)
# makes all flags available to trainer from cli
trainer = Trainer.from_argparse_args(args)
trainer.fit(model, train_loader, val_loader)
if __name__ == '__main__':
parser = ArgumentParser(add_help=False)
parser.add_argument('--gpus', default=2)
# add args from trainer
parser = Trainer.add_argparse_args(parser)
# give the module a chance to add own params
# good practice to define LightningModule specific params in the module
parser = CoolSystem.add_model_specific_args(parser)
# parse params
args = parser.parse_args()
main(args)
## MNIST
This README gives instructions on how to run the code.
#### CPU
```bash
python mnist_trainer.py
```
#### Multiple-GPUs
```bash
python mnist_trainer.py --gpus 4
```
or specific GPUs
```bash
python mnist_trainer.py --gpus '0,3'
```
#### On multiple nodes
```bash
python mnist_trainer.py --gpus 4 --nodes 4 --precision 16
```
"""
@Time : 2020/6/23 12:53
@Author : Qin Dian
@Manual :
"""
from pytorch_lightning import Trainer, seed_everything
import os
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
from argparse import ArgumentParser
import pytorch_lightning as pl
class CoolSystem(pl.LightningModule):
def __init__(self, hparams):
super(CoolSystem, self).__init__()
# not the best model...
self.hparams = hparams
self.l1 = torch.nn.Linear(28 * 28, 10)
def forward(self, x):
return torch.relu(self.l1(x.view(x.size(0), -1)))
def training_step(self, batch, batch_idx):
# REQUIRED
x, y = batch
y_hat = self.forward(x)
loss = F.cross_entropy(y_hat, y)
tensorboard_logs = {'train_loss': loss}
return {'loss': loss, 'log': tensorboard_logs}
def validation_step(self, batch, batch_idx):
# OPTIONAL
x, y = batch
y_hat = self.forward(x)
return {'val_loss': F.cross_entropy(y_hat, y)}
def validation_epoch_end(self, outputs):
# OPTIONAL
avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
tensorboard_logs = {'avg_val_loss': avg_loss}
return {'val_loss': avg_loss, 'log': tensorboard_logs}
def test_step(self, batch, batch_idx):
# OPTIONAL
x, y = batch
y_hat = self.forward(x)
return {'test_loss': F.cross_entropy(y_hat, y)}
def test_epoch_end(self, outputs):
# OPTIONAL
avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
tensorboard_logs = {'test_val_loss': avg_loss}
return {'test_loss': avg_loss, 'log': tensorboard_logs}
def configure_optimizers(self):
# REQUIRED
# can return multiple optimizers and learning_rate schedulers
return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
def train_dataloader(self):
# REQUIRED
return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()),
batch_size=self.hparams.batch_size)
def val_dataloader(self):
# OPTIONAL
return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()),
batch_size=self.hparams.batch_size)
def test_dataloader(self):
# OPTIONAL
return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()),
batch_size=self.hparams.batch_size)
@staticmethod
def add_model_specific_args(parent_parser):
"""
Specify the hyperparams for this LightningModule
"""
# MODEL specific
parser = ArgumentParser(parents=[parent_parser], add_help=False)
parser.add_argument('--learning_rate', default=0.02, type=float)
parser.add_argument('--batch_size', default=240, type=int)
# training specific (for this model)
parser.add_argument('--max_nb_epochs', default=100, type=int)
return parser
seed_everything(123)
def main(args):
# init module
model = CoolSystem(hparams=args)
# most basic trainer, uses good defaults
trainer = Trainer.from_argparse_args(args, gpus=2)
trainer.fit(model)
trainer.test()
if __name__ == '__main__':
parser = ArgumentParser(add_help=False)
# add args from trainer
parser = Trainer.add_argparse_args(parser)
# give the module a chance to add own params
# good practice to define LightningModule specific params in the module
parser = CoolSystem.add_model_specific_args(parser)
# parse params
args = parser.parse_args()
main(args)
"""
This file defines the core research contribution
"""
import os
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
from argparse import ArgumentParser
import pytorch_lightning as pl
class CoolSystem(pl.LightningModule):
def __init__(self, hparams):
super(CoolSystem, self).__init__()
# not the best model...
self.hparams = hparams
self.l1 = torch.nn.Linear(28 * 28, 10)
def forward(self, x):
return torch.relu(self.l1(x.view(x.size(0), -1)))
def training_step(self, batch, batch_idx):
# REQUIRED
x, y = batch
y_hat = self.forward(x)
loss = F.cross_entropy(y_hat, y)
tensorboard_logs = {'train_loss': loss}
return {'loss': loss, 'log': tensorboard_logs}
def validation_step(self, batch, batch_idx):
# OPTIONAL
x, y = batch
y_hat = self.forward(x)
return {'val_loss': F.cross_entropy(y_hat, y)}
def validation_epoch_end(self, outputs):
# OPTIONAL
avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
tensorboard_logs = {'avg_val_loss': avg_loss}
return {'val_loss': avg_loss, 'log': tensorboard_logs}
def test_step(self, batch, batch_idx):
# OPTIONAL
x, y = batch
y_hat = self.forward(x)
return {'test_loss': F.cross_entropy(y_hat, y)}
def test_epoch_end(self, outputs):
# OPTIONAL
avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
tensorboard_logs = {'test_val_loss': avg_loss}
return {'test_loss': avg_loss, 'log': tensorboard_logs}
def configure_optimizers(self):
# REQUIRED
# can return multiple optimizers and learning_rate schedulers
return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
def train_dataloader(self):
# REQUIRED
return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=self.hparams.batch_size)
def val_dataloader(self):
# OPTIONAL
return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=self.hparams.batch_size)
def test_dataloader(self):
# OPTIONAL
return DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=transforms.ToTensor()), batch_size=self.hparams.batch_size)
@staticmethod
def add_model_specific_args(parent_parser):
"""
Specify the hyperparams for this LightningModule
"""
# MODEL specific
parser = ArgumentParser(parents=[parent_parser], add_help=False)
parser.add_argument('--learning_rate', default=0.02, type=float)
parser.add_argument('--batch_size', default=32, type=int)
# training specific (for this model)
parser.add_argument('--max_nb_epochs', default=2, type=int)
return parser
"""
This file runs the main training/val loop, etc... using Lightning Trainer
"""
from pytorch_lightning import Trainer, seed_everything
from argparse import ArgumentParser
from mnist import CoolSystem
# sets seeds for numpy, torch, etc...
# must do for DDP to work well
seed_everything(123)
def main(args):
# init module
model = CoolSystem(hparams=args)
# most basic trainer, uses good defaults
trainer = Trainer.from_argparse_args(args)
trainer.fit(model)
trainer.test()
if __name__ == '__main__':
parser = ArgumentParser(add_help=False)
parser.add_argument('--gpus', default=2)
# add args from trainer
parser = Trainer.add_argparse_args(parser)
# give the module a chance to add own params
# good practice to define LightningModule specific params in the module
parser = CoolSystem.add_model_specific_args(parser)
# parse params
args = parser.parse_args()
main(args)
"""
@Time : 2019/12/10 14:29
@Author : Qin Dian
@Manual :
"""
import numpy as np
import os
import argparse
from glob import glob
from multiprocessing.dummy import Pool
from skimage import measure
parser = argparse.ArgumentParser(description='Slice Maker')
parser.add_argument('--in_path', type=str, default=r'D:\A_UNET\in')
parser.add_argument('--out_path', type=str, default=r'D:\A_UNET\out')
parser.add_argument('--generate_tumor_index', type=bool, default=True, help='whether to generate the tumor slice index list file')
parser.add_argument('--series_index', type=int, default=0, help='which series (phase) the tumor index list is built for')
parser.add_argument('--list_path', type=str, default=r'D:\A_UNET')
parser.add_argument('--process_num', type=int, default=20)
args = parser.parse_args()
def main():
if not os.path.exists(args.out_path):
os.mkdir(args.out_path)
paths = glob(os.path.join(args.in_path, "ct*.npz"))
pool = Pool(args.process_num)
result = pool.map(make_slice, paths)
if args.generate_tumor_index:
tumor_slices = []
for i in result:
tumor_slices += i
np.save(os.path.join(args.list_path, f'tumor_slices_s{args.series_index}.npy'), tumor_slices)
def make_slice(path):
result = []
ct = np.load(path, allow_pickle=True)
names = os.path.split(path)
mask = np.load(os.path.join(names[0], names[1][3:]), allow_pickle=True)
ct = ct.get('arrays', ct.get('arr_0'))
ct = ct.reshape(3, -1, 512, 512)
mask = mask.get('arrays', mask.get('arr_0'))
mask = mask.reshape(3, -1, 512, 512)
case = names[1][3:].split('.')[0]
for i in range(ct.shape[1]):
ct_slice = ct[:, i, ...]
mask_slice = mask[:, i, ...]
if np.any(mask_slice > 1):
# format of centers: {0: [[x, y, h, w, c], ...], 1: [[x, y, h, w, c], ...], 2: [[x, y, h, w, c], ...]}
# h: max_x - min_x, w: max_y - min_y,
# i.e. the list of tumor center points for each of the three phases
centers = {}
for series_i in range(3):
t_mask = mask_slice[series_i] >> 1
tumors = measure.label((t_mask > 0).astype(np.uint8), connectivity=1)
regions = measure.regionprops(tumors)
s_centers = []
for region in regions:
min_x, min_y, max_x, max_y = region.bbox
s_centers.append([(min_x+max_x)//2, (min_y+max_y)//2, max_x-min_x, max_y-min_y,
t_mask[(min_x+max_x)//2, (min_y+max_y)//2]])
centers[series_i] = s_centers
np.savez_compressed(f'{args.out_path}/{case}_{i}.npz', ct=ct_slice, mask=mask_slice, centers=centers)
# only record slices that contain a tumor in the selected series
if args.generate_tumor_index and np.any(mask_slice[args.series_index] > 1):
print(f'{case}_{i}.npz')
result.append(f'{case}_{i}.npz')
else:
np.savez_compressed(f'{args.out_path}/{case}_{i}.npz', ct=ct_slice, mask=mask_slice)
print(f'complete making slices of {case}')
return result
if __name__ == '__main__':
main()
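# Illustrative sketch (not part of the original script): reading one generated slice
# back. The file name below is hypothetical; the keys match the np.savez_compressed
# calls in make_slice above.
import numpy as np
data = np.load(r'D:\A_UNET\out\case1_42.npz', allow_pickle=True)
ct_slice, mask_slice = data['ct'], data['mask']  # each (3, 512, 512): three-series slice
if 'centers' in data.files:                      # present only for slices containing tumors
    centers = data['centers'].item()             # {series: [[x, y, h, w, c], ...]}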
"""
@Time : 2020/6/1 16:30
@Author : Qin Dian
@Manual :
"""
\ No newline at end of file
"""
@Time : 2020/7/3 11:13
@Author : Qin Dian
@Manual :
"""
import torch
from torch import nn
import torch.nn.functional as f
class FocalLoss(nn.Module):
'''nn.Module wrapper for focal loss'''
def __init__(self):
super(FocalLoss, self).__init__()
self.neg_loss = _neg_loss
def forward(self, out, target):
return self.neg_loss(out, target)
class IndL1Loss1d(nn.Module):
def __init__(self, task='l1'):
super(IndL1Loss1d, self).__init__()
if task == 'l1':
self.loss = f.l1_loss
elif task == 'smooth_l1':
self.loss = f.smooth_l1_loss
def forward(self, output, target):
output = output * target.bool()
loss = self.loss(output, target, reduction='sum')
loss = loss / (target.bool().sum() + 1e-4)
return loss
def _neg_loss(pred, gt):
"""
Modified focal loss. Exactly the same as CornerNet.
Runs faster and costs a little bit more memory
Arguments:
pred (batch x c x h x w)
gt (batch x c x h x w)
"""
pos_inds = gt.eq(1).float()
neg_inds = gt.lt(1).float()
neg_weights = torch.pow(1 - gt, 4)
loss = 0
pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
num_pos = pos_inds.float().sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if num_pos == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
return loss
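# Illustrative sketch (not part of the original file): sanity-checking FocalLoss on
# dummy tensors. pred must already be a probability map in (0, 1), e.g. after a
# clamped sigmoid, and gt a gaussian heatmap whose peaks equal exactly 1.
hm_pred = torch.rand(2, 3, 96, 96).clamp(1e-4, 1 - 1e-4)  # fake predicted heatmaps
hm_gt = torch.zeros(2, 3, 96, 96)
hm_gt[:, :, 48, 48] = 1.0                                  # one positive center per map
loss = FocalLoss()(hm_pred, hm_gt)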
"""
@Time : 2020/6/17 14:08
@Author : Qin Dian
@Manual :
"""
"""
@Time : 2020/6/17 14:09
@Author : Qin Dian
@Manual :
"""
import torch
import numpy as np
def to_3channel_replica(img):
"""
convert a 1-channel gray image to 3 channels by replicating it 3 times
:param img:
:return:
"""
if len(img.shape) > 2:
ret = np.stack((img, img, img), axis=1)
else:
ret = np.stack((img, img, img), axis=0)
return ret
def cut_384(img):
"""
crop a 512*512 CT image to 384*384
:param img:
:return:
"""
if len(img.shape) > 2:
ret = img[:, 50:434, 60:444]
else:
ret = img[50:434, 60:444]
return ret
def clip_to_image(bbox, h, w):
"""
keep bbox in the image
"""
bbox[..., :2] = torch.clamp(bbox[..., :2], min=0)
bbox[..., 2] = torch.clamp(bbox[..., 2], max=w-1)
bbox[..., 3] = torch.clamp(bbox[..., 3], max=h-1)
return bbox
def window_standardize(img, lower_bound, upper_bound):
"""
clip the pixel values into [lower_bound, upper_bound], and standardize them
"""
img = np.clip(img, lower_bound, upper_bound)
# x=x*2-1: map x to [-1,1]
img = 2 * (img - lower_bound) / (upper_bound - lower_bound) - 1
return img
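# Illustrative usage (a sketch, not from the original file): with an abdominal
# soft-tissue window of, say, [-60, 140] HU, raw CT values are squashed into [-1, 1]:
#   >>> raw = np.random.randint(-1000, 1000, (512, 512)).astype(np.float32)
#   >>> normed = window_standardize(raw, -60, 140)
#   >>> float(normed.min()) >= -1.0 and float(normed.max()) <= 1.0
#   True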
def draw_umich_gaussian(heat_map, center, h, w, k=1):
"""
draw a gaussian peak around 'center' on 'heat_map'
:param heat_map: heat_map canvas
:param center: gaussian center
:param w: width
:param h: height
:param k: peak value of the drawn gaussian (default 1)
:return:
"""
radius = _gaussian_radius((h, w))
radius = max(0, int(radius))
diameter = 2 * radius + 1
gaussian = _gaussian2d((diameter, diameter), sigma=diameter / 6)
x, y = int(center[0]), int(center[1])
height, width = heat_map.shape[:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heat_map = heat_map[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heat_map.shape) > 0:
np.maximum(masked_heat_map, masked_gaussian * k, out=masked_heat_map)
return heat_map
def _gaussian2d(shape, sigma=(1, 1), rho=0):
if not isinstance(sigma, tuple):
sigma = (sigma, sigma)
sigma_x, sigma_y = sigma
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m+1, -n:n+1]
energy = (x * x) / (sigma_x * sigma_x) - 2 * rho * x * y / (sigma_x * sigma_y) + (y * y) / (sigma_y * sigma_y)
h = np.exp(-energy / (2 * (1 - rho * rho)))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h
def _gaussian_radius(det_size, min_overlap=0.7):
height, width = det_size
a1 = 1
b1 = (height + width)
c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
r1 = (b1 + sq1) / 2
a2 = 4
b2 = 2 * (height + width)
c2 = (1 - min_overlap) * width * height
sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
r2 = (b2 + sq2) / 2
a3 = 4 * min_overlap
b3 = -2 * min_overlap * (height + width)
c3 = (min_overlap - 1) * width * height
if b3 ** 2 - 4 * a3 * c3 < 0:
r3 = min(r1, r2)
else:
sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
r3 = (b3 + sq3) / 2
return min(r1, r2, r3)
\ No newline at end of file
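# Illustrative sketch (not part of the original repository): how the helpers above
# are typically combined for CenterNet-style supervision, drawing one gaussian per
# tumor on a heatmap. The 96x96 canvas and the example bbox (h=11, w=7 centered at
# x=40, y=30) are assumptions for demonstration only.
import numpy as np
heat_map = np.zeros((96, 96), dtype=np.float32)
heat_map = draw_umich_gaussian(heat_map, center=(40, 30), h=11, w=7)
assert heat_map.max() == 1.0  # the peak at the tumor center is exactly 1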
"""
@Time : 2020/7/9 16:09
@Author : Qin Dian
@Manual :
"""
# The snippets below are meant to be pasted into an interactive/debugging session;
# they assume tensors such as ct, hm, wh, x and out already exist in that scope.
import matplotlib.pyplot as plt
plt.subplot(221)
plt.imshow(ct[1,2].cpu(), cmap='gray')
plt.subplot(222)
plt.imshow(hm[1,2].cpu())
plt.subplot(223)
plt.imshow(wh[1,0].cpu())
plt.subplot(224)
plt.imshow(wh[1,1].cpu())
plt.show()
import matplotlib.pyplot as plt
plt.rcParams['figure.dpi']=300
plt.subplot(321)
plt.imshow(ct, cmap='gray')
plt.subplot(322)
plt.imshow(hm[0])
plt.subplot(323)
plt.imshow(hm[1])
plt.subplot(324)
plt.imshow(hm[2])
plt.subplot(325)
plt.imshow(mask)
plt.show()
import matplotlib.pyplot as plt
plt.subplot(221)
plt.imshow(x[74,0].cpu(), cmap='gray')
plt.subplot(222)
plt.imshow(out[2,0].cpu())
plt.subplot(223)
plt.imshow(out[2,0].cpu())
plt.subplot(224)
plt.imshow(out[2,0].cpu())
plt.show()
\ No newline at end of file
"""
@Time : 2020/6/17 14:19
@Author : Qin Dian
@Manual :
"""
import torch
import torch.nn.functional as F
def clipped_sigmoid(x):
y = torch.clamp(x.sigmoid(), min=1e-4, max=1 - 1e-4)
return y
def nms(heat, kernel=3):
"""
Keep only local maxima of the heatmap (via max pooling) so that top-k extraction returns distinct center points
"""
pad = (kernel - 1) // 2
hmax = F.max_pool2d(
heat, (kernel, kernel), stride=1, padding=pad)
keep = (hmax == heat).float()
return heat * keep
def gather_feat(feat, ind, mask=None):
dim = feat.size(2)
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
feat = feat.gather(1, ind)
if mask is not None:
mask = mask.unsqueeze(2).expand_as(feat)
feat = feat[mask]
feat = feat.view(-1, dim)
return feat
def topk(scores, k=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), k)
topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds // width).float()
topk_xs = (topk_inds % width).float()
topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), k)
topk_clses = (topk_ind // k).int()
topk_inds = gather_feat(
topk_inds.view(batch, -1, 1), topk_ind).view(batch, k)
topk_ys = gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, k)
topk_xs = gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, k)
return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def transpose_and_gather_feat(feat, ind):
feat = feat.permute(0, 2, 3, 1).contiguous()
feat = feat.view(feat.size(0), -1, feat.size(3))
feat = gather_feat(feat, ind)
return feat
def extract_top_k_ctp_dt(ct_hm, wh, reg=None, k=100):
batch, cat, height, width = ct_hm.size()
ct_hm = nms(ct_hm)
scores, inds, clses, ys, xs = topk(ct_hm, k=k)
wh = transpose_and_gather_feat(wh, inds)
wh = wh.view(batch, k, 2)
if reg is not None:
reg = transpose_and_gather_feat(reg, inds)
reg = reg.view(batch, k, 2)
xs = xs.view(batch, k, 1) + reg[:, :, 0:1]
ys = ys.view(batch, k, 1) + reg[:, :, 1:2]
else:
xs = xs.view(batch, k, 1)
ys = ys.view(batch, k, 1)
clses = clses.view(batch, k, 1).float()
scores = scores.view(batch, k, 1)
ct = torch.cat([xs, ys], dim=2)
bboxes = torch.cat([xs - wh[..., 0:1] / 2,
ys - wh[..., 1:2] / 2,
xs + wh[..., 0:1] / 2,
ys + wh[..., 1:2] / 2], dim=2)
detection = torch.cat([bboxes, scores, clses], dim=2)
return ct, detection
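# Illustrative sketch (not part of the original file): decoding detections from
# network outputs with the helpers above. The shapes below (2 slices, 3 tumor
# classes, 96x96 feature maps) are assumptions for demonstration only.
ct_hm = clipped_sigmoid(torch.randn(2, 3, 96, 96))   # per-class center heatmaps
wh = torch.rand(2, 2, 96, 96) * 20                   # predicted box widths/heights
centers, detections = extract_top_k_ctp_dt(ct_hm, wh, k=10)
# centers: (2, 10, 2) [x, y]; detections: (2, 10, 6) [x1, y1, x2, y2, score, class]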