Derivations and code tests for second-order gradients of hashgrids of tiny-cuda-nn.
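In short, the script below encodes 3D points with a hashgrid, predicts an SDF, takes nablas = d(sdf)/dx with create_graph=True, and then backpropagates a loss on nablas (an eikonal term), which exercises tcnn's backward_backward. For orientation, here is a minimal pure-PyTorch sketch of the same double-backward pattern, with a toy analytic sphere SDF standing in for the hashgrid encoder (illustration only, not part of the tested code):

import torch
from torch import autograd

x = torch.rand(8, 3, requires_grad=True)
sdf = x.norm(dim=-1, keepdim=True) - 0.5  # toy sphere SDF in place of hashgrid + MLP
nablas = autograd.grad(sdf, x, torch.ones_like(sdf), create_graph=True)[0]
loss = ((nablas.norm(dim=-1) - 1.0) ** 2).mean()  # eikonal term: ||nabla sdf|| should be 1
loss.backward()  # differentiates through autograd.grad itself, i.e. backward_backward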
import torch
import torch.nn as nn
from torch import autograd
from torch.optim import Adam
import torch.nn.functional as F

import tinycudann as tcnn
class SDF(nn.Module):
    def __init__(self, hash=True, n_levels=12, log2_hashmap_size=15, base_resolution=16, smoothstep=False) -> None:
        super().__init__()
        self.encoder = tcnn.Encoding(3, {
            "otype": "HashGrid" if hash else "DenseGrid",
            "n_levels": n_levels,
            "n_features_per_level": 2,
            "log2_hashmap_size": log2_hashmap_size,
            "base_resolution": base_resolution,
            "per_level_scale": 1.5,
            "interpolation": "Smoothstep" if smoothstep else "Linear"
        })
        self.decoder = nn.Sequential(
            nn.Linear(self.encoder.n_output_dims, 64),
            nn.ReLU(True),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        # the tcnn encoding may be emitted in half precision; cast to float32 for the torch decoder
        encoded = self.encoder(x).to(dtype=torch.float)
        sdf = self.decoder(encoded)
        return sdf
    def forward_with_nablas(self, x):
        with torch.enable_grad():
            x = x.requires_grad_(True)
            sdf = self.forward(x)
            # create_graph=True keeps the graph of this gradient computation,
            # so losses on nablas can themselves be backpropagated (second-order gradients).
            nablas = autograd.grad(
                sdf,
                x,
                torch.ones_like(sdf, device=x.device),
                create_graph=True,
                retain_graph=True,
                only_inputs=True)[0]
        return sdf, nablas
if __name__ == '__main__':
    """
    NOTE: Jianfei: I provide three testing tools for backward_backward functionality.
    Play around as you want :)
    1. test_train(): train a toy SDF model with eikonal term.
    2. grad_check(): check backward_backward numerical correctness via torch.autograd.gradcheck.
    3. vis_graph(): visualize torch compute graph
    """
    def test_():
        # smoke test: a single second-order gradient, d(nablas)/dx, through the hashgrid encoder
        device = torch.device("cuda")
        model = SDF(True, n_levels=1, log2_hashmap_size=15, base_resolution=4, smoothstep=False).to(device)
        x = (torch.tensor([[0.3, 0.4, 0.5]], dtype=torch.float, device=device)).requires_grad_(True)
        sdf, nablas = model.forward_with_nablas(x)
        autograd.grad(
            nablas,
            x,
            torch.ones_like(nablas, device=x.device),
            create_graph=False,
            retain_graph=False,
            only_inputs=True)[0]
    def test_train():
        """
        train a toy SDF model with eikonal term.
        """
        from tqdm import tqdm
        device = torch.device("cuda")
        model = SDF(True, 4, base_resolution=12).to(device)
        # model = SDF(False, 4, base_resolution=12).to(device)
        optimizer = Adam(model.parameters(), 2.0e-3)
        with tqdm(range(10000)) as pbar:
            for _ in pbar:
                x = torch.rand([51200, 3], dtype=torch.float, device=device)
                sdf, nablas = model.forward_with_nablas(x)
                nablas_norm: torch.Tensor = nablas.norm(dim=-1)
                # eikonal term
                loss = F.mse_loss(nablas_norm, nablas_norm.new_ones(nablas_norm.shape), reduction='mean')
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                pbar.set_postfix(loss=loss.item())
    def grad_check():
        """
        check backward_backward numerical correctness via torch.autograd.gradcheck
        """
        import numpy as np
        from types import SimpleNamespace
        from tinycudann.modules import _module_function_backward, _module_function, _torch_precision, _C
        dtype = _torch_precision(_C.preferred_precision())
        device = torch.device("cuda")

        # NOTE: need a smaller net when running gradcheck, otherwise it will OOM
        model = SDF(True, n_levels=4, log2_hashmap_size=19, base_resolution=4, smoothstep=True).to(device)
        # model = SDF(True, n_levels=1, log2_hashmap_size=15, base_resolution=8, smoothstep=False).to(device)

        def apply_on_x(x):
            params = model.encoder.params.to(_torch_precision(model.encoder.native_tcnn_module.param_precision())).contiguous()
            return _module_function.apply(
                model.encoder.native_tcnn_module, x, params, 128.0
            )
        # ✓ y w.r.t. x, i.e. dy_dx (passed)
        autograd.gradcheck(
            apply_on_x,
            # (torch.rand([1,3], dtype=torch.float, device=device)).requires_grad_(True),
            (torch.tensor([[0.17, 0.55, 0.79]], dtype=torch.float, device=device)).requires_grad_(True),
            eps=1.0e-3)

        # ✓ dL_dx w.r.t. x, i.e. ddLdx_dx (passed)
        # ✓ dL_dx w.r.t. dL_dy, i.e. ddLdx_ddLdy (passed)
        autograd.gradgradcheck(
            apply_on_x,
            # (torch.rand([1,3], dtype=torch.float, device=device)).requires_grad_(True),
            (torch.tensor([[0.17, 0.55, 0.79]], dtype=torch.float, device=device)).requires_grad_(True),
            eps=1.0e-3,
            nondet_tol=0.001  # due to non-determinism of atomicAdd
        )
        def backward_apply_on_x(x):
            dL_dy = torch.ones([*x.shape[:-1], model.encoder.n_output_dims], dtype=dtype, device=device)
            params = model.encoder.params.to(_torch_precision(model.encoder.native_tcnn_module.param_precision())).contiguous()
            native_ctx, y = model.encoder.native_tcnn_module.fwd(x, params)
            dummy_ctx_fwd = SimpleNamespace(
                native_tcnn_module=model.encoder.native_tcnn_module,
                loss_scale=model.encoder.loss_scale,
                native_ctx=native_ctx)
            return _module_function_backward.apply(dummy_ctx_fwd, dL_dy, x, params, y)

        def backward_apply_on_params(params):
            x = (torch.tensor([[0.17, 0.55, 0.79]], dtype=torch.float, device=device)).requires_grad_(True)
            dL_dy = torch.ones([*x.shape[:-1], model.encoder.n_output_dims], dtype=dtype, device=device)
            params = params.to(_torch_precision(model.encoder.native_tcnn_module.param_precision())).contiguous()
            native_ctx, y = model.encoder.native_tcnn_module.fwd(x, params)
            dummy_ctx_fwd = SimpleNamespace(
                native_tcnn_module=model.encoder.native_tcnn_module,
                loss_scale=model.encoder.loss_scale,
                native_ctx=native_ctx)
            return _module_function_backward.apply(dummy_ctx_fwd, dL_dy, x, params, y)

        def backward_apply_on_dLdy(dL_dy):
            x = (torch.tensor([[0.17, 0.55, 0.79]], dtype=torch.float, device=device)).requires_grad_(True)
            # params = model.encoder.params.data.to(_torch_precision(model.encoder.native_tcnn_module.param_precision())).contiguous()
            params = model.encoder.params.to(_torch_precision(model.encoder.native_tcnn_module.param_precision())).contiguous()
            native_ctx, y = model.encoder.native_tcnn_module.fwd(x, params)
            dummy_ctx_fwd = SimpleNamespace(
                native_tcnn_module=model.encoder.native_tcnn_module,
                loss_scale=model.encoder.loss_scale,
                native_ctx=native_ctx)
            return _module_function_backward.apply(dummy_ctx_fwd, dL_dy, x, params, y)
        # NOTE: partial passed (Jacobian mismatch for output 1 with respect to input 0, which is ddLdgrid_dx)
        # ✓ dL_dx w.r.t. x, i.e. ddLdx_dx (passed)
        # x dL_dgrid w.r.t. x, i.e. ddLdgrid_dx (second-order gradients from the grid's gradient are currently not supported)
        # autograd.gradcheck(
        #     backward_apply_on_x,
        #     # (torch.rand([1,3], dtype=torch.float, device=device)).requires_grad_(True),
        #     (torch.tensor([[0.17, 0.55, 0.79]], dtype=torch.float, device=device)).requires_grad_(True),
        #     eps=1.0e-4
        # )

        # NOTE: passed
        # ✓ dL_dx w.r.t. grid, i.e. ddLdx_dgrid (passed)
        # ✓ dL_dgrid w.r.t. grid, i.e. ddLdgrid_dgrid (all zero)
        autograd.gradcheck(
            backward_apply_on_params,
            model.encoder.params,
            eps=1.0e-3
        )

        # NOTE: partial passed (Jacobian mismatch for output 1 with respect to input 0, which is ddLdgrid_ddLdy)
        # ✓ dL_dx w.r.t. dL_dy, i.e. ddLdx_ddLdy (passed)
        # x dL_dgrid w.r.t. dL_dy, i.e. ddLdgrid_ddLdy (second-order gradients from the grid's gradient are currently not supported)
        autograd.gradcheck(
            backward_apply_on_dLdy,
            torch.randn([1, model.encoder.n_output_dims], dtype=dtype, device=device).requires_grad_(True),
            eps=1.0e-3, atol=0.01, rtol=0.001
        )
    def vis_graph():
        """
        visualize torch compute graphs
        """
        import torchviz
        device = torch.device("cuda")
        # NOTE: need a smaller net, otherwise will OOM
        model = SDF(True, n_levels=4, log2_hashmap_size=15, base_resolution=4).to(device)
        x = torch.tensor([[0.17, 0.55, 0.79]], dtype=torch.float, device=device)
        sdf, nablas = model.forward_with_nablas(x)
        torchviz.make_dot(
            (nablas, sdf, x, model.encoder.params, *list(model.decoder.parameters())),
            {'nablas': nablas, 'sdf': sdf, 'x': x, 'grid_param': model.encoder.params,
             **{n: p for n, p in model.decoder.named_parameters(prefix='decoder')}
             }).render("attached", format="png")
    def check_throw():
        network = tcnn.Network(3, 1, network_config={
            "otype": "FullyFusedMLP",     # Component type.
            "activation": 'ReLU',         # Activation of hidden layers.
            "output_activation": 'None',  # Activation of the output layer.
            "n_neurons": 64,              # Neurons in each hidden layer. May only be 16, 32, 64, or 128.
            "n_hidden_layers": 5,         # Number of hidden layers.
            "feedback_alignment": False   # Use feedback alignment [Lillicrap et al. 2016].
        }, seed=42)

    # test_()
    test_train()
    # grad_check()
    # vis_graph()
    # check_throw()
ventusff (Author) commented on Mar 25, 2022
Great work! I tried the code and it works perfectly! One notable thing: grad_check() should be run with double precision, but tcnn may automatically be compiled with float16 precision when your GPU architecture supports it. Unfortunately, grad_check() almost always failed under float16 precision with your code. For debugging purposes, I manually set TCNN_HALF_PRECISION to 0 in line 52 of tiny-cuda-nn/include/common.h, which forces compilation at float32 precision and is numerically more tolerant.
I just leave this note for others, in case they run into a similar problem.
Cheers,
Yes, only compilation with float32 precision can pass the gradcheck() tests.
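For anyone who hits this: below is a minimal sketch for checking at runtime which precision your tcnn build prefers. It reuses the internal helpers (_torch_precision and _C from tinycudann.modules) that grad_check() above already imports; these are tcnn internals rather than a stable public API, so treat it as illustrative only.

import torch
from tinycudann.modules import _torch_precision, _C

# Report the precision tiny-cuda-nn was compiled to prefer.
dtype = _torch_precision(_C.preferred_precision())
print("tcnn preferred precision:", dtype)
if dtype != torch.float32:
    # A float16 build will most likely fail gradcheck; rebuild with
    # TCNN_HALF_PRECISION set to 0 (see the note above) to get float32.
    print("Expect gradcheck failures; consider rebuilding with TCNN_HALF_PRECISION=0.")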