Commit test

This commit is contained in:
2024-01-16 17:22:21 +08:00
parent 92862c0372
commit 73635fda01
654 changed files with 178015 additions and 2 deletions

View File

@@ -0,0 +1,18 @@
# Quick Start: Kaolin Recipes
<hr>
For a quick start with Kaolin, see the example snippets included below. <br>
In depth guides are available in the [tutorials](https://kaolin.readthedocs.io/en/latest/notes/tutorial_index.html) section.
## Data
### Converting Data
<hr>
* [Point cloud to SPC](https://github.com/NVIDIAGameWorks/kaolin/blob/master/examples/recipes/dataload/spc_from_pointcloud.py)
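
For a quick taste of the conversion above, here is a minimal sketch (an addition to this README, assuming a CUDA device and the `unbatched_pointcloud_to_spc` op used in the linked recipe):

```python
import torch
import kaolin

# Point coordinates are expected to be normalized to the range [-1, 1]
points = torch.tensor([[-1.0, -1.0, -1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]], device='cuda')
spc = kaolin.ops.conversions.pointcloud.unbatched_pointcloud_to_spc(pointcloud=points, level=3)
print(spc)
```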
## 3D Formats
### SPC / Octree based Ops
<hr>
* [SPC: Basic Usage](https://github.com/NVIDIAGameWorks/kaolin/blob/master/examples/recipes/spc/spc_basics.py)

View File

@@ -0,0 +1,25 @@
# ==============================================================================================================
# The following snippet demonstrates how to change the coordinate system of the camera.
# ==============================================================================================================
import math
import torch
import numpy as np
from kaolin.render.camera import Camera, blender_coords
device = 'cuda'
camera = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
fov=30 * np.pi / 180, # In radians
width=800, height=800,
device=device
)
print(camera.basis_change_matrix)
camera.change_coordinate_system(blender_coords())
print(camera.basis_change_matrix)
camera.reset_coordinate_system()
print(camera.basis_change_matrix)

View File

@@ -0,0 +1,88 @@
# ==============================================================================================================
# The following snippet demonstrates how to initialize instances of kaolin's pinhole / ortho cameras
# explicitly.
# Also review `camera_init_simple` which greatly simplifies the construction methods shown here.
# ==============================================================================================================
import math
import torch
from kaolin.render.camera import Camera, CameraExtrinsics, PinholeIntrinsics, OrthographicIntrinsics
#################################################################
# Camera 1: from eye, at, up and focal length (Perspective) #
#################################################################
# Build the camera extrinsics object from lookat
eye = torch.tensor([0.0, 0.0, -1.0], device='cuda') # Camera positioned here in world coords
at = torch.tensor([0.0, 0.0, 0.0], device='cuda') # Camera observing this world point
up = torch.tensor([0.0, 1.0, 0.0], device='cuda') # Camera up direction vector
extrinsics = CameraExtrinsics.from_lookat(eye, at, up)
# Build a pinhole camera's intrinsics. This time we use focal length (other useful args: focal_y, x0, y0)
intrinsics = PinholeIntrinsics.from_focal(width=800, height=600, focal_x=1.0, device='cuda')
# Combine extrinsics and intrinsics to obtain the full camera object
camera_1 = Camera(extrinsics=extrinsics, intrinsics=intrinsics)
print('--- Camera 1 ---')
print(camera_1)
########################################################################
# Camera 2: from camera position, orientation and fov (Perspective) #
########################################################################
# Build the camera extrinsics object from the camera position and orientation
cam_pos = torch.tensor([0.0, 0.0, -1.0], device='cuda')
cam_dir = torch.tensor([[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0]], device='cuda') # 3x3 orientation within the world
extrinsics = CameraExtrinsics.from_camera_pose(cam_pos=cam_pos, cam_dir=cam_dir)
# Use pinhole camera intrinsics, construct using field-of-view (other useful args: camera_fov_direction, x0, y0)
intrinsics = PinholeIntrinsics.from_fov(width=800, height=600, fov=math.radians(45.0), device='cuda')
camera_2 = Camera(extrinsics=extrinsics, intrinsics=intrinsics)
print('--- Camera 2 ---')
print(camera_2)
####################################################################
# Camera 3: from camera view matrix (Orthographic)               #
####################################################################
# Build the camera extrinsics object from a view matrix (world2cam)
world2cam = torch.tensor([[1.0, 0.0, 0.0, 0.5],
[0.0, 1.0, 0.0, 0.5],
[0.0, 0.0, 1.0, 0.5],
[0.0, 0.0, 0.0, 1.0]], device='cuda') # 4x4 view matrix (world to camera)
extrinsics = CameraExtrinsics.from_view_matrix(view_matrix=world2cam)
# Use orthographic camera intrinsics, constructed from the view frustum dimensions (near, far, fov_distance)
intrinsics = OrthographicIntrinsics.from_frustum(width=800, height=600, near=-800, far=800,
fov_distance=1.0, device='cuda')
camera_3 = Camera(extrinsics=extrinsics, intrinsics=intrinsics)
print('--- Camera 3 ---')
print(camera_3)
##########################################################
# Camera 4: Combining cameras #
##########################################################
# Cameras must share the same intrinsics type and the same non-parameter fields such as width, height, near, far
# (currently we don't perform validation)
camera_4 = Camera.cat((camera_1, camera_2))
print('--- Camera 4 ---')
print(camera_4)
##########################################################
# Camera 5: constructing a batch of cameras together #
##########################################################
# Extrinsics are created using batched tensors. The intrinsics are automatically broadcasted.
camera_5 = Camera.from_args(
eye=torch.tensor([[4.0, 4.0, 4.0], [4.0, 4.0, 4.0]]),
at=torch.tensor([[0.0, 0.0, 0.0], [4.0, 4.0, 4.0]]),
up=torch.tensor([[0.0, 1.0, 0.0], [4.0, 4.0, 4.0]]),
width=800, height=600, focal_x=300.0
)
print('--- Camera 5 ---')
print(camera_5)

View File

@@ -0,0 +1,65 @@
# ==============================================================================================================
# The following snippet demonstrates how to initialize instances of kaolin's pinhole / ortho cameras.
# ==============================================================================================================
import math
import torch
import numpy as np
from kaolin.render.camera import Camera
device = 'cuda'
perspective_camera_1 = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
fov=30 * np.pi / 180, # In radians
x0=0.0, y0=0.0,
width=800, height=800,
near=1e-2, far=1e2,
dtype=torch.float64,
device=device
)
print('--- Perspective Camera 1 ---')
print(perspective_camera_1)
perspective_camera_2 = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
fov=30 * np.pi / 180, # In radians
width=800, height=800,
device=device
)
print('--- Perspective Camera 2 ---')
print(perspective_camera_2)
ortho_camera_1 = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
width=800, height=800,
near=-800, far=800,
fov_distance=1.0,
dtype=torch.float64,
device=device
)
print('--- Orthographic Camera 1 ---')
print(ortho_camera_1)
ortho_camera_2 = Camera.from_args(
view_matrix=torch.tensor([[1.0, 0.0, 0.0, 0.5],
[0.0, 1.0, 0.0, 0.5],
[0.0, 0.0, 1.0, 0.5],
[0.0, 0.0, 0.0, 1.0]]),
width=800, height=800,
dtype=torch.float64,
device=device
)
print('--- Orthographic Camera 2 ---')
print(ortho_camera_2)

View File

@@ -0,0 +1,27 @@
# ==============================================================================================================
# The following snippet demonstrates how to manipulate kaolin's camera.
# ==============================================================================================================
import torch
from kaolin.render.camera import Camera
camera = Camera.from_args(
eye=torch.tensor([0.0, 0.0, -1.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
width=800, height=600,
fov=1.0,
device='cuda'
)
# Extrinsic rigid transformations managed by CameraExtrinsics
camera.move_forward(amount=10.0) # Translate forward in world coordinates (this is wisp's mouse zoom)
camera.move_right(amount=-5.0) # Translate left in world coordinates
camera.move_up(amount=5.0) # Translate up in world coordinates
camera.rotate(yaw=0.1, pitch=0.02, roll=1.0) # Rotate the camera
# Intrinsic lens transformations managed by CameraIntrinsics
# Zoom in to decrease the field of view - for orthographic projection the internal implementation differs,
# as there is no actual fov or depth concept (hence a "made up" fov_distance parameter is used, see the projection matrix)
camera.zoom(amount=0.5)
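# A small hedged addition (not part of the original recipe): for a pinhole camera the effect of
# zooming can be observed through the half-fov tangent, which shrinks as we keep zooming in.
from kaolin.render.camera import CameraFOV
print(camera.tan_half_fov(CameraFOV.VERTICAL))  # before
camera.zoom(amount=0.5)                         # zoom in a bit more
print(camera.tan_half_fov(CameraFOV.VERTICAL))  # smaller value -> narrower field of view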

View File

@@ -0,0 +1,57 @@
# ==============================================================================================================
# The following snippet demonstrates how to use the camera for generating a view-projection matrix
# as used in opengl shaders.
# ==============================================================================================================
import torch
import numpy as np
from kaolin.render.camera import Camera
# !!! This example is not runnable as-is -- it is kept minimal to show how the camera matrix      !!!
# !!! integrates with an OpenGL shader                                                            !!!
try:
from glumpy import gloo
except ImportError:
class DummyGloo(object):
def Program(self, vertex, fragment):
# see: https://glumpy.readthedocs.io/en/latest/api/gloo-shader.html#glumpy.gloo.Program
return dict([])
gloo = DummyGloo()
device = 'cuda'
camera = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
fov=30 * np.pi / 180, # In radians
x0=0.0, y0=0.0,
width=800, height=800,
near=1e-2, far=1e2,
dtype=torch.float64,
device=device
)
vertex = """
uniform mat4 u_viewprojection;
attribute vec3 position;
attribute vec4 color;
varying vec4 v_color;
void main()
{
v_color = color;
gl_Position = u_viewprojection * vec4(position, 1.0f);
} """
fragment = """
varying vec4 v_color;
void main()
{
gl_FragColor = v_color;
} """
# Compile GL program
gl_program = gloo.Program(vertex, fragment)
gl_program["u_viewprojection"] = camera.view_projection_matrix()[0].cpu().numpy().T

View File

@@ -0,0 +1,47 @@
# ==============================================================================================================
# The following snippet demonstrates various camera properties
# ==============================================================================================================
import math
import torch
import numpy as np
from kaolin.render.camera import Camera
device = 'cuda'
camera = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
fov=30 * np.pi / 180, # In radians
width=800, height=800,
dtype=torch.float32,
device=device
)
print(camera.width)
print(camera.height)
print(camera.lens_type)
print(camera.device)
camera = camera.cpu()
print(camera.device)
# Create a batched camera and view single element
camera = Camera.cat((camera, camera))
print(camera)
camera = camera[0]
print(camera)
print(camera.dtype)
camera = camera.half()
print(camera.dtype)
camera = camera.double()
print(camera.dtype)
camera = camera.float()
print(camera.dtype)
print(camera.extrinsics.requires_grad)
print(camera.intrinsics.requires_grad)
print(camera.to('cuda', torch.float64))

View File

@@ -0,0 +1,71 @@
# ==============================================================================================================
# The following snippet demonstrates how to use the camera for implementing a ray-generation function
# for ray based applications.
# ==============================================================================================================
import torch
import numpy as np
from typing import Tuple
from kaolin.render.camera import Camera, CameraFOV
def generate_pixel_grid(res_x=None, res_y=None, device='cuda'):
h_coords = torch.arange(res_x, device=device)
w_coords = torch.arange(res_y, device=device)
pixel_y, pixel_x = torch.meshgrid(h_coords, w_coords)
pixel_x = pixel_x + 0.5
pixel_y = pixel_y + 0.5
return pixel_y, pixel_x
def generate_perspective_rays(camera: Camera, pixel_grid: Tuple[torch.Tensor, torch.Tensor]):
# coords_grid should remain immutable (a new tensor is implicitly created here)
pixel_y, pixel_x = pixel_grid
pixel_x = pixel_x.to(camera.device, camera.dtype)
pixel_y = pixel_y.to(camera.device, camera.dtype)
# Account for principal point offset from canvas center
pixel_x = pixel_x - camera.x0
pixel_y = pixel_y + camera.y0
# Normalize pixel values to the range [-1, 1]; both tensors are of shape (res_y, res_x)
pixel_x = 2 * (pixel_x / camera.width) - 1.0
pixel_y = 2 * (pixel_y / camera.height) - 1.0
ray_dir = torch.stack((pixel_x * camera.tan_half_fov(CameraFOV.HORIZONTAL),
-pixel_y * camera.tan_half_fov(CameraFOV.VERTICAL),
-torch.ones_like(pixel_x)), dim=-1)
ray_dir = ray_dir.reshape(-1, 3) # Flatten grid rays to 1D array
ray_orig = torch.zeros_like(ray_dir)
# Transform from camera to world coordinates
ray_orig, ray_dir = camera.extrinsics.inv_transform_rays(ray_orig, ray_dir)
ray_dir /= torch.linalg.norm(ray_dir, dim=-1, keepdim=True)
ray_orig, ray_dir = ray_orig[0], ray_dir[0] # Assume a single camera
return ray_orig, ray_dir, camera.near, camera.far
camera = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
fov=30 * np.pi / 180, # In radians
x0=0.0, y0=0.0,
width=800, height=800,
near=1e-2, far=1e2,
dtype=torch.float64,
device='cuda'
)
pixel_grid = generate_pixel_grid(200, 200)
ray_orig, ray_dir, near, far = generate_perspective_rays(camera, pixel_grid)
print('Ray origins:')
print(ray_orig)
print('Ray directions:')
print(ray_dir)
print('Near clipping plane:')
print(near)
print('Far clipping plane:')
print(far)
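# A small hedged sanity check (not part of the original recipe): one ray is generated per pixel of
# the 200x200 grid, and directions are unit length after normalization.
print(ray_dir.shape)                           # expected: torch.Size([40000, 3])
print(torch.linalg.norm(ray_dir, dim=-1)[:5])  # expected: values close to 1.0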

View File

@@ -0,0 +1,59 @@
# ==============================================================================================================
# The following snippet demonstrates how to use the camera transform directly on vectors
# ==============================================================================================================
import math
import torch
import numpy as np
from kaolin.render.camera import Camera
device = 'cuda'
camera = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
fov=30 * np.pi / 180, # In radians
width=800, height=800,
dtype=torch.float32,
device=device
)
print('View projection matrix')
print(camera.view_projection_matrix())
print('View matrix: world2cam')
print(camera.view_matrix())
print('Inv View matrix: cam2world')
print(camera.inv_view_matrix())
print('Projection matrix')
print(camera.projection_matrix())
vectors = torch.randn(10, 3).to(camera.device, camera.dtype) # Create a batch of points
# For ortho and perspective: this is equivalent to multiplying camera.projection_matrix() @ vectors
# and then dividing by the w coordinate (perspective division)
print(camera.transform(vectors))
# For ray tracing we have camera.inv_transform_rays which performs multiplication with inv_view_matrix()
# (just for the extrinsics part)
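# A hedged sketch (not part of the original recipe) expanding the note above: the same projection
# done manually with homogeneous coordinates and an explicit perspective division by w.
homogeneous = torch.cat([vectors, torch.ones_like(vectors[:, :1])], dim=-1)  # (10, 4)
clip = homogeneous @ camera.projection_matrix()[0].T                         # (10, 4)
print(clip[:, :3] / clip[:, 3:])                                             # perspective division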
# Can also access properties directly:
# --
# View matrix components (camera space)
print(camera.R)
print(camera.t)
# Camera axes and position in world coordinates
print(camera.cam_pos())
print(camera.cam_right())
print(camera.cam_up())
print(camera.cam_forward())
print(camera.focal_x)
print(camera.focal_y)
print(camera.x0)
print(camera.y0)

View File

@@ -0,0 +1,65 @@
# ====================================================================================================================
# The following snippet demonstrates how cameras can be used for optimizing specific extrinsic / intrinsic parameters
# ====================================================================================================================
import torch
import torch.optim as optim
from kaolin.render.camera import Camera
# Create simple perspective camera
cam = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
width=800, height=600, focal_x=300.0
)
# When requires_grad is on, the camera will automatically switch to a differentiation-friendly backend
# (implicitly calling cam.switch_backend('matrix_6dof_rotation'))
cam.requires_grad_(True)
# Constrain the camera to optimize only the focal length and camera position (it cannot rotate)
ext_mask, int_mask = cam.gradient_mask('t', 'focal_x', 'focal_y')
ext_params, int_params = cam.parameters()
ext_params.register_hook(lambda grad: grad * ext_mask.float())
grad_scale = 1e5 # Used to move the projection matrix elements faster
int_params.register_hook(lambda grad: grad * int_mask.float() * grad_scale)
# Create a target camera and make it a bit noisy
# (currently we can't copy the camera after requires_grad is set, since a camera.detach() op is still missing)
target = Camera.from_args(
eye=torch.tensor([4.0, 4.0, 4.0]),
at=torch.tensor([0.0, 0.0, 0.0]),
up=torch.tensor([0.0, 1.0, 0.0]),
width=800, height=600, focal_x=300.0
)
target.t = target.t + torch.randn_like(target.t)
target.focal_x = target.focal_x + torch.randn_like(target.focal_x)
target.focal_y = target.focal_y + torch.randn_like(target.focal_y)
target_mat = target.view_projection_matrix()
# Save for later so we have some comparison of what changed
initial_view = cam.view_matrix().detach().clone()
initial_proj = cam.projection_matrix().detach().clone()
# Train a few steps
optimizer = optim.SGD(cam.parameters(), lr=0.1, momentum=0.9)
for idx in range(10):
view_proj = cam.view_projection_matrix()
optimizer.zero_grad()
loss = torch.nn.functional.mse_loss(target_mat, view_proj)
loss.backward()
optimizer.step()
print(f'Iteration {idx}:')
print(f'Loss: {loss.item()}')
print(f'Extrinsics: {cam.extrinsics.parameters()}')
print(f'Intrinsics: {cam.intrinsics.parameters()}')
# Projection matrix grads are much smaller, as they're scaled by the view-frustum dimensions.
print(f'View matrix before: {initial_view}')
print(f'View matrix after: {cam.view_matrix()}')
print(f'Projection matrix before: {initial_proj}')
print(f'Projection matrix after: {cam.projection_matrix()}')
print('Did the camera change?')
print(not (torch.allclose(initial_view, cam.view_matrix()) and torch.allclose(initial_proj, cam.projection_matrix())))
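# A hedged check (not part of the original recipe): since the gradient mask only lets 't', 'focal_x'
# and 'focal_y' update, the rotation block of the view matrix should remain unchanged.
print('Did the rotation stay fixed?')
print(torch.allclose(initial_view[:, :3, :3], cam.view_matrix()[:, :3, :3]))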

View File

@@ -0,0 +1,52 @@
# ==============================================================================================================
# The following snippet demonstrates how to build kaolin's compressed octree,
# "Structured Point Cloud (SPC)", from raw point cloud data.
# ==============================================================================================================
# See also:
#
# - Tutorial: Understanding Structured Point Clouds (SPCs)
# https://github.com/NVIDIAGameWorks/kaolin/blob/master/examples/tutorial/understanding_spcs_tutorial.ipynb
#
# - Documentation: Structured Point Clouds
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.spc.html?highlight=spc#kaolin-ops-spc
# ==============================================================================================================
import torch
import kaolin
# Create some point data with features
# Point coordinates are expected to be normalized to the range [-1, 1].
points = torch.tensor([
[-1.0, -1.0, -1.0],
[-0.9, -0.95, -1.0],
[1.0, 0.0, 0.0],
[0.0, -0.1, 0.3],
[1.0, 1.0, 1.0]
], device='cuda')
features = torch.tensor([
[0.1, 1.1, 2.1],
[0.2, 1.2, 2.2],
[0.3, 1.3, 2.3],
[0.4, 1.4, 2.4],
[0.5, 1.5, 2.5],
], device='cuda')
# The Structured Point Cloud will use 3 levels of detail
level = 3
# In kaolin, operations are batched by default
# Here, in contrast, we use a single point cloud and therefore invoke an unbatched conversion function.
# For more information about batched operations, see:
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.batch.html#kaolin-ops-batch
spc = kaolin.ops.conversions.pointcloud.unbatched_pointcloud_to_spc(pointcloud=points,
level=level,
features=features)
# SPC is an object which keeps track of the various octree components
print(spc)
print(f'SPC keeps track of the following cells in {level} levels of detail (parents + leaves):\n'
f' {spc.point_hierarchies}\n')
# Note that the point cloud coordinates are quantized to integer coordinates.
# During conversion, when points fall within the same cell, their features are averaged
print(f'Features for leaf cells:\n {spc.features}')
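# A hedged addition (not part of the original recipe): with this input the first two points fall
# into the same level-3 cell, so there are fewer leaf features than input points, and the
# colliding features are averaged.
print(f'Number of leaf features: {spc.features.shape[0]} (from {points.shape[0]} input points)')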

View File

@@ -0,0 +1,140 @@
# ==============================================================================================================
# The following snippet shows how to use kaolin to preprocess a ShapeNet dataset
# so that point clouds can be quickly sampled from the meshes at runtime
# ==============================================================================================================
# See also:
# - Documentation: ShapeNet dataset
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.io.shapenet.html#kaolin.io.shapenet.ShapeNetV2
# - Documentation: CachedDataset
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.io.dataset.html#kaolin.io.dataset.CachedDataset
# - Documentation: Mesh Ops:
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.mesh.html
# - Documentation: Obj loading:
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.io.obj.html
# ==============================================================================================================
import argparse
import os
import torch
import kaolin as kal
parser = argparse.ArgumentParser(description='')
parser.add_argument('--shapenet-dir', type=str, default=os.getenv('KAOLIN_TEST_SHAPENETV2_PATH'),
help='Path to shapenet (v2)')
parser.add_argument('--cache-dir', type=str, default='/tmp/dir',
help='Path where output of the dataset is cached')
parser.add_argument('--num-samples', type=int, default=10,
help='Number of points to sample on the mesh')
parser.add_argument('--cache-at-runtime', action='store_true',
help='run the preprocessing lazily')
parser.add_argument('--num-workers', type=int, default=0,
help='Number of workers during preprocessing (not used with --cache-at-runtime)')
args = parser.parse_args()
def preprocessing_transform(inputs):
"""This the transform used in shapenet dataset __getitem__.
Three tasks are done:
1) Get the areas of each faces, so it can be used to sample points
2) Get a proper list of RGB diffuse map
3) Get the material associated to each face
"""
mesh = inputs['mesh']
vertices = mesh.vertices.unsqueeze(0)
faces = mesh.faces
# Some materials don't contain an RGB texture map, so we treat their single Kd value
# as a one-pixel texture map of shape (1, 3, 1, 1).
# We apply a modulo 1 on the UVs because ShapeNet follows GL_REPEAT behavior (see: https://open.gl/textures)
uvs = torch.nn.functional.pad(mesh.uvs.unsqueeze(0) % 1, (0, 0, 0, 1)) * 2. - 1.
uvs[:, :, 1] = -uvs[:, :, 1]
face_uvs_idx = mesh.face_uvs_idx
face_material_idx = mesh.material_assignments
materials = [m['map_Kd'].permute(2, 0, 1).unsqueeze(0).float() / 255. if 'map_Kd' in m else
m['Kd'].reshape(1, 3, 1, 1)
for m in mesh.materials]
mask = face_uvs_idx == -1
face_uvs_idx[mask] = 0
face_uvs = kal.ops.mesh.index_vertices_by_faces(
uvs, face_uvs_idx
)
face_uvs[:, mask] = 0.
outputs = {
'vertices': vertices,
'faces': faces,
'face_areas': kal.ops.mesh.face_areas(vertices, faces),
'face_uvs': face_uvs,
'materials': materials,
'face_material_idx': face_material_idx,
'name': inputs['name']
}
return outputs
class SamplePointsTransform(object):
def __init__(self, num_samples):
self.num_samples = num_samples
def __call__(self, inputs):
coords, face_idx, feature_uvs = kal.ops.mesh.sample_points(
inputs['vertices'],
inputs['faces'],
num_samples=self.num_samples,
areas=inputs['face_areas'],
face_features=inputs['face_uvs']
)
coords = coords.squeeze(0)
face_idx = face_idx.squeeze(0)
feature_uvs = feature_uvs.squeeze(0)
# Interpolate the RGB values from the texture map
point_materials_idx = inputs['face_material_idx'][face_idx]
all_point_colors = torch.zeros((self.num_samples, 3))
for i, material in enumerate(inputs['materials']):
mask = point_materials_idx == i
point_color = torch.nn.functional.grid_sample(
material,
feature_uvs[mask].reshape(1, 1, -1, 2),
mode='bilinear',
align_corners=False,
padding_mode='border')
all_point_colors[mask] = point_color[0, :, 0, :].permute(1, 0)
outputs = {
'coords': coords,
'face_idx': face_idx,
'colors': all_point_colors,
'name': inputs['name']
}
return outputs
# Make ShapeNet dataset with preprocessing transform
ds = kal.io.shapenet.ShapeNetV2(root=args.shapenet_dir,
categories=['dishwasher'],
train=True,
split=0.1,
with_materials=True,
output_dict=True,
transform=preprocessing_transform)
# Cache the result of the preprocessing transform
# and apply the sampling at runtime
pc_ds = kal.io.dataset.CachedDataset(ds,
cache_dir=args.cache_dir,
save_on_disk=True,
num_workers=args.num_workers,
transform=SamplePointsTransform(args.num_samples),
cache_at_runtime=args.cache_at_runtime,
force_overwrite=True)
for data in pc_ds:
print("coords:\n", data['coords'])
print("face_idx:\n", data['face_idx'])
print("colors:\n", data['colors'])
print("name:\n", data['name'])

View File

@@ -0,0 +1,48 @@
# ==============================================================================================================
# The following snippet shows how to use kaolin to test sampled values of an occupancy function
# against a watertight mesh.
# ==============================================================================================================
# See also:
# - Documentation: Triangular meshes
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.mesh.html#triangular-meshes
# ==============================================================================================================
import os
import torch
import kaolin
FILE_DIR = os.path.abspath(os.path.dirname(os.path.abspath(__file__)))
mesh_path = os.path.join(FILE_DIR, os.pardir, os.pardir, "samples", "sphere.obj") # Path to some .obj file with textures
num_samples = 100000 # Number of sample points
# 1. Load a watertight mesh from obj file
mesh = kaolin.io.obj.import_mesh(mesh_path)
print(f'Loaded mesh with {len(mesh.vertices)} vertices and {len(mesh.faces)} faces.')
# 2. Preprocess mesh:
# Move tensors to CUDA device
vertices = mesh.vertices.cuda()
faces = mesh.faces.cuda()
# Kaolin assumes an exact batch format, so we convert the vertices from (V, 3) to (1, V, 3), where 1 is the batch size
vertices = vertices.unsqueeze(0)
# 3. Sample random points uniformly in space, from the bounding box of the mesh + 10% margin
min_bound, _ = vertices.min(dim=1)
max_bound, _ = vertices.max(dim=1)
margin = (max_bound - min_bound) * 0.1
max_bound += margin
min_bound -= margin
occupancy_coords = (max_bound - min_bound) * torch.rand(1, num_samples, 3, device='cuda') + min_bound
# 4. Calculate occupancy value
occupancy_value = kaolin.ops.mesh.check_sign(vertices, faces, occupancy_coords)
# Unbatch to obtain tensors of shape (num_samples, 3) and (num_samples,)
occupancy_coords = occupancy_coords.squeeze(0)
occupancy_value = occupancy_value.squeeze(0)
percent_in_mesh = 100.0 * torch.count_nonzero(occupancy_value) / len(occupancy_value)
print(f'Sampled a tensor of points uniformly in space '
      f'with {occupancy_coords.shape[0]} points of {occupancy_coords.shape[1]}D coordinates.')
print(f'{percent_in_mesh:.3f}% of the sampled points are inside the mesh volume.')

View File

@@ -0,0 +1,54 @@
# ==============================================================================================================
# The following snippet demonstrates the basic usage of kaolin's compressed octree,
# termed "Structured Point Cloud (SPC)".
# Note this is a low level structure: practitioners are encouraged to visit the references below.
# ==============================================================================================================
# See also:
#
# - Code: kaolin.ops.spc.SPC
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.rep.html?highlight=SPC#kaolin.rep.Spc
#
# - Tutorial: Understanding Structured Point Clouds (SPCs)
# https://github.com/NVIDIAGameWorks/kaolin/blob/master/examples/tutorial/understanding_spcs_tutorial.ipynb
#
# - Documentation: Structured Point Clouds
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.spc.html?highlight=spc#kaolin-ops-spc
# ==============================================================================================================
import torch
import kaolin
# Construct SPC from some points data. Point coordinates are expected to be normalized to the range [-1, 1].
points = torch.tensor([[-1.0, -1.0, -1.0], [-0.9, -0.95, -1.0], [1.0, 1.0, 1.0]], device='cuda')
# In kaolin, operations are batched by default
# Here, in contrast, we use a single point cloud and therefore invoke an unbatched conversion function.
# The Structured Point Cloud will be using 3 levels of detail
spc = kaolin.ops.conversions.pointcloud.unbatched_pointcloud_to_spc(pointcloud=points, level=3)
# SPC is a batched object, and most of its fields are packed.
# (see: https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.batch.html#kaolin-ops-batch )
# spc.lengths defines the boundaries between the different batched SPC instances the same object holds.
# Here we keep track of a single entry batch, which has 8 octree non-leaf cells.
print(f'spc.batch_size: {spc.batch_size}')
print(f'spc.lengths (cells per batch entry): {spc.lengths}')
# SPC is hierarchical and keeps information for every level of detail from 0 to 3.
# spc.point_hierarchies keeps the sparse, zero indexed coordinates of each occupied cell, per level.
print(f'SPC keeps track of a total of {spc.point_hierarchies.shape[0]} parent + leaf cells:')
# To tell the level boundaries apart, the spc.pyramids field is used.
# This field is not packed, unlike the other SPC fields.
pyramid_of_first_entry_in_batch = spc.pyramids[0]
cells_per_level = pyramid_of_first_entry_in_batch[0]
cumulative_cells_per_level = pyramid_of_first_entry_in_batch[1]
for i, lvl_cells in enumerate(cells_per_level[:-1]):
print(f'LOD #{i} has {lvl_cells} cells.')
# The spc.octrees field keeps track of the fundamental occupancy information of each cell in the octree.
print('The occupancy of each octant parent cell, in Morton / Z-curve order is:')
print(['{0:08b}'.format(octree_byte) for octree_byte in spc.octrees])
# Since SPCs are low level objects, they require bookkeeping of multiple fields.
# For ease of use, these fields are collected and tracked within a single class: kaolin.ops.spc.SPC
# See references at the header for elaborate information on how to use this object.
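# A hedged addition (not part of the original snippet): the second row of the pyramid holds the
# cumulative cell counts, which act as per-level offsets into spc.point_hierarchies.
print(f'Per-level offsets into point_hierarchies: {cumulative_cells_per_level}')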

View File

@@ -0,0 +1,113 @@
# ==============================================================================================================
# The following code demonstrates the usage of kaolin's "Structured Point Cloud (SPC)" 3d convolution
# functionality. Note that this sample does NOT demonstrate how to use Kaolin's Pytorch 3d convolution layers.
# Rather, 3d convolutions are used to 'filter' color data useful for level-of-detail management during
# rendering. This can be thought of as the 3d analog of generating a 2d mipmap.
#
# Note this is a low level interface: practitioners are encouraged to visit the references below.
# ==============================================================================================================
# See also:
#
# - Code: kaolin.ops.spc.SPC
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.rep.html?highlight=SPC#kaolin.rep.Spc
#
# - Tutorial: Understanding Structured Point Clouds (SPCs)
# https://github.com/NVIDIAGameWorks/kaolin/blob/master/examples/tutorial/understanding_spcs_tutorial.ipynb
#
# - Documentation: Structured Point Clouds
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.spc.html?highlight=spc#kaolin-ops-spc
# ==============================================================================================================
import torch
import kaolin
# The following function applies a series of SPC convolutions to encode the entire hierarchy into a single tensor.
# Each step applies a convolution on the "highest" level of the SPC with some averaging kernel.
# Therefore, each step locally averages the "colored point hierarchy", where each "colored point"
# corresponds to a point in the SPC point hierarchy.
# For a description of inputs 'octree', 'point_hierachy', 'level', 'pyramids', and 'exsum', as well as a
# detailed description of the mathematics of SPC convolutions, see:
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.spc.html?highlight=SPC#kaolin.ops.spc.Conv3d
# The input 'colors' is a PyTorch tensor containing color features corresponding to some 'level' of the hierarchy.
def encode(colors, octree, point_hierachy, pyramids, exsum, level):
# SPC convolutions are characterized by a set of 'kernel vectors' and corresponding 'weights'.
# kernel_vectors is the "kernel support" -
# a listing of 3D coordinates where the weights of the convolution are non-null,
# in this case it's a simple dense 2x2x2 grid.
kernel_vectors = torch.tensor([[0,0,0],[0,0,1],[0,1,0],[0,1,1],
[1,0,0],[1,0,1],[1,1,0],[1,1,1]],
dtype=torch.short, device='cuda')
# The weights specify how the input colors 'under' the kernel are mapped to an output color,
# in this case a simple average.
weights = torch.diag(torch.tensor([0.125, 0.125, 0.125, 0.125],
dtype=torch.float32, device='cuda')) # Tensor of (4, 4)
weights = weights.repeat(8,1,1).contiguous() # Tensor of (8, 4, 4)
# Storage for the output color hierarchy is allocated. This includes points at the bottom of the hierarchy,
# as well as intermediate SPC levels (which may store different features)
color_hierarchy = torch.empty((pyramids[0,1,level+1],4), dtype=torch.float32, device='cuda')
# Copy the input colors into the highest level of color_hierarchy. pyramids is used here to select all leaf
# points at the bottom of the hierarchy and set them to some pre-sampled random color. Points at intermediate
# levels are left empty.
color_hierarchy[pyramids[0,1,level]:pyramids[0,1,level+1]] = colors[:]
# Performs the 3d convolutions in a bottom up fashion to 'filter' colors from the previous level
for l in range(level,0,-1):
# Apply the 3d convolution. Note that jump=1 means the inputs and outputs differ by 1 level
# This is analogous to a stride=2 in grid based convolutions
colors, ll = kaolin.ops.spc.conv3d(octree,
point_hierachy,
l,
pyramids,
exsum,
colors,
weights,
kernel_vectors,
jump=1)
# Copy the output colors into the color hierarchy
color_hierarchy[pyramids[0,1,ll]:pyramids[0,1,l]] = colors[:]
print(f"At level {l}, output feature shape is:\n{colors.shape}")
# Normalize the colors.
color_hierarchy /= color_hierarchy[:,3:]
# Normalization is needed here due to the sparse nature of SPCs. When a point under a kernel is not
# present in the point hierarchy, the corresponding data is treated as zeros. Normalization is equivalent
# to having the filter weights sum to one. This may not always be desirable, e.g. alpha blending.
return color_hierarchy
# Highest level of SPC
level = 3
# Construct a fully occupied Structured Point Cloud with N levels of detail
# See https://kaolin.readthedocs.io/en/latest/modules/kaolin.rep.html?highlight=SPC#kaolin.rep.Spc
spc = kaolin.rep.Spc.make_dense(level, device='cuda')
# In kaolin, operations are batched by default, the spc object above contains a single item batch, hence [0]
num_points_last_lod = spc.num_points(level)[0]
# Create tensor of random colors for all points in the highest level of detail
colors = torch.rand((num_points_last_lod, 4), dtype=torch.float32, device='cuda')
# Set 4th color channel to one for subsequent color normalization
colors[:,3] = 1
print(f'Input SPC features: {colors.shape}')
# Encode color hierarchy by invoking a series of convolutions, until we end up with a single tensor.
color_hierarchy = encode(colors=colors,
octree=spc.octrees,
point_hierachy=spc.point_hierarchies,
pyramids=spc.pyramids,
exsum=spc.exsum,
level=level)
# Print root node color
print(f'Final encoded value (average of averages):')
print(color_hierarchy[0])
# This will be the average of averages, over the entire spc hierarchy. Since the initial random colors
# came from a uniform distribution, this should approach [0.5, 0.5, 0.5, 1.0] as 'level' increases
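# A hedged check (not part of the original snippet): for a fully occupied SPC with a uniform
# averaging kernel, the encoded root value should match the direct mean of the input leaf colors.
print(f'Direct mean of the input colors: {colors.mean(dim=0)}')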

View File

@@ -0,0 +1,73 @@
# ==============================================================================================================
# The following snippet demonstrates the basic usage of kaolin's dual octree, an octree which keeps features
# at the 8 corners of each cell (the primary octree keeps a single feature at each cell center).
# The implementation is realized through kaolin's "Structured Point Cloud (SPC)".
# Note this is a low level structure: practitioners are encouraged to visit the references below.
# ==============================================================================================================
# See also:
#
# - Code: kaolin.ops.spc.SPC
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.rep.html?highlight=SPC#kaolin.rep.Spc
#
# - Tutorial: Understanding Structured Point Clouds (SPCs)
# https://github.com/NVIDIAGameWorks/kaolin/blob/master/examples/tutorial/understanding_spcs_tutorial.ipynb
#
# - Documentation: Structured Point Clouds
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.spc.html?highlight=spc#kaolin-ops-spc
# ==============================================================================================================
import torch
import kaolin
# Construct SPC from some points data. Point coordinates are expected to be normalized to the range [-1, 1].
# To keep the example readable, by default we set the SPC level to 1: root + 8 cells
# (note that with a single LOD, only 2 cells should be occupied due to quantization)
level = 1
points = torch.tensor([[-1.0, -1.0, -1.0], [-0.9, -0.95, -1.0], [1.0, 1.0, 1.0]], device='cuda')
spc = kaolin.ops.conversions.pointcloud.unbatched_pointcloud_to_spc(pointcloud=points, level=level)
# Construct the dual octree with an unbatched operation, each cell is now converted to 8 corners
# More info about batched / packed tensors at:
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.batch.html#kaolin-ops-batch
pyramid = spc.pyramids[0] # The pyramids field is batched, we select the singleton entry, #0
point_hierarchy = spc.point_hierarchies # point_hierarchies is a packed tensor, so no need to unbatch
point_hierarchy_dual, pyramid_dual = kaolin.ops.spc.unbatched_make_dual(point_hierarchy=point_hierarchy,
pyramid=pyramid)
# Let's compare the primary and dual octrees.
# The function 'unbatched_get_level_points' yields a tensor which lists all points / sparse cell coordinates occupied
# at a certain level.
# [Primary octree] [Dual octree]
# . . . . . . . . X . . .X. . . X
# | . X . X | . | . . | .
# | . . . . . . . . ===> | X . . X . . . X
# | | . X . | X . X | . . | .
# | | . . . . . . . . | | X . . .X. . . X
# | | | | | | X | | |
# . .|. . | . . . | ===> X .|. . X . . X |
# .| X |. X . | .| |. . X
# . . | . . . . . | X . | . X . . X |
# . | X . X . | . | . . |
# . . . . . . . . X . . X . . . X
#
primary_lod0 = kaolin.ops.spc.unbatched_get_level_points(point_hierarchy, pyramid, level=0)
primary_lod1 = kaolin.ops.spc.unbatched_get_level_points(point_hierarchy, pyramid, level=1)
dual_lod0 = kaolin.ops.spc.unbatched_get_level_points(point_hierarchy_dual, pyramid_dual, level=0)
dual_lod1 = kaolin.ops.spc.unbatched_get_level_points(point_hierarchy_dual, pyramid_dual, level=1)
print(f'Primary octree: Level 0 (root cells): \n{primary_lod0}')
print(f'Dual octree: Level 0 (root corners): \n{dual_lod0}')
print(f'Primary octree: Level 1 (cells): \n{primary_lod1}')
print(f'Dual octree: Level 1 (corners): \n{dual_lod1}')
# kaolin allows for interchangeable usage of the primary and dual octrees.
# First we have to create a mapping between them:
trinkets, _ = kaolin.ops.spc.unbatched_make_trinkets(point_hierarchy, pyramid, point_hierarchy_dual, pyramid_dual)
# trinkets are indirection pointers (in practice, indices) from the nodes of the primary octree
# to the nodes of the dual octree. The nodes of the dual octree represent the corners of the voxels
# defined by the primary octree.
print(f'point_hierarchy is of shape {point_hierarchy.shape}')
print(f'point_hierarchy_dual is of shape {point_hierarchy_dual.shape}')
print(f'trinkets is of shape {trinkets.shape}')
print(f'Trinket indices are multilevel: {trinkets}')
# See also spc_trilinear_interp.py for a practical application which uses the dual octree & trinkets
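# A hedged addition (not part of the original snippet): gathering point_hierarchy_dual rows by a
# trinket entry recovers the 8 corner coordinates of the corresponding primary cell, e.g. for the
# first level-1 cell (pyramid[1, 1] is the offset where level-1 points begin).
first_level1_cell = pyramid[1, 1]
print(point_hierarchy_dual[trinkets[first_level1_cell].long()])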

View File

@@ -0,0 +1,69 @@
# ==============================================================================================================
# The following snippet demonstrates the basic usage of kaolin's dual octree, an octree which keeps features
# at the 8 corners of each cell (the primary octree keeps a single feature at each cell center).
# In this example we sample an interpolated value according to the 8 corners of a cell.
# The implementation is realized through kaolin's "Structured Point Cloud (SPC)".
# Note this is a low level structure: practitioners are encouraged to visit the references below.
# ==============================================================================================================
# See also:
#
# - Code: kaolin.ops.spc.SPC
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.rep.html?highlight=SPC#kaolin.rep.Spc
#
# - Tutorial: Understanding Structured Point Clouds (SPCs)
# https://github.com/NVIDIAGameWorks/kaolin/blob/master/examples/tutorial/understanding_spcs_tutorial.ipynb
#
# - Documentation: Structured Point Clouds
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.spc.html?highlight=spc#kaolin-ops-spc
# ==============================================================================================================
import torch
import kaolin
# Construct SPC from some points data. Point coordinates are expected to be normalized to the range [-1, 1].
# To keep the example readable, by default we set the SPC level to 1: root + 8 cells
# (note that with a single LOD, only 2 cells should be occupied due to quantization)
level = 1
points = torch.tensor([[-1.0, -1.0, -1.0], [-0.9, -0.95, -1.0], [1.0, 1.0, 1.0]], device='cuda')
spc = kaolin.ops.conversions.pointcloud.unbatched_pointcloud_to_spc(pointcloud=points, level=level)
# Construct the dual octree with an unbatched operation, each cell is now converted to 8 corners
# More info about batched / packed tensors at:
# https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.batch.html#kaolin-ops-batch
pyramid = spc.pyramids[0] # The pyramids field is batched, we select the singleton entry, #0
point_hierarchy = spc.point_hierarchies # point_hierarchies is a packed tensor, so no need to unbatch
point_hierarchy_dual, pyramid_dual = kaolin.ops.spc.unbatched_make_dual(point_hierarchy=point_hierarchy,
pyramid=pyramid)
# kaolin allows for interchangeable usage of the primary and dual octrees via the "trinkets" mapping
# trinkets are indirection pointers (in practice, indices) from the nodes of the primary octree
# to the nodes of the dual octree. The nodes of the dual octree represent the corners of the voxels
# defined by the primary octree.
trinkets, _ = kaolin.ops.spc.unbatched_make_trinkets(point_hierarchy, pyramid, point_hierarchy_dual, pyramid_dual)
# We'll now apply the dual octree and trinkets to perform trilinear interpolation.
# First we'll generate some features for the corners.
# The first row of pyramid / pyramid_dual specifies how many unique points exist per level.
# For pyramid_dual, this means how many "unique corners" are in place (as neighboring cells may share corners!)
num_of_corners_at_last_lod = pyramid_dual[0, level]
feature_dims = 32
feats = torch.rand([num_of_corners_at_last_lod, feature_dims], device='cuda')
# Create some query coordinate with normalized values in the range [-1, 1], here we pick (0.5, 0.5, 0.5).
# We'll also modify the dimensions of the query tensor to match the interpolation function api:
# batch dimension refers to the unique number of spc cells we're querying.
# samples_count refers to the number of interpolations we perform per cell.
query_coord = points.new_tensor((0.5, 0.5, 0.5)).unsqueeze(0) # Tensor of (batch, 3), in this case batch=1
sampled_query_coords = query_coord.unsqueeze(1) # Tensor of (batch, samples_count, 3), in this case samples_count=1
# unbatched_query converts from normalized coordinates to the index of the cell containing this point.
# The query_index can be used to pick the point from point_hierarchy
query_index = kaolin.ops.spc.unbatched_query(spc.octrees, spc.exsum, query_coord, level, with_parents=False)
# The unbatched_interpolate_trilinear function uses the query coordinates to perform trilinear interpolation.
# Here, unbatched specifies this function supports only a single SPC at a time.
# Per single SPC, we may interpolate a batch of coordinates and samples
interpolated = kaolin.ops.spc.unbatched_interpolate_trilinear(coords=sampled_query_coords,
pidx=query_index.int(),
point_hierarchy=point_hierarchy,
trinkets=trinkets, feats=feats, level=level)
print(f'Interpolated a tensor of shape {interpolated.shape} with values: {interpolated}')

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

12
examples/samples/bbox.mtl Normal file
View File

@@ -0,0 +1,12 @@
# Blender MTL File: 'None'
# Material Count: 1
newmtl Material
Ns 323.999994
Ka 1.000000 1.000000 1.000000
Kd 0.800000 0.800000 0.800000
Ks 0.500000 0.500000 0.500000
Ke 0.000000 0.000000 0.000000
Ni 1.450000
d 1.000000
illum 2

46
examples/samples/bbox.obj Normal file
View File

@@ -0,0 +1,46 @@
# Blender v3.0.0 OBJ File: ''
# www.blender.org
mtllib bbox.mtl
o Cube
v 1.000000 1.000000 -1.000000
v 1.000000 -1.000000 -1.000000
v 1.000000 1.000000 1.000000
v 1.000000 -1.000000 1.000000
v -1.000000 1.000000 -1.000000
v -1.000000 -1.000000 -1.000000
v -1.000000 1.000000 1.000000
v -1.000000 -1.000000 1.000000
vt 0.875000 0.500000
vt 0.625000 0.750000
vt 0.625000 0.500000
vt 0.375000 1.000000
vt 0.375000 0.750000
vt 0.625000 0.000000
vt 0.375000 0.250000
vt 0.375000 0.000000
vt 0.375000 0.500000
vt 0.125000 0.750000
vt 0.125000 0.500000
vt 0.625000 0.250000
vt 0.875000 0.750000
vt 0.625000 1.000000
vn 0.0000 1.0000 0.0000
vn 0.0000 0.0000 1.0000
vn -1.0000 0.0000 0.0000
vn 0.0000 -1.0000 0.0000
vn 1.0000 0.0000 0.0000
vn 0.0000 0.0000 -1.0000
usemtl Material
s off
f 5/1/1 3/2/1 1/3/1
f 3/2/2 8/4/2 4/5/2
f 7/6/3 6/7/3 8/8/3
f 2/9/4 8/10/4 6/11/4
f 1/3/5 4/5/5 2/9/5
f 5/12/6 2/9/6 6/7/6
f 5/1/1 7/13/1 3/2/1
f 3/2/2 7/14/2 8/4/2
f 7/6/3 5/12/3 6/7/3
f 2/9/4 4/5/4 8/10/4
f 1/3/5 3/2/5 4/5/5
f 5/12/6 1/3/6 2/9/6

Binary file not shown.

File diff suppressed because it is too large

View File

@@ -0,0 +1,14 @@
#
# Wavefront material file
# Converted by Meshlab Group
#
newmtl material_0
Ka 0.200000 0.200000 0.200000
Kd 0.752941 0.752941 0.752941
Ks 1.000000 1.000000 1.000000
Tr 1.000000
illum 2
Ns 0.000000
map_Kd sphere_mtl.jpg

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

5762
examples/samples/sphere.obj Normal file

File diff suppressed because it is too large

Binary file not shown.

View File

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,88 @@
import torch
from tqdm import tqdm
# MLP + Positional Encoding
class Decoder(torch.nn.Module):
def __init__(self, input_dims = 3, internal_dims = 128, output_dims = 4, hidden = 5, multires = 2):
super().__init__()
self.embed_fn = None
if multires > 0:
embed_fn, input_ch = get_embedder(multires)
self.embed_fn = embed_fn
input_dims = input_ch
net = (torch.nn.Linear(input_dims, internal_dims, bias=False), torch.nn.ReLU())
for i in range(hidden-1):
net = net + (torch.nn.Linear(internal_dims, internal_dims, bias=False), torch.nn.ReLU())
net = net + (torch.nn.Linear(internal_dims, output_dims, bias=False),)
self.net = torch.nn.Sequential(*net)
def forward(self, p):
if self.embed_fn is not None:
p = self.embed_fn(p)
out = self.net(p)
return out
def pre_train_sphere(self, iter):
print ("Initialize SDF to sphere")
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(list(self.parameters()), lr=1e-4)
for i in tqdm(range(iter)):
p = torch.rand((1024,3), device='cuda') - 0.5
ref_value = torch.sqrt((p**2).sum(-1)) - 0.3
output = self(p)
loss = loss_fn(output[...,0], ref_value)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print("Pre-trained MLP", loss.item())
# Positional Encoding from https://github.com/yenchenlin/nerf-pytorch/blob/1f064835d2cca26e4df2d7d130daa39a8cee1795/run_nerf_helpers.py
class Embedder:
def __init__(self, **kwargs):
self.kwargs = kwargs
self.create_embedding_fn()
def create_embedding_fn(self):
embed_fns = []
d = self.kwargs['input_dims']
out_dim = 0
if self.kwargs['include_input']:
embed_fns.append(lambda x : x)
out_dim += d
max_freq = self.kwargs['max_freq_log2']
N_freqs = self.kwargs['num_freqs']
if self.kwargs['log_sampling']:
freq_bands = 2.**torch.linspace(0., max_freq, steps=N_freqs)
else:
freq_bands = torch.linspace(2.**0., 2.**max_freq, steps=N_freqs)
for freq in freq_bands:
for p_fn in self.kwargs['periodic_fns']:
embed_fns.append(lambda x, p_fn=p_fn, freq=freq : p_fn(x * freq))
out_dim += d
self.embed_fns = embed_fns
self.out_dim = out_dim
def embed(self, inputs):
return torch.cat([fn(inputs) for fn in self.embed_fns], -1)
def get_embedder(multires):
embed_kwargs = {
'include_input' : True,
'input_dims' : 3,
'max_freq_log2' : multires-1,
'num_freqs' : multires,
'log_sampling' : True,
'periodic_fns' : [torch.sin, torch.cos],
}
embedder_obj = Embedder(**embed_kwargs)
embed = lambda x, eo=embedder_obj : eo.embed(x)
return embed, embedder_obj.out_dim
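# A hedged usage sketch (not part of the original file): encode a small batch of 3D points with the
# positional encoding defined above; with multires=2 the output has 3 + 3*2*2 = 15 channels.
if __name__ == '__main__':
    embed_fn, out_dim = get_embedder(multires=2)
    pts = torch.rand(4, 3)
    print(embed_fn(pts).shape, out_dim)  # expected: torch.Size([4, 15]) 15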

View File

@@ -0,0 +1,369 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ee093335",
"metadata": {
"tags": []
},
"source": [
"# Reconstructing Point Cloud with DMTet\n",
"\n",
"Deep Marching Tetrahedra (DMTet) is a hybrid 3D representation that combines both implicit and explicit 3D surface representations. It represents a shape with a discrete SDF defined on vertices of a deformable tetrahedral grid. The SDF is converted to triangular mesh using a differentiable marching tetrahedra layer (MT), allowing explicit supervision on the extracted surface to be back-propagated to SDF and change mesh topology. In this tutorial, we demonstrate this by optimizing DMTet to reconstruct point cloud by minimizing the Chamfer Distance. The key functions used in this tutorial are in `kaolin.ops.conversions.trianglemesh`. See detailed [API documentation](https://kaolin.readthedocs.io/en/latest/modules/kaolin.ops.conversions.html#kaolin-ops-conversions).\n",
"\n",
"In addition, we demonstrate the use of [Kaolin's 3D checkpoints and training visualization](https://kaolin.readthedocs.io/en/latest/modules/kaolin.visualize.html) with the [Omniverse Kaolin App](https://docs.omniverse.nvidia.com/app_kaolin/app_kaolin/user_manual.html)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "31d9198f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import torch\n",
"import kaolin\n",
"import numpy as np\n",
"from dmtet_network import Decoder\n",
"\n",
"# path to the point cloud to be reconstructed\n",
"pcd_path = \"../samples/bear_pointcloud.usd\"\n",
"# path to the output logs (readable with the training visualizer in the omniverse app)\n",
"logs_path = './logs/'\n",
"\n",
"# We initialize the timelapse that will store USD for the visualization apps\n",
"timelapse = kaolin.visualize.Timelapse(logs_path)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "58c9c196",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# arguments and hyperparameters\n",
"device = 'cuda'\n",
"lr = 1e-3\n",
"laplacian_weight = 0.1\n",
"iterations = 5000\n",
"save_every = 100\n",
"multires = 2\n",
"grid_res = 128"
]
},
{
"cell_type": "markdown",
"id": "16a2899d",
"metadata": {},
"source": [
"# Loading Point Cloud\n",
"\n",
"In this example, we use the point cloud generated by [Omniverse Kaolin App](https://docs.omniverse.nvidia.com/app_kaolin/app_kaolin/user_manual.html#data-generator). We load the pre-generated point cloud in `examples/samples/` and normalize it to the range of the tetrahedral grid. The normalized point cloud is saved to the checkpoint which can be visualized using [the Omniverse app](https://docs.omniverse.nvidia.com/app_kaolin/app_kaolin).\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5674d9a2",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"torch.Size([89164, 3])\n"
]
}
],
"source": [
"points = kaolin.io.usd.import_pointclouds(pcd_path)[0].points.to(device)\n",
"if points.shape[0] > 100000:\n",
" idx = list(range(points.shape[0]))\n",
" np.random.shuffle(idx)\n",
" idx = torch.tensor(idx[:100000], device=points.device, dtype=torch.long) \n",
" points = points[idx]\n",
"\n",
"# The reconstructed object needs to be slightly smaller than the grid to get watertight surface after MT.\n",
"points = kaolin.ops.pointcloud.center_points(points.unsqueeze(0), normalize=True).squeeze(0) * 0.9\n",
"timelapse.add_pointcloud_batch(category='input',\n",
" pointcloud_list=[points.cpu()], points_type = \"usd_geom_points\")"
]
},
{
"cell_type": "markdown",
"id": "8b39b36c",
"metadata": {},
"source": [
"# Loading the Tetrahedral Grid\n",
"\n",
"DMTet starts from a uniform tetrahedral grid of predefined resolution, and uses a network to predict the SDF value as well as deviation vector at each grid vertex. \n",
"\n",
"Here we load the pre-generated tetrahedral grid using [Quartet](https://github.com/crawforddoran/quartet) at resolution 128, which has roughly the same number of vertices as a voxel grid of resolution 65. We use a simple MLP + positional encoding to predict the SDF and deviation vectors in DMTet, and initialize the encoded SDF to represent a sphere. "
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "33ab4b6f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([277410, 3]) torch.Size([1524684, 4])\n",
"Initialize SDF to sphere\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1000/1000 [00:03<00:00, 279.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pre-trained MLP 5.480436811922118e-06\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"tet_verts = torch.tensor(np.load('../samples/{}_verts.npz'.format(grid_res))['data'], dtype=torch.float, device=device)\n",
"tets = torch.tensor(([np.load('../samples/{}_tets_{}.npz'.format(grid_res, i))['data'] for i in range(4)]), dtype=torch.long, device=device).permute(1,0)\n",
"print (tet_verts.shape, tets.shape)\n",
"\n",
"# Initialize model and create optimizer\n",
"model = Decoder(multires=multires).to(device)\n",
"model.pre_train_sphere(1000)\n"
]
},
{
"cell_type": "markdown",
"id": "73fe95a7",
"metadata": {},
"source": [
"# Preparing the Losses and Regularizer\n",
"\n",
"During training we will use two losses defined on the surface mesh:\n",
"- We use Chamfer Distance as the reconstruction loss. At each step, we randomly sample points from the surface mesh and compute the point-to-point distance to the GT point cloud.\n",
"- DMTet can employ direct regularization on the surface mesh to impose useful geometric constraints. We demonstrate this with a Laplacian loss which encourages the surface to be smooth.\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "78ad11ef",
"metadata": {},
"outputs": [],
"source": [
"# Laplacian regularization using umbrella operator (Fujiwara / Desbrun).\n",
"# https://mgarland.org/class/geom04/material/smoothing.pdf\n",
"def laplace_regularizer_const(mesh_verts, mesh_faces):\n",
" term = torch.zeros_like(mesh_verts)\n",
" norm = torch.zeros_like(mesh_verts[..., 0:1])\n",
"\n",
" v0 = mesh_verts[mesh_faces[:, 0], :]\n",
" v1 = mesh_verts[mesh_faces[:, 1], :]\n",
" v2 = mesh_verts[mesh_faces[:, 2], :]\n",
"\n",
" term.scatter_add_(0, mesh_faces[:, 0:1].repeat(1,3), (v1 - v0) + (v2 - v0))\n",
" term.scatter_add_(0, mesh_faces[:, 1:2].repeat(1,3), (v0 - v1) + (v2 - v1))\n",
" term.scatter_add_(0, mesh_faces[:, 2:3].repeat(1,3), (v0 - v2) + (v1 - v2))\n",
"\n",
" two = torch.ones_like(v0) * 2.0\n",
" norm.scatter_add_(0, mesh_faces[:, 0:1], two)\n",
" norm.scatter_add_(0, mesh_faces[:, 1:2], two)\n",
" norm.scatter_add_(0, mesh_faces[:, 2:3], two)\n",
"\n",
" term = term / torch.clamp(norm, min=1.0)\n",
"\n",
" return torch.mean(term**2)\n",
"\n",
"def loss_f(mesh_verts, mesh_faces, points, it):\n",
" pred_points = kaolin.ops.mesh.sample_points(mesh_verts.unsqueeze(0), mesh_faces, 50000)[0][0]\n",
" chamfer = kaolin.metrics.pointcloud.chamfer_distance(pred_points.unsqueeze(0), points.unsqueeze(0)).mean()\n",
" if it > iterations//2:\n",
" lap = laplace_regularizer_const(mesh_verts, mesh_faces)\n",
" return chamfer + lap * laplacian_weight\n",
" return chamfer\n"
]
},
{
"cell_type": "markdown",
"id": "0f96974c",
"metadata": {},
"source": [
"# Setting up Optimizer"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a5d4a42f",
"metadata": {},
"outputs": [],
"source": [
"vars = [p for _, p in model.named_parameters()]\n",
"optimizer = torch.optim.Adam(vars, lr=lr)\n",
"scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: max(0.0, 10**(-x*0.0002))) # LR decay over time"
]
},
  {
   "cell_type": "markdown",
   "id": "e7917ee1",
   "metadata": {},
   "source": [
    "# Training\n",
    "\n",
    "At every iteration, we first predict the SDF value and deformation vector at each vertex with the network. Next, we extract the triangular mesh by running Marching Tetrahedra on the grid. We then compute the loss functions on the extracted mesh and backpropagate the gradients to the network weights. Notice that the topology of the mesh changes during training, as shown in the output messages. The training takes ~5 minutes on a TITAN RTX GPU."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "583bec8b",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration 0 - loss: 0.02473130077123642, # of mesh vertices: 18110, # of mesh faces: 36216\n",
"Iteration 100 - loss: 0.002605137648060918, # of mesh vertices: 24234, # of mesh faces: 48464\n",
"Iteration 200 - loss: 0.0003765518486034125, # of mesh vertices: 26862, # of mesh faces: 53720\n",
"Iteration 300 - loss: 0.0010241996496915817, # of mesh vertices: 31508, # of mesh faces: 63012\n",
"Iteration 400 - loss: 0.0001085952389985323, # of mesh vertices: 28300, # of mesh faces: 56596\n",
"Iteration 500 - loss: 7.9919038398657e-05, # of mesh vertices: 28710, # of mesh faces: 57416\n",
"Iteration 600 - loss: 0.00010018410830525681, # of mesh vertices: 27400, # of mesh faces: 54796\n",
"Iteration 700 - loss: 6.0749654949177057e-05, # of mesh vertices: 28494, # of mesh faces: 56984\n",
"Iteration 800 - loss: 0.0002924088039435446, # of mesh vertices: 27660, # of mesh faces: 55316\n",
"Iteration 900 - loss: 9.263768151868135e-05, # of mesh vertices: 28512, # of mesh faces: 57020\n",
"Iteration 1000 - loss: 7.250437192851678e-05, # of mesh vertices: 28598, # of mesh faces: 57192\n",
"Iteration 1100 - loss: 6.00546263740398e-05, # of mesh vertices: 28352, # of mesh faces: 56700\n",
"Iteration 1200 - loss: 4.965237167198211e-05, # of mesh vertices: 28606, # of mesh faces: 57208\n",
"Iteration 1300 - loss: 4.5047825551591814e-05, # of mesh vertices: 28934, # of mesh faces: 57864\n",
"Iteration 1400 - loss: 4.2731968278530985e-05, # of mesh vertices: 28878, # of mesh faces: 57752\n",
"Iteration 1500 - loss: 8.582305599702522e-05, # of mesh vertices: 28790, # of mesh faces: 57576\n",
"Iteration 1600 - loss: 4.140706005273387e-05, # of mesh vertices: 28924, # of mesh faces: 57844\n",
"Iteration 1700 - loss: 3.995447332272306e-05, # of mesh vertices: 28850, # of mesh faces: 57696\n",
"Iteration 1800 - loss: 3.944659692933783e-05, # of mesh vertices: 29064, # of mesh faces: 58128\n",
"Iteration 1900 - loss: 3.890909647452645e-05, # of mesh vertices: 28994, # of mesh faces: 57984\n",
"Iteration 2000 - loss: 3.9877151721157134e-05, # of mesh vertices: 28832, # of mesh faces: 57660\n",
"Iteration 2100 - loss: 3.8087084249127656e-05, # of mesh vertices: 28942, # of mesh faces: 57880\n",
"Iteration 2200 - loss: 3.8198602851480246e-05, # of mesh vertices: 29116, # of mesh faces: 58228\n",
"Iteration 2300 - loss: 3.789698894252069e-05, # of mesh vertices: 29188, # of mesh faces: 58372\n",
"Iteration 2400 - loss: 3.733349876711145e-05, # of mesh vertices: 28986, # of mesh faces: 57968\n",
"Iteration 2500 - loss: 3.886773993144743e-05, # of mesh vertices: 28728, # of mesh faces: 57452\n",
"Iteration 2600 - loss: 3.7754220102215186e-05, # of mesh vertices: 29132, # of mesh faces: 58260\n",
"Iteration 2700 - loss: 3.751121403183788e-05, # of mesh vertices: 28962, # of mesh faces: 57920\n",
"Iteration 2800 - loss: 3.733678022399545e-05, # of mesh vertices: 28942, # of mesh faces: 57880\n",
"Iteration 2900 - loss: 3.712274701683782e-05, # of mesh vertices: 28970, # of mesh faces: 57936\n",
"Iteration 3000 - loss: 3.738816667464562e-05, # of mesh vertices: 29154, # of mesh faces: 58304\n",
"Iteration 3100 - loss: 3.6861980333924294e-05, # of mesh vertices: 29090, # of mesh faces: 58176\n",
"Iteration 3200 - loss: 3.7955178413540125e-05, # of mesh vertices: 29228, # of mesh faces: 58452\n",
"Iteration 3300 - loss: 3.692376412800513e-05, # of mesh vertices: 28990, # of mesh faces: 57976\n",
"Iteration 3400 - loss: 3.6803434340981767e-05, # of mesh vertices: 29032, # of mesh faces: 58060\n",
"Iteration 3500 - loss: 3.666708289529197e-05, # of mesh vertices: 29006, # of mesh faces: 58008\n",
"Iteration 3600 - loss: 3.6867546441499144e-05, # of mesh vertices: 28916, # of mesh faces: 57828\n",
"Iteration 3700 - loss: 3.673196624731645e-05, # of mesh vertices: 28876, # of mesh faces: 57748\n",
"Iteration 3800 - loss: 3.683008617372252e-05, # of mesh vertices: 28868, # of mesh faces: 57732\n",
"Iteration 3900 - loss: 3.696472413139418e-05, # of mesh vertices: 28932, # of mesh faces: 57860\n",
"Iteration 4000 - loss: 3.699162698467262e-05, # of mesh vertices: 29188, # of mesh faces: 58372\n",
"Iteration 4100 - loss: 3.622782969614491e-05, # of mesh vertices: 28980, # of mesh faces: 57956\n",
"Iteration 4200 - loss: 3.6102632293477654e-05, # of mesh vertices: 28990, # of mesh faces: 57976\n",
"Iteration 4300 - loss: 3.6840694519924e-05, # of mesh vertices: 28888, # of mesh faces: 57772\n",
"Iteration 4400 - loss: 3.603967707022093e-05, # of mesh vertices: 28992, # of mesh faces: 57980\n",
"Iteration 4500 - loss: 3.609260966186412e-05, # of mesh vertices: 29044, # of mesh faces: 58084\n",
"Iteration 4600 - loss: 3.623321754275821e-05, # of mesh vertices: 29112, # of mesh faces: 58220\n",
"Iteration 4700 - loss: 3.591994391172193e-05, # of mesh vertices: 29116, # of mesh faces: 58228\n",
"Iteration 4800 - loss: 3.641782677732408e-05, # of mesh vertices: 29148, # of mesh faces: 58292\n",
"Iteration 4900 - loss: 3.601510252337903e-05, # of mesh vertices: 29078, # of mesh faces: 58152\n",
"Iteration 4999 - loss: 3.580914199119434e-05, # of mesh vertices: 29056, # of mesh faces: 58108\n"
]
}
],
"source": [
"for it in range(iterations):\n",
" pred = model(tet_verts) # predict SDF and per-vertex deformation\n",
" sdf, deform = pred[:,0], pred[:,1:]\n",
" verts_deformed = tet_verts + torch.tanh(deform) / grid_res # constraint deformation to avoid flipping tets\n",
" mesh_verts, mesh_faces = kaolin.ops.conversions.marching_tetrahedra(verts_deformed.unsqueeze(0), tets, sdf.unsqueeze(0)) # running MT (batched) to extract surface mesh\n",
" mesh_verts, mesh_faces = mesh_verts[0], mesh_faces[0]\n",
"\n",
" loss = loss_f(mesh_verts, mesh_faces, points, it)\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
" scheduler.step()\n",
" if (it) % save_every == 0 or it == (iterations - 1): \n",
" print ('Iteration {} - loss: {}, # of mesh vertices: {}, # of mesh faces: {}'.format(it, loss, mesh_verts.shape[0], mesh_faces.shape[0]))\n",
" # save reconstructed mesh\n",
" timelapse.add_mesh_batch(\n",
" iteration=it+1,\n",
" category='extracted_mesh',\n",
" vertices_list=[mesh_verts.cpu()],\n",
" faces_list=[mesh_faces.cpu()]\n",
" )"
]
},
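  {
   "cell_type": "markdown",
   "id": "a3c1f0d2",
   "metadata": {},
   "source": [
    "As a quick sanity check (a small sketch added here, mirroring the calls from the training loop above), we can extract the final mesh once more and report its Chamfer distance against the input point cloud."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7e2c9a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Final evaluation sketch (illustrative addition): re-run the network once and report the\n",
    "# Chamfer distance of the extracted mesh against the input point cloud.\n",
    "with torch.no_grad():\n",
    "    pred = model(tet_verts)\n",
    "    sdf, deform = pred[:, 0], pred[:, 1:]\n",
    "    verts_deformed = tet_verts + torch.tanh(deform) / grid_res\n",
    "    mesh_verts, mesh_faces = kaolin.ops.conversions.marching_tetrahedra(verts_deformed.unsqueeze(0), tets, sdf.unsqueeze(0))\n",
    "    mesh_verts, mesh_faces = mesh_verts[0], mesh_faces[0]\n",
    "    pred_points = kaolin.ops.mesh.sample_points(mesh_verts.unsqueeze(0), mesh_faces, 50000)[0][0]\n",
    "    final_chamfer = kaolin.metrics.pointcloud.chamfer_distance(pred_points.unsqueeze(0), points.unsqueeze(0)).mean()\n",
    "print(f'Final Chamfer distance: {final_chamfer.item():.6f}')"
   ]
  },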
{
"cell_type": "markdown",
"id": "e49e8f67",
"metadata": {},
"source": [
"# Visualize Training\n",
"\n",
"You can now use [the Omniverse app](https://docs.omniverse.nvidia.com/app_kaolin/app_kaolin) to visualize the mesh optimization over training by using the training visualizer on \"./logs/\", where we stored the checkpoints.\n",
"\n",
"Alternatively, you can use [kaolin-dash3d](https://kaolin.readthedocs.io/en/latest/notes/checkpoints.html?highlight=usd#visualizing-with-kaolin-dash3d) to visualize the checkpoint by running <code>kaolin-dash3d --logdir=$logs_path --port=8080</code>. This command will launch a web server that will stream geometry to web clients. You can view the input point cloud and the reconstructed mesh at [localhost:8080](localhost:8080) as shown below. You can change the *global iteration* on the left to see how the mesh evolves during training. \n",
"\n",
"![alt text](../samples/dash3d_mesh.png \"Title\")\n",
"![alt text](../samples/dash3d_pcd.png \"Title\")"
]
}
],
"metadata": {
"interpreter": {
"hash": "4040fd28a16387d31474220157706b1752bd7f86ecfd14350c5c940438c26826"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,59 @@
from collections import deque
from termcolor import colored
def color_by_level(level):
_colormap = ["red", "blue", "green", "yellow", "magenta", "cyan", "grey"]
return _colormap[level % len(_colormap)]
def push_pop_octree(q, oct_item):
prefix = q.popleft()
bit_idx = 0
parsed_bits = oct_item.item()
while parsed_bits:
bit_idx += 1
if parsed_bits & 1:
if len(prefix) > 0:
q.append(prefix + f'-{bit_idx}')
else:
q.append(prefix + f'{bit_idx}')
parsed_bits >>= 1
return prefix
def format_octree_str(octree_byte, octree_path, level_idx, max_level):
text = []
level_color = color_by_level(level_idx - 1)
text += ['Level ' + colored(f'#{level_idx}, ', level_color)]
colored_path = []
for i in range(len(octree_path)):
level_color = color_by_level(i // 2)
if i % 2 == 0:
colored_path += [colored(octree_path[i], level_color)]
else:
colored_path += [octree_path[i]]
colored_path = ''.join(colored_path)
text += [f'Path{colored_path}, ']
text += [' ' for _ in range((max_level - level_idx) * 2)]
text += ['{0:08b}'.format(octree_byte)]
return ''.join(text)
def describe_octree(octree, level, limit_levels=None):
bit_counter = lambda x: bin(x).count('1')
level_idx, curr_level_remaining_cells, next_level_cells = 1, 1, 0
octree_paths = deque('*')
for oct_idx, octree_byte in enumerate(octree):
octree_path = push_pop_octree(octree_paths, octree_byte)
if limit_levels is None or level_idx in limit_levels:
print(format_octree_str(octree_byte, octree_path, level_idx, level))
curr_level_remaining_cells -= 1
next_level_cells += bit_counter(octree_byte)
if not curr_level_remaining_cells:
level_idx += 1
curr_level_remaining_cells = next_level_cells
next_level_cells = 0
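
# Minimal usage sketch (added for illustration; the octree bytes below are hypothetical,
# hand-written values in the breadth-first, byte-per-node layout used by kaolin's SPC octrees):
if __name__ == '__main__':
    import torch
    # Level 1: the root byte 0b00000011 marks children 1 and 2 as occupied.
    # Level 2: one byte for each occupied child of the root.
    demo_octree = torch.ByteTensor([0b00000011, 0b00000001, 0b10000000])
    describe_octree(demo_octree, level=2)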

View File

@@ -0,0 +1,35 @@
# Copyright (c) 2019,20-22 NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import torch
try:
import matplotlib.pyplot as plt
except Exception as e:
pass
FILE_DIR = os.path.abspath(os.path.dirname(os.path.abspath(__file__)))
# Folder with common data samples (<kaolin_root>/sample_data)
COMMON_DATA_DIR = os.path.realpath(os.path.join(FILE_DIR, os.pardir, os.pardir, 'sample_data'))
# Folder with data specific to examples (<kaolin_root>/examples/samples)
EXAMPLES_DATA_DIR = os.path.realpath(os.path.join(FILE_DIR, os.pardir, 'samples'))

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,73 @@
#!/usr/bin/env python
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
from kaolin.io import usd
from kaolin.io.utils import mesh_handler_naive_triangulate
def import_kitchen_set(kitchen_set_usd):
# The Kitchen Set example organizes assets in a particular way. Since we want to import complete objects and not
    # each separate part of an object, we'll find all the paths that are named :code:`Geom`:
scene_paths = usd.get_scene_paths(kitchen_set_usd, r'.*/Geom$')
# The meshes in this dataset have a heterogeneous topology, meaning the number of vertices
# for each polygon varies. To deal with those, we'll pass in a handler function that will
    # triangulate them into homogeneous, all-triangle meshes.
usd_meshes = usd.import_meshes(
kitchen_set_usd,
scene_paths=scene_paths,
heterogeneous_mesh_handler=mesh_handler_naive_triangulate
)
return usd_meshes
def save_kitchen_set_dataset(meshes, out_dir):
for i, m in enumerate(meshes):
out_path = os.path.join(out_dir, f'mesh_{i}.usd')
usd.export_mesh(
file_path=out_path,
vertices=m.vertices[..., [0, 2, 1]], # flipping Y and Z to make models Y-up
faces=m.faces
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
        description='Convert Pixar\'s Kitchen Set scene (http://graphics.pixar.com/usd/downloads.html) '
                    'into a dataset of PyTorch tensors, ready to be used to train our next awesome model.')
parser.add_argument('--kitchen_set_dir', type=str, required=True,
help='Location of the kitchen_set data.')
parser.add_argument('--output_dir', type=str, required=True,
help='Output directory to export the dataset to; must exist.')
args = parser.parse_args()
# We will be importing Pixar's Kitchen Set scene (http://graphics.pixar.com/usd/downloads.html) as a
# dataset of Pytorch Tensors, ready to be used to train our next awesome model.
kitchen_set_usd = os.path.join(args.kitchen_set_dir, 'Kitchen_set.usd')
meshes = import_kitchen_set(kitchen_set_usd)
print(len(meshes)) # 426
# And just like that, we have a dataset of 426 diverse objects for our use!
# Now let's save our dataset so we can use it again later.
save_kitchen_set_dataset(meshes, args.output_dir)
# We can now fire up Omniverse Kaolin and use the Dataset Visualizer extension to
# see what this dataset looks like and start using it in our next project!
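
# Example invocation (the script name and paths below are placeholders for illustration):
#   python usd_kitchenset.py --kitchen_set_dir=/path/to/Kitchen_set --output_dir=/tmp/kitchen_dataset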

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import random
import torch
import sys
import kaolin
logger = logging.getLogger(__name__)
def __normalize_vertices(vertices):
    """
    Normalizes vertices to fit a [-1...1] bounding box,
    common during training, but not necessary for visualization.
    """
    return kaolin.ops.pointcloud.center_points(vertices.unsqueeze(0), normalize=True).squeeze(0) * 2
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Example exporting 3D data during training as timed USDs; '
'also demonstrates OBJ import and mesh to pointcloud conversions.')
parser.add_argument('--test_objs', type=str, required=True,
help='Comma separated list of several example obj files.')
parser.add_argument('--output_dir', type=str, required=True,
help='Output directory to write checkpoints to; must exist.')
parser.add_argument('--iterations', type=int, default=101,
help='How many training iterations to emulate.')
parser.add_argument('--checkpoint_interval', type=int, default=10,
help='Frequency with which to write out checkpoints.')
parser.add_argument('--skip_normalization', action='store_true',
help='If not set, will normalize bounding box of each input '
'to be within -1..1 cube.')
args = parser.parse_args()
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
if not os.path.isdir(args.output_dir):
raise RuntimeError(
'Output directory does not exist: --output_dir={}'.format(
args.output_dir))
# Read test 3D models & setup fake training ----------------------------------
obj_files = args.test_objs.split(',')
logger.info('Parsing {} OBJ files: '.format(len(obj_files)))
face_list = []
gt_vert_list = []
input_pt_clouds = []
delta_list = []
delta_pt_list = []
# TODO: add textured example
for f in obj_files:
res = kaolin.io.obj.import_mesh(f)
        vertices = res.vertices if args.skip_normalization else __normalize_vertices(res.vertices)
num_samples = random.randint(1000, 1500) # Vary to ensure robustness
pts = kaolin.ops.mesh.sample_points(
vertices.unsqueeze(0), res.faces, num_samples)[0].squeeze(0)
# Randomly displace vertices to emulate training
delta = (2.0 * torch.rand(vertices.shape, dtype=vertices.dtype) - 1.0) * 0.25
delta_pts = (2.0 * torch.rand(pts.shape, dtype=pts.dtype) - 1.0) * 0.25
face_list.append(res.faces)
gt_vert_list.append(vertices)
delta_list.append(delta)
input_pt_clouds.append(pts)
delta_pt_list.append(delta_pts)
# Emulate visualizing during training -------------------------------------
logger.info('Emulating training run for {} iterations'.format(args.iterations))
# Create a Timelapse instance
timelapse = kaolin.visualize.Timelapse(args.output_dir)
# Save static objects such as ground truth or inputs that do not change with iterations
# just once.
timelapse.add_mesh_batch(
category='ground_truth',
faces_list=face_list,
vertices_list=gt_vert_list)
timelapse.add_pointcloud_batch(
category='input',
pointcloud_list=input_pt_clouds)
for iteration in range(args.iterations):
if iteration % args.checkpoint_interval == 0:
# Emulate a training update
out_pt_clouds = []
out_vert_list = []
out_voxels = []
for i in range(len(gt_vert_list)):
delta_weight = 1.0 - iteration / (args.iterations - 1)
out_vert_list.append(gt_vert_list[i] * (1.0 + delta_list[i] * delta_weight))
out_pt_clouds.append(input_pt_clouds[i] * (1.0 + delta_pt_list[i] * delta_weight))
vg = kaolin.ops.conversions.trianglemeshes_to_voxelgrids(
out_vert_list[-1].unsqueeze(0), face_list[i], 30)
out_voxels.append(vg.squeeze(0).bool())
# Save model predictions to track training progress over time
timelapse.add_mesh_batch(
iteration=iteration,
category='output',
faces_list=face_list,
vertices_list=out_vert_list)
timelapse.add_pointcloud_batch(
iteration=iteration,
category='output',
pointcloud_list=out_pt_clouds)
timelapse.add_voxelgrid_batch(
iteration=iteration,
category='output',
voxelgrid_list=out_voxels)
logger.info('Emulated training complete!\n'
'You can now view created USD files by running:\n\n'
f'kaolin-dash3d --logdir={args.output_dir}\n\n'
'And then navigating to localhost:8080\n')
# TODO(mshugrina): once dash3d is also integrated, write an integration test
# to ensure timelapse output is properly parsed by the visualizer
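
# Example invocation (the script name and paths below are placeholders for illustration):
#   python visualize_timelapse.py \
#       --test_objs=/path/to/model_a.obj,/path/to/model_b.obj --output_dir=/tmp/timelapse_logs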

View File

@@ -0,0 +1,846 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e504f9ed",
"metadata": {},
"source": [
"# Working with Meshes\n",
"\n",
"This tutorial shows how to expedite working with kaolin operations using the `SurfaceMesh` container class. We will cover import/export, batching strategies, managing mesh data, rendering and visualization. \n",
"\n",
"Note that material support of `SurfaceMesh` is currently limited and is on the roadmap."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "4d8db10b",
"metadata": {},
"outputs": [],
"source": [
"import copy\n",
"import logging\n",
"import numpy as np\n",
"import os\n",
"import sys\n",
"import torch\n",
"\n",
"import kaolin as kal\n",
"from kaolin.rep import SurfaceMesh\n",
"\n",
"from tutorial_common import COMMON_DATA_DIR\n",
"\n",
"def sample_mesh_path(fname):\n",
" return os.path.join(COMMON_DATA_DIR, 'meshes', fname)\n",
"\n",
"def print_tensor(t, **kwargs):\n",
" print(kal.utils.testing.tensor_info(t, **kwargs))"
]
},
{
"cell_type": "markdown",
"id": "16c66902",
"metadata": {},
"source": [
"## Understanding the SurfaceMesh Container\n",
"\n",
"`SurfaceMesh` can store information about a single mesh and a batch of meshes, following three batching\n",
"strategies:\n",
" * `NONE` - a single mesh, not batched\n",
" * `FIXED` - a batch of meshes with fixed topology (faces are fixed)\n",
" * `LIST` - a list of variable topology meshes\n",
"\n",
"Automatically converting between these batching strategies allows quickly connecting to various Kaolin operations. "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7ea7171c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Expected SurfaceMesh contents for batching strategy FIXED\n",
" vertices: (torch.FloatTensor) of shape ['B', 'V', 3]\n",
" faces: (torch.IntTensor) of shape ['F', 'FSz']\n",
" face_vertices: (torch.FloatTensor) of shape ['B', 'F', 'FSz', 3]\n",
" normals: (torch.FloatTensor) of shape ['B', 'VN', 3]\n",
" face_normals_idx: (torch.IntTensor) of shape ['B', 'F', 'FSz']\n",
" face_normals: (torch.FloatTensor) of shape ['B', 'F', 'FSz', 3]\n",
" uvs: (torch.FloatTensor) of shape ['B', 'U', 2]\n",
" face_uvs_idx: (torch.IntTensor) of shape ['B', 'F', 'FSz']\n",
" face_uvs: (torch.FloatTensor) of shape ['B', 'F', 'FSz', 2]\n",
" vertex_normals: (torch.FloatTensor) of shape ['B', 'V', 3]\n",
" vertex_tangents: (torch.FloatTensor) of shape ['B', 'V', 3]\n",
"material_assignments: (torch.IntTensor) of shape ['B', 'F']\n",
" materials: non-tensor attribute\n",
"\n",
"Key: B - batch size, F - number of faces, FSz - face size, V - number of vertices,\n",
" VN - number of normals, U - number of UVs\n"
]
}
],
"source": [
"# To get a sense for what the mesh can contain for different batching strategies, run:\n",
"\n",
"print(SurfaceMesh.attribute_info_string(SurfaceMesh.Batching.FIXED))\n",
"print('\\nKey: B - batch size, F - number of faces, FSz - face size, V - number of vertices,'\n",
" '\\n VN - number of normals, U - number of UVs')"
]
},
{
"cell_type": "markdown",
"id": "bf6648b8",
"metadata": {},
"source": [
"## Constructor and Auto-computable Attributes\n",
"\n",
"A `SurfaceMesh` can be constructed from torch tensors with names, types and sizes as described above. Only `faces` and `vertices` are required, both of which are allowed to contain zero elements, and **many attributes can be computed automatically**. \n",
"\n",
"Important settings passed to the constructor:\n",
"* `unset_attributes_return_none` (default: `True`) - set this to `False` to raise an error when accessing mesh attributes that are missing\n",
"* `allow_auto_compute` (default: `True`) - set this to `False` to disable computation of attributes such as `face_uvs` and `vertex_normals`\n",
"* `strict_checks` (default: `True`) - set this to `False` to allow setting attributes to unexpected shapes\n",
"\n",
"You can also set `mesh.unset_attributes_return_none` or `mesh.allow_auto_compute` later to change mesh behavior."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8de1ea19",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"vertices: [10, 3] (torch.float32)[cpu] \n",
"faces: [5, 3] (torch.int64)[cpu] \n",
"face_vertices: None\n",
"face_vertices (auto-computed): [5, 3, 3] (torch.float32)[cpu] \n"
]
}
],
"source": [
"# Let's construct a simple unbatched mesh\n",
"V, F, Fsz = 10, 5, 3\n",
"mesh = kal.rep.SurfaceMesh(vertices=torch.rand((V, 3)).float(),\n",
" faces=torch.randint(0, V, (F, Fsz)).long(),\n",
" allow_auto_compute=False) # disable auto-compute for now\n",
"print_tensor(mesh.vertices, name='vertices')\n",
"print_tensor(mesh.faces, name='faces')\n",
"print_tensor(mesh.face_vertices, name='face_vertices')\n",
"\n",
"# Now let's enable auto-compute\n",
"mesh.allow_auto_compute=True\n",
"print_tensor(mesh.face_vertices, name='face_vertices (auto-computed)') "
]
},
{
"cell_type": "markdown",
"id": "44055abc",
"metadata": {},
"source": [
"Batched meshes can also be instantiated by passing batched inputs to the constructor, for example:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "59a0b773",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Instantiated mesh with batching FIXED and length 3\n",
"Instantiated mesh with batching LIST and length 2\n"
]
}
],
"source": [
"# FIXED: inputs are batched tensors with fixed faces across batches\n",
"B, VN = 3, 20\n",
"mesh_fixed = kal.rep.SurfaceMesh(vertices=torch.rand((B, V, 3)).float(),\n",
" faces=torch.randint(0, V, (F, Fsz)).long(),\n",
" normals=torch.rand((B, VN, 3)).float(),\n",
" face_normals_idx=torch.randint(0, VN, (B, F, Fsz)))\n",
"print(f'Instantiated mesh with batching {mesh_fixed.batching} and length {len(mesh_fixed)}')\n",
"\n",
"# LIST: all inputs are lists of equal length\n",
"V2, F2 = 12, 20\n",
"mesh_list = kal.rep.SurfaceMesh(\n",
" vertices=[torch.rand((V, 3)).float(), torch.rand((V2, 3)).float()],\n",
" faces=[torch.randint(0, V, (F, Fsz)).long(), torch.randint(0, V2, (F2, Fsz)).long()])\n",
"print(f'Instantiated mesh with batching {mesh_list.batching} and length {len(mesh_list)}')"
]
},
  {
   "cell_type": "markdown",
   "id": "96d7d2be",
   "metadata": {},
   "source": [
    "## Inspecting SurfaceMesh Objects\n",
    "\n",
    "Working with many batched mesh attributes can be confusing, and details really matter. `SurfaceMesh` provides multiple ways to inspect its contents. These print statements also make it clear which attributes can be auto-computed and how."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "fb432078",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Mesh with batching NONE and length 1\n",
"\n",
"Attributes ['vertices', 'faces', 'face_vertices']\n",
"\n",
"Are face_normals set? False\n",
"\n",
"Are face_normals auto-computable? True\n",
"\n",
"Attributes (after accessing face_normals) ['vertices', 'faces', 'face_vertices', 'face_normals']\n",
"\n",
"Face normals face_normals: [5, 3, 3] (torch.float32)[cpu] \n",
"\n",
"\n",
"Does the mesh have expected shapes? True\n",
"SurfaceMesh object with batching strategy NONE\n",
" vertices: [10, 3] (torch.float32)[cpu] \n",
" faces: [5, 3] (torch.int64)[cpu] \n",
" face_vertices: [5, 3, 3] (torch.float32)[cpu] \n",
" face_normals: [5, 3, 3] (torch.float32)[cpu] \n",
" face_uvs: if possible, computed on access from: (uvs, face_uvs_idx)\n",
" vertex_normals: if possible, computed on access from: (faces, face_normals)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n"
]
}
],
"source": [
"# Get batching strategy and batch size (length)\n",
"print(f'\\nMesh with batching {mesh.batching} and length {len(mesh)}')\n",
"\n",
"# Get currently set attributes\n",
"print(f'\\nAttributes {mesh.get_attributes(only_tensors=True)}')\n",
"\n",
"# Check if an attribute is set without causing the mesh to auto-compute it\n",
"print(f'\\nAre face_normals set? {mesh.has_attribute(\"face_normals\")}')\n",
"\n",
"# Check if the attribute likely can be auto-computed without actually trying to\n",
"print(f'\\nAre face_normals auto-computable? {mesh.probably_can_compute_attribute(\"face_normals\")}')\n",
"\n",
"# Let's access face_normals and cause them to be computed\n",
"mesh.face_normals\n",
"print(f'\\nAttributes (after accessing face_normals) {mesh.get_attributes(only_tensors=True)}')\n",
"\n",
"# Check that face_normals are now set\n",
"print(f'\\nFace normals{mesh.describe_attribute(\"face_normals\")}\\n')\n",
"\n",
"# Check if mesh tensor sizes follow expected conventions\n",
"print(f'\\nDoes the mesh have expected shapes? {mesh.check_sanity()}')\n",
"\n",
"# Print mesh contents (and computable attributes)\n",
"print(mesh)\n",
"\n",
"# We can also convert mesh to string with more details and tensor stats\n",
"# print(f'\\nDetailed string of {mesh.to_string(detailed=True, print_stats=True)}')"
]
},
  {
   "cell_type": "markdown",
   "id": "10b7b49f",
   "metadata": {},
   "source": [
    "## Explicit API\n",
    "\n",
    "In addition to the default `SurfaceMesh` API, which computes attributes on access and caches them automatically, this class also supports an alternative, more verbose API that makes these actions explicit."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d81f1dfe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Deleting face_vertices\n",
"Deleting face_normals\n",
"\n",
"Mesh attributes after deletion: ['vertices', 'faces']\n",
"\n",
"Get face_normals without computing: None\n",
"\n",
"Computed face_normals shape is torch.Size([5, 3, 3])\n",
"\n",
"Did mesh cache computed face_normals (and face_vertices required to compute them)?\n",
"False, False\n",
"\n",
"Did mesh cache computed face_normals (and face_vertices required to compute them)?\n",
"True, True\n"
]
}
],
"source": [
"# Let's delete attributes we just computed\n",
"mesh.face_vertices = None\n",
"mesh.face_normals = None\n",
"\n",
"# Check attributes were removed\n",
"print(f'\\nMesh attributes after deletion: {mesh.get_attributes(only_tensors=True)}')\n",
"\n",
"# Get attribute without any auto-compute magic\n",
"print(f'\\nGet face_normals without computing: {mesh.get_attribute(\"face_normals\")}')\n",
"\n",
"# Compute attribute, but don't cache\n",
"face_normals = mesh.get_or_compute_attribute('face_normals', should_cache=False)\n",
"print(f'\\nComputed face_normals shape is {face_normals.shape}')\n",
"\n",
"# Verify attributes were not cached\n",
"print('\\nDid mesh cache computed face_normals (and face_vertices required to compute them)?')\n",
"print(f'{mesh.has_attribute(\"face_normals\")}, {mesh.has_attribute(\"face_vertices\")}')\n",
"\n",
"# Compute and cache\n",
"face_normals = mesh.get_or_compute_attribute('face_normals', should_cache=True)\n",
"\n",
"print('\\nDid mesh cache computed face_normals (and face_vertices required to compute them)?')\n",
"print(f'{mesh.has_attribute(\"face_normals\")}, {mesh.has_attribute(\"face_vertices\")}')"
]
},
  {
   "cell_type": "markdown",
   "id": "787ebbdc",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Importing Data\n",
    "\n",
    "Since version 0.14.0, kaolin `obj` and `usd` importers return a `SurfaceMesh`, which is nearly backward-compatible with the previous `named_tuple` return type, while providing mutability and convenient data management. \n",
    "\n",
    "**Porting from earlier versions:** If porting from kaolin<=0.13.0, note that the `obj` importer now correctly uses `face_normals_idx` (previously `face_normals`) to refer to the face-vertex indices into the normals array, and `normals` (previously `vertex_normals`) to refer to the normals array itself, which may or may not have the same number of elements as the vertices. In addition, `materials` are now imported in name-sorted order, and `material_order` has been replaced with a `material_assignments` tensor of shape `(num_faces,)`, where each integer value indicates the material index assigned to the corresponding face."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f316587e",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Mesh imported from obj: SurfaceMesh object with batching strategy NONE\n",
" vertices: [42, 3] (torch.float32)[cpu] \n",
" faces: [80, 3] (torch.int64)[cpu] \n",
" normals: [80, 3] (torch.float32)[cpu] \n",
" face_normals_idx: [80, 3] (torch.int64)[cpu] \n",
" uvs: [63, 2] (torch.float32)[cpu] \n",
" face_uvs_idx: [80, 3] (torch.int64)[cpu] \n",
"material_assignments: [80] (torch.int16)[cpu] \n",
" materials: list of length 1\n",
" face_vertices: if possible, computed on access from: (faces, vertices)\n",
" face_normals: if possible, computed on access from: (normals, face_normals_idx) or (vertices, faces)\n",
" face_uvs: if possible, computed on access from: (uvs, face_uvs_idx)\n",
" vertex_normals: if possible, computed on access from: (faces, face_normals)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n",
"\n",
"Mesh imported from usd: SurfaceMesh object with batching strategy NONE\n",
" vertices: [42, 3] (torch.float32)[cpu] \n",
" faces: [80, 3] (torch.int64)[cpu] \n",
" face_normals: [80, 3, 3] (torch.float32)[cpu] \n",
" uvs: [240, 2] (torch.float32)[cpu] \n",
" face_uvs_idx: [80, 3] (torch.int64)[cpu] \n",
"material_assignments: [80] (torch.int16)[cpu] \n",
" materials: list of length 1\n",
" face_vertices: if possible, computed on access from: (faces, vertices)\n",
" face_uvs: if possible, computed on access from: (uvs, face_uvs_idx)\n",
" vertex_normals: if possible, computed on access from: (faces, face_normals)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n",
"dict_keys(['batching', 'allow_auto_compute', 'unset_attributes_return_none', 'materials', 'vertices', 'face_normals', 'uvs', 'faces', 'face_uvs_idx', 'material_assignments'])\n"
]
}
],
"source": [
"import_args = {'with_materials' : True, 'with_normals' : True}\n",
"\n",
"# Let's import a single mesh from OBJ\n",
"mesh_obj = kal.io.obj.import_mesh(sample_mesh_path('ico_flat.obj'), **import_args)\n",
"\n",
"# Let's import the same mesh from its USD version\n",
"mesh_usd = kal.io.usd.import_mesh(sample_mesh_path('ico_flat.usda'), **import_args)\n",
"\n",
"# Let's inspect contents of both meshes (notice consistent naming of attributes)\n",
"print(f'\\nMesh imported from obj: {mesh_obj}')\n",
"print(f'\\nMesh imported from usd: {mesh_usd}')\n",
"\n",
"# Note: if you prefer to work with raw values, SurfaceMesh is convertible to dict\n",
"mesh_dict = mesh_usd.as_dict()\n",
"print(mesh_dict.keys())"
]
},
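  {
   "cell_type": "markdown",
   "id": "c2f8a1b4",
   "metadata": {},
   "source": [
    "The next cell is a small added illustration (not part of the original tutorial) of the `material_assignments` convention described above: each face stores the integer index of its material within the name-sorted `materials` list."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9b3e7f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added illustration: count how many faces are assigned to each material index.\n",
    "# material_assignments[f] indexes into mesh_obj.materials (name-sorted), as described above.\n",
    "print(f'{len(mesh_obj.materials)} material(s) imported')\n",
    "print_tensor(mesh_obj.material_assignments, name='material_assignments')\n",
    "for mat_idx in mesh_obj.material_assignments.unique():\n",
    "    num_faces = int((mesh_obj.material_assignments == mat_idx).sum())\n",
    "    print(f'material {int(mat_idx)} is assigned to {num_faces} faces')"
   ]
  },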
  {
   "cell_type": "markdown",
   "id": "fdfec845",
   "metadata": {},
   "source": [
    "Although geometrically these objects are the same, you will notice that USD stores UVs and normals differently from OBJ, resulting in different imported arrays. Despite these differences, the UVs and normals that are auto-computed and assigned to faces are the same."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5733dc8d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Are face_uvs same? True\n",
"\n",
"Are face_normals same? True\n"
]
}
],
"source": [
"print(f'\\nAre face_uvs same? {torch.allclose(mesh_obj.face_uvs, mesh_usd.face_uvs, atol=1e-4)}')\n",
"print(f'\\nAre face_normals same? {torch.allclose(mesh_obj.face_normals, mesh_usd.face_normals, atol=1e-4)}')"
]
},
  {
   "cell_type": "markdown",
   "id": "5c53e057",
   "metadata": {},
   "source": [
    "## Working with Batches\n",
    "\n",
    "`SurfaceMesh` objects can be converted between batching strategies, as long as the conversion is possible (for example, a list of meshes with variable topology cannot be converted to `Batching.FIXED`). "
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4f5476b2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SurfaceMesh object with batching strategy FIXED\n",
" vertices: [1, 42, 3] (torch.float32)[cpu] \n",
" faces: [80, 3] (torch.int64)[cpu] \n",
" normals: [1, 80, 3] (torch.float32)[cpu] \n",
" face_normals_idx: [1, 80, 3] (torch.int64)[cpu] \n",
" face_normals: [1, 80, 3, 3] (torch.float32)[cpu] \n",
" uvs: [1, 63, 2] (torch.float32)[cpu] \n",
" face_uvs_idx: [1, 80, 3] (torch.int64)[cpu] \n",
" face_uvs: [1, 80, 3, 2] (torch.float32)[cpu] \n",
"material_assignments: [1, 80] (torch.int16)[cpu] \n",
" materials: [\n",
" 0: list of length 1\n",
" ]\n",
" face_vertices: if possible, computed on access from: (faces, vertices)\n",
" vertex_normals: if possible, computed on access from: (faces, face_normals)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n",
"\n",
"SurfaceMesh object with batching strategy LIST\n",
" vertices: [\n",
" 0: [42, 3] (torch.float32)[cpu] \n",
" ]\n",
" faces: [\n",
" 0: [80, 3] (torch.int64)[cpu] \n",
" ]\n",
" face_normals: [\n",
" 0: [80, 3, 3] (torch.float32)[cpu] \n",
" ]\n",
" uvs: [\n",
" 0: [240, 2] (torch.float32)[cpu] \n",
" ]\n",
" face_uvs_idx: [\n",
" 0: [80, 3] (torch.int64)[cpu] \n",
" ]\n",
" face_uvs: [\n",
" 0: [80, 3, 2] (torch.float32)[cpu] \n",
" ]\n",
"material_assignments: [\n",
" 0: [80] (torch.int16)[cpu] \n",
" ]\n",
" materials: [\n",
" 0: list of length 1\n",
" ]\n",
" face_vertices: if possible, computed on access from: (faces, vertices)\n",
" vertex_normals: if possible, computed on access from: (faces, face_normals)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n"
]
}
],
"source": [
"# Convert unbatched mesh to most commonly used FIXED batching\n",
"mesh_obj.to_batched() # Shortcut for mesh_usd.set_batching(SurfaceMesh.Batching.FIXED)\n",
"print(mesh_obj)\n",
"\n",
"# Convert mesh to list batching\n",
"mesh_usd.set_batching(SurfaceMesh.Batching.LIST)\n",
"print(f'\\n{mesh_usd}')"
]
},
  {
   "cell_type": "markdown",
   "id": "d6c2a73c",
   "metadata": {},
   "source": [
    "We can also concatenate meshes of any batching strategy, with the output using `FIXED` (if `fixed_topology=True`) or `LIST` batching. Errors will be raised if concatenation is not possible for `vertices` or `faces`, and other attributes will be handled if possible. "
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "9cdad2dd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Cannot cat uvs arrays of given shapes; trying to concatenate face_uvs instead, due to: stack expects each tensor to be equal size, but got [63, 2] at entry 0 and [240, 2] at entry 1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SurfaceMesh object with batching strategy FIXED\n",
" vertices: [2, 42, 3] (torch.float32)[cpu] \n",
" faces: [80, 3] (torch.int64)[cpu] \n",
" face_normals: [2, 80, 3, 3] (torch.float32)[cpu] \n",
" face_uvs: [2, 80, 3, 2] (torch.float32)[cpu] \n",
"material_assignments: [2, 80] (torch.int16)[cpu] \n",
" materials: [\n",
" 0: list of length 1\n",
" 1: list of length 1\n",
" ]\n",
" face_vertices: if possible, computed on access from: (faces, vertices)\n",
" vertex_normals: if possible, computed on access from: (faces, face_normals)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n"
]
}
],
"source": [
"mesh = SurfaceMesh.cat([mesh_obj, mesh_usd], fixed_topology=True)\n",
"\n",
"# Notice that the concatenated mesh:\n",
"# 1. does not have uvs, as those could not be stacked, but face_uvs were computed and stacked instead.\n",
"# 2. does not have normals, as only first mesh had them, but face_normals were computed and stacked. \n",
"print(mesh)"
]
},
{
"cell_type": "markdown",
"id": "0091b207",
"metadata": {},
"source": [
"With `fixed_topology=False`, it is also possible to concatenate meshes of variable topology into a list representation. For example:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "541462ee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SurfaceMesh object with batching strategy LIST\n",
" vertices: [\n",
" 0: [42, 3] (torch.float32)[cpu] \n",
" 1: [42, 3] (torch.float32)[cpu] \n",
" 2: [482, 3] (torch.float32)[cpu] \n",
" ]\n",
" faces: [\n",
" 0: [80, 3] (torch.int64)[cpu] \n",
" 1: [80, 3] (torch.int64)[cpu] \n",
" 2: [960, 3] (torch.int64)[cpu] \n",
" ]\n",
" face_normals: [\n",
" 0: [80, 3, 3] (torch.float32)[cpu] \n",
" 1: [80, 3, 3] (torch.float32)[cpu] \n",
" 2: [960, 3, 3] (torch.float32)[cpu] \n",
" ]\n",
"material_assignments: [\n",
" 0: [80] (torch.int16)[cpu] \n",
" 1: [80] (torch.int16)[cpu] \n",
" 2: [960] (torch.int16)[cpu] \n",
" ]\n",
" materials: [\n",
" 0: list of length 1\n",
" 1: list of length 1\n",
" 2: list of length 2\n",
" ]\n",
" face_vertices: if possible, computed on access from: (faces, vertices)\n",
" face_uvs: if possible, computed on access from: (uvs, face_uvs_idx)\n",
" vertex_normals: if possible, computed on access from: (faces, face_normals)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n",
"\n",
"Note that auto-compute is still supported, e.g. after access:\n",
" vertex_normals: [\n",
" 0: [42, 3] (torch.float32)[cpu] \n",
" 1: [42, 3] (torch.float32)[cpu] \n",
" 2: [482, 3] (torch.float32)[cpu] \n",
" ]\n"
]
}
],
"source": [
"tmp = SurfaceMesh.cat([mesh, kal.io.usd.import_mesh(sample_mesh_path('pizza.usda'), **import_args)],\n",
" fixed_topology=False)\n",
"print(tmp)\n",
"tmp.vertex_normals\n",
"print(f'\\nNote that auto-compute is still supported, e.g. after access:')\n",
"print(f'{tmp.describe_attribute(\"vertex_normals\")}')\n",
"\n",
"del tmp # not needed later"
]
},
{
"cell_type": "markdown",
"id": "4b7855ae",
"metadata": {},
"source": [
"## Convenience Methods and Mutability\n",
"\n",
"Now let's see a few useful capabilities of `SurfaceMesh`. "
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "640359ed",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SurfaceMesh object with batching strategy FIXED\n",
" vertices: [2, 42, 3] (torch.float32)[cuda:0] \n",
" faces: [80, 3] (torch.int64)[cuda:0] \n",
" face_normals: [2, 80, 3, 3] (torch.float32)[cuda:0] \n",
" face_uvs: [2, 80, 3, 2] (torch.float32)[cuda:0] \n",
" vertex_normals: [2, 42, 3] (torch.float32)[cpu] \n",
"material_assignments: [2, 80] (torch.int16)[cuda:0] \n",
" materials: [\n",
" 0: list of length 1\n",
" 1: list of length 1\n",
" ]\n",
" face_vertices: if possible, computed on access from: (faces, vertices)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n",
" vertices: [2, 42, 3] (torch.float32)[cuda:0] - [min -1.0000, max 1.0000, mean -0.0000] \n",
" vertices: [2, 42, 3] (torch.float32)[cuda:0] - [min -0.5000, max 0.5000, mean -0.0000] \n",
"dict_keys(['vertices', 'faces', 'face_normals', 'face_uvs', 'vertex_normals', 'material_assignments'])\n"
]
}
],
"source": [
"# Recall that mesh contains two fixed topology meshes\n",
"\n",
"# Let's move it to cuda (you can also specify particular attributes to move)\n",
"mesh = mesh.cuda()\n",
"\n",
"# Let's say we actually don't need vertex_normals on the GPU\n",
"mesh = mesh.cpu(attributes=['vertex_normals'])\n",
"print(mesh)\n",
"\n",
"# We can also directly set mesh attributes, for example:\n",
"print(mesh.describe_attribute('vertices', print_stats=True))\n",
"mesh.vertices = kal.ops.pointcloud.center_points(mesh.vertices, normalize=True)\n",
"print(mesh.describe_attribute('vertices', print_stats=True))\n",
"\n",
"# Mesh also supports copy and deepcopy\n",
"mesh_copy = copy.copy(mesh)\n",
"mesh_copy = copy.deepcopy(mesh)\n",
"\n",
"# Finally, mesh can be converted to a simple dict\n",
"mesh_dict = mesh.as_dict(only_tensors=True)\n",
"print(mesh_dict.keys())"
]
},
  {
   "cell_type": "markdown",
   "id": "bdd71c5b",
   "metadata": {},
   "source": [
    "## Optimization and Gradients\n",
    "\n",
    "It is also possible to optimize mesh attributes by going through auto-computed attributes. However, take care to set `requires_grad` before an auto-computed attribute is cached: this causes auto-computed attributes to be recomputed on every access, allowing gradients to flow."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "9b053b52",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Has face_vertices? False\n",
"computed face_vertices: [2, 80, 3, 3] (torch.float32)[cuda:0] \n",
"Were face_vertices cached? False\n",
"\n",
"Sample loss 1.7655433416366577\n"
]
}
],
"source": [
"# Let's try to optimize vertices\n",
"mesh.vertices.requires_grad = True\n",
"\n",
"# Check that mesh does not cache face_vertices\n",
"print(f'Has face_vertices? {mesh.has_attribute(\"face_vertices\")}')\n",
"\n",
"# Check that we can actually compute them\n",
"face_vertices = mesh.face_vertices\n",
"print_tensor(face_vertices, name='computed face_vertices')\n",
"\n",
"# However, because mesh.vertices.requires_grad, this value is not cached\n",
"print(f'Were face_vertices cached? {mesh.has_attribute(\"face_vertices\")}')\n",
"\n",
"# Now we can use mesh.face_vertices in a loss function, while optimizing mesh.vertices, e.g.:\n",
"sample_pt_cloud = torch.randn((2, 100, 3), dtype=mesh.vertices.dtype, device=mesh.vertices.device)\n",
"sample_loss = kal.metrics.trianglemesh.point_to_mesh_distance(sample_pt_cloud, mesh.face_vertices)[0].mean()\n",
"print(f'\\nSample loss {sample_loss}')"
]
},
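  {
   "cell_type": "markdown",
   "id": "e5a7c3d9",
   "metadata": {},
   "source": [
    "Below is a minimal optimization sketch (an added example, not part of the original tutorial): it nudges `mesh.vertices` toward the random `sample_pt_cloud` from the previous cell, recomputing `face_vertices` on every access so that gradients can flow."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1d4b8a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimal optimization sketch (illustrative only): optimize mesh.vertices against the\n",
    "# random sample point cloud from the previous cell using the same point-to-mesh loss.\n",
    "optim = torch.optim.Adam([mesh.vertices], lr=1e-3)\n",
    "for step in range(5):\n",
    "    optim.zero_grad()\n",
    "    # face_vertices is recomputed (not cached) because mesh.vertices requires grad\n",
    "    loss = kal.metrics.trianglemesh.point_to_mesh_distance(sample_pt_cloud, mesh.face_vertices)[0].mean()\n",
    "    loss.backward()\n",
    "    optim.step()\n",
    "    print(f'step {step}: loss {loss.item():.4f}')"
   ]
  },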
  {
   "cell_type": "markdown",
   "id": "67097c5d",
   "metadata": {},
   "source": [
    "## Exporting\n",
    "\n",
    "Automatic conversion to `LIST` batching also makes it easy to export a batch of meshes to a USD file."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "bf0591c9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Exporting to USD: 100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 190.03mesh/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SurfaceMesh object with batching strategy LIST\n",
" vertices: [\n",
" 0: [42, 3] (torch.float32)[cpu] \n",
" 1: [42, 3] (torch.float32)[cpu] \n",
" ]\n",
" faces: [\n",
" 0: [80, 3] (torch.int64)[cpu] \n",
" 1: [80, 3] (torch.int64)[cpu] \n",
" ]\n",
" face_normals: [\n",
" 0: [80, 3, 3] (torch.float32)[cpu] \n",
" 1: [80, 3, 3] (torch.float32)[cpu] \n",
" ]\n",
" face_vertices: if possible, computed on access from: (faces, vertices)\n",
" face_uvs: if possible, computed on access from: (uvs, face_uvs_idx)\n",
" vertex_normals: if possible, computed on access from: (faces, face_normals)\n",
" vertex_tangents: if possible, computed on access from: (faces, vertices, face_uvs)\n",
"True\n",
"True\n",
"True\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"/kaolin/kaolin/io/usd/mesh.py:371: UserWarning: Some child prims for /World/Meshes/mesh_0 are missing uvs; skipping importing uvs.\n",
" warnings.warn(f'Some child prims for {scene_path} are missing {k}; skipping importing {k}.', UserWarning)\n",
"/kaolin/kaolin/io/usd/mesh.py:371: UserWarning: Some child prims for /World/Meshes/mesh_0 are missing face_uvs_idx; skipping importing face_uvs_idx.\n",
" warnings.warn(f'Some child prims for {scene_path} are missing {k}; skipping importing {k}.', UserWarning)\n",
"/kaolin/kaolin/io/usd/mesh.py:371: UserWarning: Some child prims for /World/Meshes/mesh_1 are missing uvs; skipping importing uvs.\n",
" warnings.warn(f'Some child prims for {scene_path} are missing {k}; skipping importing {k}.', UserWarning)\n",
"/kaolin/kaolin/io/usd/mesh.py:371: UserWarning: Some child prims for /World/Meshes/mesh_1 are missing face_uvs_idx; skipping importing face_uvs_idx.\n",
" warnings.warn(f'Some child prims for {scene_path} are missing {k}; skipping importing {k}.', UserWarning)\n"
]
}
],
"source": [
"mesh = mesh.set_batching(SurfaceMesh.Batching.LIST)\n",
"\n",
"# Note: you can only run this once due to USD caching; restart Kernel to rerun cell without errors\n",
"kal.io.usd.export_meshes('/tmp/out.usd', vertices=mesh.vertices, faces=mesh.faces, face_normals=mesh.face_normals)\n",
"\n",
"# Verify we can read back the same meshes we exported\n",
"imported_meshes = SurfaceMesh.cat(\n",
" kal.io.usd.import_meshes('/tmp/out.usd', with_normals=True), fixed_topology=False)\n",
"mesh = mesh.cpu()\n",
"print(imported_meshes)\n",
"print(kal.utils.testing.contained_torch_equal(mesh.vertices, imported_meshes.vertices, approximate=True))\n",
"print(kal.utils.testing.contained_torch_equal(mesh.faces, imported_meshes.faces))\n",
"print(kal.utils.testing.contained_torch_equal(mesh.face_normals, imported_meshes.face_normals, approximate=True))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}