提交测试

2024-01-16 17:22:21 +08:00
parent 92862c0372
commit 73635fda01
654 changed files with 178015 additions and 2 deletions
--- a/examples/recipes/camera/camera_coordinate_systems.py
+++ b/examples/recipes/camera/camera_coordinate_systems.py
@@ -0,0 +1,25 @@
+# ==============================================================================================================
+# The following snippet demonstrates how to change the coordinate system of the camera.
+# ==============================================================================================================
+
+import math
+import torch
+import numpy as np
+from kaolin.render.camera import Camera, blender_coords
+
+device = 'cuda'  # The camera below is created on this device
+
+camera = Camera.from_args(
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    fov=30 * np.pi / 180,  # In radians
+    width=800, height=800,
+    device=device
+)
+
+print(camera.basis_change_matrix)                    # Printed before any coordinate system change
+camera.change_coordinate_system(blender_coords())   # Switch to the coordinate system returned by blender_coords()
+print(camera.basis_change_matrix)                    # Printed after the change
+camera.reset_coordinate_system()                    # Presumably restores the initial coordinate system -- confirm
+print(camera.basis_change_matrix)                    # Printed after the reset
--- a/examples/recipes/camera/camera_init_explicit.py
+++ b/examples/recipes/camera/camera_init_explicit.py
@@ -0,0 +1,88 @@
+# ==============================================================================================================
+# The following snippet demonstrates how to initialize instances of kaolin's pinhole / ortho cameras
+# explicitly.
+# Also review `camera_init_simple` which greatly simplifies the construction methods shown here.
+# ==============================================================================================================
+
+import math
+import torch
+from kaolin.render.camera import Camera, CameraExtrinsics, PinholeIntrinsics, OrthographicIntrinsics
+
+#################################################################
+#   Camera 1: from eye, at, up and focal length (Perspective)   #
+#################################################################
+# Build the camera extrinsics object from lookat
+eye = torch.tensor([0.0, 0.0, -1.0], device='cuda') # Camera positioned here in world coords
+at = torch.tensor([0.0, 0.0, 0.0], device='cuda')   # Camera observing this world point
+up = torch.tensor([0.0, 1.0, 0.0], device='cuda')   # Camera up direction vector
+extrinsics = CameraExtrinsics.from_lookat(eye, at, up)
+
+# Build a pinhole camera's intrinsics. This time we use focal length (other useful args: focal_y, x0, y0)
+intrinsics = PinholeIntrinsics.from_focal(width=800, height=600, focal_x=1.0, device='cuda')
+
+# Combine extrinsics and intrinsics to obtain the full camera object
+camera_1 = Camera(extrinsics=extrinsics, intrinsics=intrinsics)
+print('--- Camera 1 ---')
+print(camera_1)
+
+########################################################################
+#   Camera 2: from camera position, orientation and fov (Perspective)  #
+########################################################################
+# Build the camera extrinsics object from the camera position and orientation
+cam_pos = torch.tensor([0.0, 0.0, -1.0], device='cuda')
+cam_dir = torch.tensor([[1.0, 0.0, 0.0],
+                        [0.0, 1.0, 0.0],
+                        [0.0, 0.0, 1.0]], device='cuda')  # 3x3 orientation within the world
+extrinsics = CameraExtrinsics.from_camera_pose(cam_pos=cam_pos, cam_dir=cam_dir)
+
+# Use pinhole camera intrinsics, construct using field-of-view (other useful args: camera_fov_direction, x0, y0)
+intrinsics = PinholeIntrinsics.from_fov(width=800, height=600, fov=math.radians(45.0), device='cuda')
+camera_2 = Camera(extrinsics=extrinsics, intrinsics=intrinsics)
+
+print('--- Camera 2 ---')
+print(camera_2)
+
+####################################################################
+#   Camera 3: camera view matrix, (Orthographic)                   #
+####################################################################
+# Build the camera extrinsics object from a world-to-camera view matrix
+world2cam = torch.tensor([[1.0, 0.0, 0.0, 0.5],
+                          [0.0, 1.0, 0.0, 0.5],
+                          [0.0, 0.0, 1.0, 0.5],
+                          [0.0, 0.0, 0.0, 1.0]], device='cuda')  # 4x4 world-to-camera view matrix
+extrinsics = CameraExtrinsics.from_view_matrix(view_matrix=world2cam)
+
+# Use orthographic camera intrinsics, constructed from the view frustum (near / far planes and fov_distance)
+intrinsics = OrthographicIntrinsics.from_frustum(width=800, height=600, near=-800, far=800,
+                                                 fov_distance=1.0, device='cuda')
+camera_3 = Camera(extrinsics=extrinsics, intrinsics=intrinsics)
+
+print('--- Camera 3 ---')
+print(camera_3)
+
+
+##########################################################
+#   Camera 4: Combining cameras                          #
+##########################################################
+# Must be of the same intrinsics type, and non params fields such as width, height, near, far
+# (currently we don't perform validation)
+camera_4 = Camera.cat((camera_1, camera_2))
+
+print('--- Camera 4 ---')
+print(camera_4)
+
+
+##########################################################
+#   Camera 5: constructing a batch of cameras together   #
+##########################################################
+
+# Extrinsics are created using batched tensors (one row per camera). The intrinsics are automatically broadcasted.
+camera_5 = Camera.from_args(
+    eye=torch.tensor([[4.0, 4.0, 4.0], [4.0, 4.0, 4.0]]),
+    at=torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]),   # 'at' must differ from 'eye', else the view direction is zero
+    up=torch.tensor([[0.0, 1.0, 0.0], [0.0, 1.0, 0.0]]),   # 'up' must not be parallel to the view direction
+    width=800, height=600, focal_x=300.0
+)
+
+print('--- Camera 5 ---')
+print(camera_5)
--- a/examples/recipes/camera/camera_init_simple.py
+++ b/examples/recipes/camera/camera_init_simple.py
@@ -0,0 +1,65 @@
+# ==============================================================================================================
+# The following snippet demonstrates how to initialize instances of kaolin's pinhole / ortho cameras.
+# ==============================================================================================================
+
+import math
+import torch
+import numpy as np
+from kaolin.render.camera import Camera
+
+device = 'cuda'  # Every camera below is created on this device
+
+perspective_camera_1 = Camera.from_args(
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    fov=30 * np.pi / 180,  # In radians
+    x0=0.0, y0=0.0,
+    width=800, height=800,
+    near=1e-2, far=1e2,
+    dtype=torch.float64,
+    device=device
+)
+
+print('--- Perspective Camera 1 ---')
+print(perspective_camera_1)
+
+perspective_camera_2 = Camera.from_args(   # Same as above, without the optional x0, y0, near, far and dtype args
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    fov=30 * np.pi / 180,  # In radians
+    width=800, height=800,
+    device=device
+)
+
+print('--- Perspective Camera 2 ---')
+print(perspective_camera_2)
+
+ortho_camera_1 = Camera.from_args(   # No fov / focal length is given here; fov_distance is passed instead
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    width=800, height=800,
+    near=-800, far=800,
+    fov_distance=1.0,
+    dtype=torch.float64,
+    device=device
+)
+
+print('--- Orthographic Camera 1 ---')
+print(ortho_camera_1)
+
+
+ortho_camera_2 = Camera.from_args(   # Extrinsics given directly as a 4x4 view matrix instead of eye / at / up
+    view_matrix=torch.tensor([[1.0, 0.0, 0.0, 0.5],
+                              [0.0, 1.0, 0.0, 0.5],
+                              [0.0, 0.0, 1.0, 0.5],
+                              [0.0, 0.0, 0.0, 1.0]]),
+    width=800, height=800,
+    dtype=torch.float64,
+    device=device
+)
+
+print('--- Orthographic Camera 2 ---')
+print(ortho_camera_2)
--- a/examples/recipes/camera/camera_movement.py
+++ b/examples/recipes/camera/camera_movement.py
@@ -0,0 +1,27 @@
+# ==============================================================================================================
+# The following snippet demonstrates how to manipulate kaolin's camera.
+# ==============================================================================================================
+
+import torch
+from kaolin.render.camera import Camera
+
+
+camera = Camera.from_args(
+    eye=torch.tensor([0.0, 0.0, -1.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    width=800, height=600,
+    fov=1.0,
+    device='cuda'
+)
+
+# Extrinsic rigid transformations managed by CameraExtrinsics
+camera.move_forward(amount=10.0)               # Translate forward in world coordinates (this is wisp's mouse zoom)
+camera.move_right(amount=-5.0)                 # Translate left in world coordinates
+camera.move_up(amount=5.0)                     # Translate up in world coordinates
+camera.rotate(yaw=0.1, pitch=0.02, roll=1.0)   # Rotate the camera
+
+# Intrinsic lens transformations managed by CameraIntrinsics
+# Zoom in to decrease field of view - for Orthographic projection the internal implementation differs
+# as there is no actual fov or depth concept (hence we use a "made up" fov distance parameter, see the projection matrix)
+camera.zoom(amount=0.5)
--- a/examples/recipes/camera/camera_opengl_shaders.py
+++ b/examples/recipes/camera/camera_opengl_shaders.py
@@ -0,0 +1,57 @@
+# ==============================================================================================================
+# The following snippet demonstrates how to use the camera for generating a view-projection matrix
+# as used in opengl shaders.
+# ==============================================================================================================
+
+import torch
+import numpy as np
+from kaolin.render.camera import Camera
+
+# !!! This example is not runnable -- it is kept minimal to show only the integration between !!!
+# !!! the opengl shader and the camera matrix                                                 !!!
+try:
+    from glumpy import gloo
+except Exception:   # glumpy is optional; not a bare except, so KeyboardInterrupt / SystemExit still propagate
+    class DummyGloo(object):
+        def Program(self, vertex, fragment):
+            # see: https://glumpy.readthedocs.io/en/latest/api/gloo-shader.html#glumpy.gloo.Program
+            return {}   # Stand-in for a compiled program: only supports assigning uniforms by name
+    gloo = DummyGloo()
+
+
+device = 'cuda'  # The camera below is created on this device
+
+camera = Camera.from_args(
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    fov=30 * np.pi / 180,  # In radians
+    x0=0.0, y0=0.0,
+    width=800, height=800,
+    near=1e-2, far=1e2,
+    dtype=torch.float64,
+    device=device
+)
+
+
+vertex = """
+            uniform mat4   u_viewprojection;
+            attribute vec3 position;
+            attribute vec4 color;
+            varying vec4 v_color;
+            void main()
+            {
+                v_color = color;
+                gl_Position = u_viewprojection * vec4(position, 1.0f);
+            } """
+
+fragment = """
+            varying vec4 v_color;
+            void main()
+            {
+                gl_FragColor = v_color;
+            } """
+
+# Compile GL program
+gl_program = gloo.Program(vertex, fragment)
+gl_program["u_viewprojection"] = camera.view_projection_matrix()[0].cpu().numpy().T  # Entry 0, on host, transposed
--- a/examples/recipes/camera/camera_properties.py
+++ b/examples/recipes/camera/camera_properties.py
@@ -0,0 +1,47 @@
+# ==============================================================================================================
+# The following snippet demonstrates various camera properties
+# ==============================================================================================================
+
+import math
+import torch
+import numpy as np
+from kaolin.render.camera import Camera
+
+device = 'cuda'  # The camera below is created on this device
+
+camera = Camera.from_args(
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    fov=30 * np.pi / 180,  # In radians
+    width=800, height=800,
+    dtype=torch.float32,
+    device=device
+)
+
+print(camera.width)
+print(camera.height)
+print(camera.lens_type)
+
+print(camera.device)      # Device before the move
+camera = camera.cpu()
+print(camera.device)      # Device after the move
+
+# Create a batched camera and view single element
+camera = Camera.cat((camera, camera))
+print(camera)
+camera = camera[0]
+print(camera)
+
+print(camera.dtype)       # dtype is printed before and after each cast below
+camera = camera.half()
+print(camera.dtype)
+camera = camera.double()
+print(camera.dtype)
+camera = camera.float()
+print(camera.dtype)
+
+print(camera.extrinsics.requires_grad)
+print(camera.intrinsics.requires_grad)
+
+print(camera.to('cuda', torch.float64))   # Device and dtype passed together in a single call
--- a/examples/recipes/camera/camera_ray_tracing.py
+++ b/examples/recipes/camera/camera_ray_tracing.py
@@ -0,0 +1,71 @@
+# ==============================================================================================================
+# The following snippet demonstrates how to use the camera for implementing a ray-generation function
+# for ray based applications.
+# ==============================================================================================================
+
+import torch
+import numpy as np
+from typing import Tuple
+from kaolin.render.camera import Camera, CameraFOV
+
+def generate_pixel_grid(res_x=None, res_y=None, device='cuda'):  # Returns (pixel_y, pixel_x), each (res_y, res_x)
+    h_coords = torch.arange(res_y, device=device)   # Row indices: one per pixel along the image height
+    w_coords = torch.arange(res_x, device=device)   # Column indices: one per pixel along the image width
+    pixel_y, pixel_x = torch.meshgrid(h_coords, w_coords)   # Relies on meshgrid's default 'ij' (row-major) indexing
+    pixel_x = pixel_x + 0.5   # Shift by half a pixel to sample pixel centers
+    pixel_y = pixel_y + 0.5
+    return pixel_y, pixel_x
+
+
+def generate_perspective_rays(camera: Camera, pixel_grid: Tuple[torch.Tensor, torch.Tensor]):
+    """Return (ray_orig, ray_dir, near, far): one normalized ray per pixel of `pixel_grid`, for a single camera."""
+    pixel_y, pixel_x = pixel_grid   # pixel_grid is not modified: the arithmetic below creates new tensors
+    pixel_x = pixel_x.to(camera.device, camera.dtype)
+    pixel_y = pixel_y.to(camera.device, camera.dtype)
+
+    # Account for principal point offset from canvas center
+    pixel_x = pixel_x - camera.x0
+    pixel_y = pixel_y + camera.y0
+
+    # Rescale pixel coordinates from [0, width] x [0, height] to [-1, 1]; tensor shapes are unchanged
+    pixel_x = 2 * (pixel_x / camera.width) - 1.0
+    pixel_y = 2 * (pixel_y / camera.height) - 1.0
+
+    ray_dir = torch.stack((pixel_x * camera.tan_half_fov(CameraFOV.HORIZONTAL),
+                           -pixel_y * camera.tan_half_fov(CameraFOV.VERTICAL),
+                           -torch.ones_like(pixel_x)), dim=-1)
+
+    ray_dir = ray_dir.reshape(-1, 3)    # Flatten grid rays to 1D array
+    ray_orig = torch.zeros_like(ray_dir)    # In camera space every ray starts at the origin
+
+    # Transform from camera to world coordinates
+    ray_orig, ray_dir = camera.extrinsics.inv_transform_rays(ray_orig, ray_dir)
+    ray_dir /= torch.linalg.norm(ray_dir, dim=-1, keepdim=True)    # Normalize directions to unit length
+    ray_orig, ray_dir = ray_orig[0], ray_dir[0]  # Assume a single camera
+
+    return ray_orig, ray_dir, camera.near, camera.far
+
+
+camera = Camera.from_args(
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    fov=30 * np.pi / 180,  # In radians
+    x0=0.0, y0=0.0,
+    width=800, height=800,
+    near=1e-2, far=1e2,
+    dtype=torch.float64,
+    device='cuda'
+)
+
+pixel_grid = generate_pixel_grid(200, 200)  # NOTE(review): grid is 200x200 but rays are normalized by the 800x800 camera size -- confirm intended
+ray_orig, ray_dir, near, far = generate_perspective_rays(camera, pixel_grid)
+
+print('Ray origins:')
+print(ray_orig)
+print('Ray directions:')
+print(ray_dir)
+print('Near clipping plane:')
+print(near)
+print('Far clipping plane:')
+print(far)
--- a/examples/recipes/camera/camera_transforms.py
+++ b/examples/recipes/camera/camera_transforms.py
@@ -0,0 +1,59 @@
+# ==============================================================================================================
+# The following snippet demonstrates how to use the camera transform directly on vectors
+# ==============================================================================================================
+
+import math
+import torch
+import numpy as np
+from kaolin.render.camera import Camera
+
+device = 'cuda'  # The camera below is created on this device
+
+camera = Camera.from_args(
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    fov=30 * np.pi / 180,  # In radians
+    width=800, height=800,
+    dtype=torch.float32,
+    device=device
+)
+
+print('View projection matrix')
+print(camera.view_projection_matrix())
+
+print('View matrix: world2cam')
+print(camera.view_matrix())
+
+print('Inv View matrix: cam2world')
+print(camera.inv_view_matrix())
+
+print('Projection matrix')
+print(camera.projection_matrix())
+
+vectors = torch.randn(10, 3).to(camera.device, camera.dtype)   # Create a batch of 10 random 3D points
+
+# For ortho and perspective: this is equivalent to multiplying camera.projection_matrix() @ vectors
+# and then dividing by the w coordinate (perspective division)
+print(camera.transform(vectors))
+
+# For ray tracing we have camera.inv_transform_rays which performs multiplication with inv_view_matrix()
+# (just for the extrinsics part)
+
+# Can also access properties directly:
+# --
+# View matrix components (camera space)
+print(camera.R)
+print(camera.t)
+
+# Camera axes and position in world coordinates
+print(camera.cam_pos())
+print(camera.cam_right())
+print(camera.cam_up())        # Was a second cam_pos() call; the up axis was never printed
+print(camera.cam_forward())
+
+print(camera.focal_x)
+print(camera.focal_y)
+print(camera.x0)
+print(camera.y0)
+
--- a/examples/recipes/camera/cameras_differentiable.py
+++ b/examples/recipes/camera/cameras_differentiable.py
@@ -0,0 +1,65 @@
+# ====================================================================================================================
+# The following snippet demonstrates how cameras can be used for optimizing specific extrinsic / intrinsic parameters
+# ====================================================================================================================
+
+import torch
+import torch.optim as optim
+from kaolin.render.camera import Camera
+
+# Create simple perspective camera
+cam = Camera.from_args(
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    width=800, height=600, focal_x=300.0
+)
+
+# When requires_grad is on, the camera will automatically switch to differentiation friendly backend
+# (implicitly calling cam.switch_backend('matrix_6dof_rotation') )
+cam.requires_grad_(True)
+
+# Constrain the camera to optimize only the focal lengths and the camera position (cannot rotate)
+ext_mask, int_mask = cam.gradient_mask('t', 'focal_x', 'focal_y')
+ext_params, int_params = cam.parameters()
+ext_params.register_hook(lambda grad: grad * ext_mask.float())   # Zero out gradients of masked-out extrinsics
+grad_scale = 1e5    # Used to move the projection matrix elements faster
+int_params.register_hook(lambda grad: grad * int_mask.float() * grad_scale)
+
+# Build the optimization target: the same camera, made a bit noisy
+# Currently can't copy the camera here after requires_grad is true because we're still missing a camera.detach() op
+target = Camera.from_args(
+    eye=torch.tensor([4.0, 4.0, 4.0]),
+    at=torch.tensor([0.0, 0.0, 0.0]),
+    up=torch.tensor([0.0, 1.0, 0.0]),
+    width=800, height=600, focal_x=300.0
+)
+target.t = target.t + torch.randn_like(target.t)
+target.focal_x = target.focal_x + torch.randn_like(target.focal_x)
+target.focal_y = target.focal_y + torch.randn_like(target.focal_y)
+target_mat = target.view_projection_matrix()
+
+# Save for later so we have some comparison of what changed
+initial_view = cam.view_matrix().detach().clone()
+initial_proj = cam.projection_matrix().detach().clone()
+
+# Train a few steps
+optimizer = optim.SGD(cam.parameters(), lr=0.1, momentum=0.9)
+for idx in range(10):
+    view_proj = cam.view_projection_matrix()
+    optimizer.zero_grad()
+    loss = torch.nn.functional.mse_loss(target_mat, view_proj)
+    loss.backward()
+    optimizer.step()
+    print(f'Iteration {idx}:')
+    print(f'Loss: {loss.item()}')
+    print(f'Extrinsics: {cam.extrinsics.parameters()}')
+    print(f'Intrinsics: {cam.intrinsics.parameters()}')
+
+# Projection matrix grads are much smaller as they're scaled by the view-frustum dimensions..
+print(f'View matrix before: {initial_view}')
+print(f'View matrix after: {cam.view_matrix()}')
+print(f'Projection matrix before: {initial_proj}')
+print(f'Projection matrix after: {cam.projection_matrix()}')
+
+print('Did the camera change?')   # NOTE(review): the check below compares against target, not the initial camera
+print(not torch.allclose(cam, target))