Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Project 4: Bowen Deng #12

Open
wants to merge 30 commits into
base: base-code
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8421472
Remove unused freeglut copyrights
shehzan10 Jan 16, 2021
a79ca55
Update CUDA Computes List
shehzan10 Sep 10, 2021
78e028a
Retab
shehzan10 Sep 10, 2021
c03780d
Update instructions for Fall 2021
shehzan10 Sep 18, 2021
80ad206
Add vulkan option
shehzan10 Sep 18, 2021
cc3da39
Add sample readmes for inspiration
shehzan10 Sep 19, 2021
784f5ca
Merge pull request #1 from CIS565-Fall-2021/update-instructions-2021
shehzan10 Sep 21, 2021
8c8e194
format change
7DBW13 Sep 29, 2021
0fb9f5e
basic part done (BSDF shader for ideal diffuse and perfect specular, …
7DBW13 Sep 30, 2021
e7c005c
refractive material
7DBW13 Oct 2, 2021
2462e4c
depth of field
7DBW13 Oct 3, 2021
44a8d42
antialiasing
7DBW13 Oct 3, 2021
2c52400
motion blur
7DBW13 Oct 3, 2021
158bf75
direct lighting
7DBW13 Oct 4, 2021
ed51984
obj load
7DBW13 Oct 5, 2021
07d9ce0
direct lighting revised; trace shadow ray for final rays
7DBW13 Oct 5, 2021
8a00060
texture and normal mapping
7DBW13 Oct 6, 2021
3721c1f
measure performance
7DBW13 Oct 7, 2021
7fc403f
code done
7DBW13 Oct 9, 2021
9925085
add README and images
7DBW13 Oct 9, 2021
e24281d
polish README
7DBW13 Oct 11, 2021
d65dfaa
update reference
7DBW13 Oct 11, 2021
4e3c702
add denoiser base code (gui, dummy gbuffer)
7DBW13 Oct 16, 2021
b8a753f
basic denoiser works
7DBW13 Oct 18, 2021
8ace785
denoiser done
7DBW13 Oct 21, 2021
43cdbd3
all code done
7DBW13 Oct 22, 2021
ebd9a80
update README, imgs, tools
7DBW13 Oct 22, 2021
a1de972
update INS
7DBW13 Oct 22, 2021
6e225a6
update config
7DBW13 Oct 22, 2021
0e646ee
update represent img
7DBW13 Oct 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.1)

project(cis565_path_tracer)
project(cis565_denoiser)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

Expand Down Expand Up @@ -86,16 +86,34 @@ set(sources
src/utilities.cpp
)

# Dear ImGui files kept under imgui/: the core sources and headers plus the
# imgui_impl_glfw and imgui_impl_opengl2/opengl3 files. This list is compiled
# into the executable together with ${sources} and ${headers}.
set(imgui
imgui/imconfig.h
imgui/imgui.cpp
imgui/imgui.h
imgui/imgui_draw.cpp
imgui/imgui_internal.h
imgui/imgui_widgets.cpp
imgui/imgui_demo.cpp
imgui/imgui_impl_glfw.cpp
imgui/imgui_impl_glfw.h
imgui/imgui_impl_opengl2.cpp
imgui/imgui_impl_opengl2.h
imgui/imgui_impl_opengl3.cpp
imgui/imgui_impl_opengl3.h
)

list(SORT headers)
list(SORT sources)
list(SORT imgui)

source_group(Headers FILES ${headers})
source_group(Sources FILES ${sources})
source_group(imgui FILES ${imgui})

#add_subdirectory(stream_compaction) # TODO: uncomment if using your stream compaction

cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers})
cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers} ${imgui})
target_link_libraries(${CMAKE_PROJECT_NAME}
${LIBRARIES}
#stream_compaction # TODO: uncomment if using your stream compaction
)
)
382 changes: 131 additions & 251 deletions INSTRUCTION.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@
<stringAttribute key="org.eclipse.cdt.launch.DEBUGGER_ID" value="com.nvidia.cuda.ide.debug.cudagdb"/>
<stringAttribute key="org.eclipse.cdt.launch.DEBUGGER_START_MODE" value="run"/>
<stringAttribute key="org.eclipse.cdt.launch.PROGRAM_ARGUMENTS" value="scenes/sphere.txt"/>
<stringAttribute key="org.eclipse.cdt.launch.PROGRAM_NAME" value="build/cis565_path_tracer"/>
<stringAttribute key="org.eclipse.cdt.launch.PROJECT_ATTR" value="Project3-CUDA-Path-Tracer"/>
<stringAttribute key="org.eclipse.cdt.launch.PROGRAM_NAME" value="build/cis565_denoiser"/>
<stringAttribute key="org.eclipse.cdt.launch.PROJECT_ATTR" value="Project4-CUDA-Denoiser"/>
<booleanAttribute key="org.eclipse.cdt.launch.PROJECT_BUILD_CONFIG_AUTO_ATTR" value="true"/>
<stringAttribute key="org.eclipse.cdt.launch.PROJECT_BUILD_CONFIG_ID_ATTR" value=""/>
<booleanAttribute key="org.eclipse.cdt.launch.use_terminal" value="true"/>
<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
<listEntry value="/Project3-CUDA-Path-Tracer"/>
<listEntry value="/Project4-CUDA-Denoiser"/>
</listAttribute>
<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
<listEntry value="4"/>
</listAttribute>
<stringAttribute key="process_factory_id" value="org.eclipse.cdt.dsf.gdb.GdbProcessFactory"/>
</launchConfiguration>
</launchConfiguration>
169 changes: 161 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,166 @@
CUDA Path Tracer
================
CUDA Denoiser For CUDA Path Tracer
==================================

**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**
**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4**

* (TODO) YOUR NAME HERE
* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
* Bowen Deng
* [LinkedIn](https://www.linkedin.com/in/bowen-deng-7dbw13)
* Tested on: Windows 10, AMD Ryzen 9 5900HX with Radeon Graphics @ 3.30GHz 16GB, GeForce RTX 3070 Laptop GPU 8GB (Personal Computer)

### (TODO: Your README)
## Abstract

*DO NOT* leave the README to the last minute! It is a crucial part of the
project, and we will not be able to grade you without a good README.
A pathtracing denoiser based on the paper [Edge-Avoiding À-Trous Wavelet Transform for fast Global Illumination Filtering](https://jo.dreggn.org/home/2010_atrous.pdf), which utilizes geometry buffers to guide a smoothing filter.

![](img/res/represent.png)
> Scene: scenes/denoise.txt

## Edge-Avoiding À-Trous Wavelet Filter

The À-Trous wavelet transform is a technique that approximates blur filters (such as a Gaussian) with fewer memory reads. The key is to use a small blur filter and iteratively space out the samples passing through it.

![](img/res/atrous.png)

As shown above, a 5x5 filter is applied over 3 iterations to approximate a 16x16 blur.

However, simply running a blur filter on an image often reduces the amount of detail, smoothing sharp edges. That is where "edge-avoiding" comes in. For a 3D scene, some information stored in geometry buffers (G-buffers) can be useful for guiding the filter to detect and preserve edges, such as per-pixel normals and per-pixel positions.

| scene | normal | position |
| ------------------------ | ------------------------ | ----------------------- |
| ![](img/res/ref.png) | ![](img/res/norm.png) | ![](img/res/pos.png) |
> Scene: scenes/cornell_ceiling_light.txt

Using these G-buffers, an edge-stopping function is added to the À-Trous filter, which diminishes the influence of neighboring samples that have very different values in the G-buffers. As a result, the edge-avoiding À-Trous wavelet filter provides nice smoothing on planar surfaces while keeping the original edges.

| raw pathtraced image | simple blur | blur guided by G-buffers |
| ------------------------ | ------------------------ | ----------------------- |
| ![](img/res/raw.png) | ![](img/res/blur.png) | ![](img/res/denoise.png) |

## Performance Analysis

### Measurement Metric

**A. Runtime Measurement**

To measure performance, CUDA events are applied to record the time cost for each iteration. The average runtime of path tracer is defined as total iterations rendered divided by total time elapsed, while the average runtime of denoiser is defined as its average time cost for running 100 times.

Both of the results will be shown in the title of the application.

![](img/res/title.png)

This feature can be enabled by toggling this macro defined in `src/main.h`
```
// Toggle for measuring performance
#define RUN_TIME 1
```

**B. Image Comparison**

The quality of a denoised result is measured as its difference from a reference image. Such ground truth is obtained by path tracing at 10000 spp. We use structural similarity index (SSIM) to perform image comparison, which ranges from 0 (totally different) to 1 (same).

The comparison script is located in `image_diff_tool/ssim.py`. It can be used in the following way and the computed SSIM will be printed.
```
python ssim.py -f DIR_TO_FIRST_IMG -s DIR_TO_SECOND_IMG
```

### Performance of Denoising

| pathtraced image at 10spp | denoised image | reference |
| ------------------------ | ------------------------ | ----------------------- |
| ![](img/res/raw.png) | ![](img/res/denoise.png) | ![](img/res/ref.png) |
| SSIM = 0.4707 | SSIM = 0.9902 | SSIM = 1 |

![](img/res/time.png)

The result shows a significant improvement after applying the denoiser to the raw pathtraced image, both visually and in terms of SSIM. If we consider `SSIM > 0.98` and no obvious artifacts as an "acceptably smooth" result, experiments show that at least 2500 iterations are needed for the path tracer alone.

| pathtraced image at 2500spp | reference |
| ------------------------ | ----------------------- |
| ![](img/res/2500.png) | ![](img/res/ref.png) |
| SSIM = 0.9831 | SSIM = 1 |

However, with fine-tuned parameters, the denoised image with 10 iterations path tracing shows even higher similarity! The parameters we use are shown as below.

![](img/res/para.png)

### Denoising at Different Image Resolutions

![](img/res/resolution.png)

If the resolution of the rendered image changes, the runtime of denoiser should change with the same ratio, since it performs filtering for fixed iterations. This can be confirmed in above figure. On the other hand, it is noticed that as the resolution grows, the proportion of total runtime for denoising becomes larger.

### Denoising with Different Filter Sizes

Since À-Trous wavelet transform uses more filtering iterations to approximate larger filter, the effective filter sizes are 5, 9, 17, ... (corresponding to 1, 2, 3, ... iterations).

| pathtraced image at 10spp | 5x5 filter | 9x9 filter |
| ------------------------ | ------------------------ | ----------------------- |
| ![](img/res/raw.png) | ![](img/res/iter1.png) | ![](img/res/iter2.png) |
| SSIM = 0.4707 | SSIM = 0.8094 | SSIM = 0.9659 |

| 17x17 filter | 33x33 filter | 65x65 filter |
| ------------------------ | ------------------------ | ----------------------- |
| ![](img/res/iter3.png) | ![](img/res/iter4.png) | ![](img/res/denoise.png) |
| SSIM = 0.9876 | SSIM = 0.9901 | SSIM = 0.9902 |

As the filter grows larger, the result becomes smoother. Speckles can be observed when the filter size is small; they become larger and less obvious, and eventually vanish, when a larger filter is applied. Still, even denoising with a small filter improves SSIM a lot, since SSIM measures structural information. The visual quality does not scale uniformly with filter size. The runtime is also measured at each filter size.

![](img/res/filter.png)

As expected, the runtime increases nearly linearly with the number of filtering iterations (i.e. as the filter size grows).

### Denoising for Different Material Types

The motivation of the work in [Edge-Avoiding À-Trous Wavelet Transform for fast Global Illumination Filtering](https://jo.dreggn.org/home/2010_atrous.pdf) is from the following observation.

>The incident irradiance at a single point on a surface is described by the integral over the hemisphere. Under interactive or real-time constraints a path tracer can only trace a single path per pixel thus estimating the integral with a single sample only. But if neighboring hemispheres are similar one would expect similar integrals. Therefore the smoothing tries to average samples with a similar hemisphere.

It can be inferred that such a denoising method may work well on diffuse surfaces, where the reflection is evenly distributed over the hemisphere. However, for specular or refractive surfaces it may fail, since the light distributions vary a lot even between neighboring points.

| pathtraced image at 10spp | denoised image | reference |
| ------------------------ | ------------------------ | ----------------------- |
| ![](img/res/mat_raw.png) | ![](img/res/mat_denoise.png) | ![](img/res/mat_ref.png) |
> Scene: scenes/diff_mat_ceiling_light.txt

The lower-left diffuse sphere looks very close to the reference after denoising, but the other two do not. For the upper specular sphere, the two spheres reflected on its surface are blurred, since our G-buffers cannot capture edges there. For the lower-right refractive sphere, the denoised result loses many details because the reflections/refractions are very point-dependent, so the benefit from averaging neighboring points is limited.

### Denoising for Different Scenes

The effect of denoising can differ greatly for different scenes.

| pathtraced image at 10spp | denoised image | reference |
| ------------------------ | ------------------------ | ----------------------- |
| ![](img/res/dark_raw.png) | ![](img/res/dark_denoise.png) | ![](img/res/dark_ref.png) |
| SSIM = 0.2198 | SSIM = 0.9559 | SSIM = 1 |
> Scene: scenes/cornell.txt

Compared to the result on `Scene: scenes/cornell_ceiling_light.txt`, the denoised image for this scene shows many artifacts, especially on the surface of the sphere. This is because the light in this scene is so small that many samples do not reach it before being terminated. The lack of information in the original image makes denoising based on neighbor averaging less effective.

**NOTE: Due to some technical issues, this denoiser cannot currently work with the antialiasing feature of my path tracer :(. I will try to fix it in the future.**

## Extra Part

### À-Trous vs. Gaussian

Recall that À-Trous is used to approximate a Gaussian filter. To make a comparison, a Gaussian filter is implemented with sigma of 20.

| pathtraced image at 10spp | reference |
| ------------------------ | ----------------------- |
| ![](img/res/raw.png) | ![](img/res/ref.png) |
| SSIM = 0.4707 | SSIM = 1 |

| 5-pass À-Trous | 65x65 Gaussian |
| ------------------------ | ----------------------- |
| ![](img/res/denoise.png) | ![](img/res/G_denoise.png) |
| SSIM = 0.9902 | SSIM = 0.9557 |

The main difference between the results of these two filters is that the Gaussian blurs the surface of the specular sphere. A possible reason is that À-Trous samples neighboring pixels sparsely, and so better preserves edges that cannot be captured by the G-buffers.

![](img/res/gaussian.png)

Surprisingly, using the full Gaussian filter dramatically reduces the performance. The runtime of denoising is even multiple times more than that of path tracing itself.

## Reference

* Edge-Avoiding À-Trous Wavelet Transform for fast Global Illumination Filtering, https://jo.dreggn.org/home/2010_atrous.pdf.
* ocornut/imgui, https://github.com/ocornut/imgui.
* Gaussian Filter Generation in C++, https://www.geeksforgeeks.org/gaussian-filter-generation-c/.
8 changes: 5 additions & 3 deletions cmake/CUDAComputesList.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ IF( CUDA_COMPUTE_20
OR CUDA_COMPUTE_70
OR CUDA_COMPUTE_72
OR CUDA_COMPUTE_75
OR CUDA_COMPUTE_80
OR CUDA_COMPUTE_86
)
SET(FALLBACK OFF)
ELSE()
Expand All @@ -70,8 +72,8 @@ LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
IF(${COMPUTES_LEN} EQUAL 0 AND ${FALLBACK})
MESSAGE(STATUS "You can use -DCOMPUTES_DETECTED_LIST=\"AB;XY\" (semicolon separated list of CUDA Compute versions to enable the specified computes")
MESSAGE(STATUS "Individual compute versions flags are also available under CMake Advance options")
LIST(APPEND COMPUTES_DETECTED_LIST "30" "50" "60" "70")
MESSAGE(STATUS "No computes detected. Fall back to 30, 50, 60 70")
LIST(APPEND COMPUTES_DETECTED_LIST "30" "50" "60" "70" "80")
MESSAGE(STATUS "No computes detected. Fall back to 30, 50, 60, 70, 80")
ENDIF()

LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
Expand All @@ -90,7 +92,7 @@ MACRO(SET_COMPUTE VERSION)
ENDMACRO(SET_COMPUTE)

# Iterate over compute versions. Create variables and enable computes if needed
FOREACH(VER 20 30 32 35 37 50 52 53 60 61 62 70 72 75)
FOREACH(VER 20 30 32 35 37 50 52 53 60 61 62 70 72 75 80 86)
OPTION(CUDA_COMPUTE_${VER} "CUDA Compute Capability ${VER}" OFF)
MARK_AS_ADVANCED(CUDA_COMPUTE_${VER})
IF(${CUDA_COMPUTE_${VER}})
Expand Down
96 changes: 48 additions & 48 deletions cmake/FindGLFW.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,66 +20,66 @@
include(FindPackageHandleStandardArgs)

if (WIN32)
# Find include files
find_path(
GLFW_INCLUDE_DIR
NAMES GLFW/glfw3.h
PATHS
$ENV{PROGRAMFILES}/include
${GLFW_ROOT_DIR}/include
DOC "The directory where GLFW/glfw.h resides")
# Find include files
find_path(
GLFW_INCLUDE_DIR
NAMES GLFW/glfw3.h
PATHS
$ENV{PROGRAMFILES}/include
${GLFW_ROOT_DIR}/include
DOC "The directory where GLFW/glfw.h resides")

# Use glfw3.lib for static library
if (GLFW_USE_STATIC_LIBS)
set(GLFW_LIBRARY_NAME glfw3)
else()
set(GLFW_LIBRARY_NAME glfw3dll)
endif()
# Use glfw3.lib for static library
if (GLFW_USE_STATIC_LIBS)
set(GLFW_LIBRARY_NAME glfw3)
else()
set(GLFW_LIBRARY_NAME glfw3dll)
endif()

# Find library files
find_library(
GLFW_LIBRARY
NAMES ${GLFW_LIBRARY_NAME}
PATHS
$ENV{PROGRAMFILES}/lib
${GLFW_ROOT_DIR}/lib)
# Find library files
find_library(
GLFW_LIBRARY
NAMES ${GLFW_LIBRARY_NAME}
PATHS
$ENV{PROGRAMFILES}/lib
${GLFW_ROOT_DIR}/lib)

unset(GLFW_LIBRARY_NAME)
unset(GLFW_LIBRARY_NAME)
else()
# Find include files
find_path(
GLFW_INCLUDE_DIR
NAMES GLFW/glfw.h
PATHS
/usr/include
/usr/local/include
/sw/include
/opt/local/include
DOC "The directory where GL/glfw.h resides")
# Find include files
find_path(
GLFW_INCLUDE_DIR
NAMES GLFW/glfw.h
PATHS
/usr/include
/usr/local/include
/sw/include
/opt/local/include
DOC "The directory where GL/glfw.h resides")

# Find library files
# Try to use static libraries
find_library(
GLFW_LIBRARY
NAMES glfw3
PATHS
/usr/lib64
/usr/lib
/usr/local/lib64
/usr/local/lib
/sw/lib
/opt/local/lib
${GLFW_ROOT_DIR}/lib
DOC "The GLFW library")
# Find library files
# Try to use static libraries
find_library(
GLFW_LIBRARY
NAMES glfw3
PATHS
/usr/lib64
/usr/lib
/usr/local/lib64
/usr/local/lib
/sw/lib
/opt/local/lib
${GLFW_ROOT_DIR}/lib
DOC "The GLFW library")
endif()

# Handle REQUIRED argument, define *_FOUND variable
find_package_handle_standard_args(GLFW DEFAULT_MSG GLFW_INCLUDE_DIR GLFW_LIBRARY)

# Define GLFW_LIBRARIES and GLFW_INCLUDE_DIRS
if (GLFW_FOUND)
set(GLFW_LIBRARIES ${OPENGL_LIBRARIES} ${GLFW_LIBRARY})
set(GLFW_INCLUDE_DIRS ${GLFW_INCLUDE_DIR})
set(GLFW_LIBRARIES ${OPENGL_LIBRARIES} ${GLFW_LIBRARY})
set(GLFW_INCLUDE_DIRS ${GLFW_INCLUDE_DIR})
endif()

# Hide some variables
Expand Down
Loading