Skip to content

Commit

Permalink
optimize performance
Browse files Browse the repository at this point in the history
Signed-off-by: ZhuohaoHe <[email protected]>
  • Loading branch information
ZzzhHe committed Sep 17, 2024
1 parent 081444c commit 02caa9e
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 19 deletions.
6 changes: 3 additions & 3 deletions src/include/face.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ class Face {

// Get functions
// 获取函数
const std::array<size_t, 3>& GetIndices() const { return indices_; }
const size_t GetIndex(size_t index) const { return indices_[index]; }
const Material& GetMaterial() const { return material_; }
inline const std::array<size_t, 3>& GetIndices() const { return indices_; }
inline const size_t GetIndex(size_t index) const { return indices_[index]; }
inline const Material& GetMaterial() const { return material_; }

private:
std::array<size_t, 3> indices_;
Expand Down
4 changes: 1 addition & 3 deletions src/include/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ class SimpleRenderer {
private:
const size_t height_;
const size_t width_;
std::shared_ptr<float[]> depth_buffer_;
LogSystem log_system_;

std::shared_ptr<Shader> shader_;
Expand All @@ -69,8 +68,7 @@ class SimpleRenderer {
* @param model 模型
*/
void DrawModel(const Model &model, uint32_t *buffer);

void ClearDepthBuffer();
void DrawModelSlower(const Model &model, uint32_t *buffer);
};
} // namespace simple_renderer

Expand Down
8 changes: 4 additions & 4 deletions src/include/vertex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ class Vertex {

// Getter functions
// 获取函数
[[nodiscard]] Vector4f GetPosition() const { return position_; }
[[nodiscard]] Vector3f GetNormal() const { return normal_; }
[[nodiscard]] Vector2f GetTexCoords() const { return texCoords_; }
[[nodiscard]] Color GetColor() const { return color_; }
[[nodiscard]] inline Vector4f GetPosition() const { return position_; }
[[nodiscard]] inline Vector3f GetNormal() const { return normal_; }
[[nodiscard]] inline Vector2f GetTexCoords() const { return texCoords_; }
[[nodiscard]] inline Color GetColor() const { return color_; }

private:
Vector4f position_; // 3D position, 3D顶点坐标
Expand Down
138 changes: 129 additions & 9 deletions src/renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,148 @@ SimpleRenderer::SimpleRenderer(size_t width, size_t height)
width_(width),
log_system_(LogSystem(kLogFilePath, kLogFileMaxSize, kLogFileMaxCount)) {
rasterizer_ = std::make_shared<Rasterizer>(width, height);
// init depth buffer
depth_buffer_ = std::shared_ptr<float[]>(new float[width * height],
std::default_delete<float[]>());
}

// Renders `model` into the caller-provided `buffer`.
//
// Steps, in order: log the model path, reset the depth buffer, store a copy
// of `shader` in `shader_` (so the draw step reads it through the member),
// then hand the model and output buffer to DrawModel().
//
// @param model  model to draw; only read here
// @param shader shader to use; copy-constructed into a new shared_ptr
// @param buffer output pixel buffer, forwarded unchanged to DrawModel()
// @return always true; no failure path is reported from this function
bool SimpleRenderer::Render(const Model &model, const Shader &shader,
uint32_t *buffer) {
SPDLOG_INFO("render model: {}", model.GetModelPath());
ClearDepthBuffer();
shader_ = std::make_shared<Shader>(shader);
DrawModel(model, buffer);
return true;
}

void SimpleRenderer::ClearDepthBuffer() {
  // Overwrite all width_ * height_ depth samples with +infinity.
  const size_t sample_count = width_ * height_;
  float *const first_sample = depth_buffer_.get();
  std::fill_n(first_sample, sample_count,
              std::numeric_limits<float>::infinity());
}
/*
Optimizes performance by performing depth testing during rasterization, keeping
only the closest fragment per pixel, and avoiding storing all
fragments—resulting in faster rendering.
通过在光栅化过程中执行深度测试,仅保留每个像素的深度值最近的片段,避免存储所有片段,从而优化性能,实现更快的渲染。
*/
void SimpleRenderer::DrawModel(const Model &model, uint32_t *buffer) {
SPDLOG_INFO("draw {}", model.GetModelPath());

/* * * Vertex Shader * * */
std::vector<Vertex> processedVertices;
std::vector<std::vector<Vertex>> processed_vertices_all_thread(kNProc);
#pragma omp parallel num_threads(kNProc) default(none) \
shared(shader_, processed_vertices_all_thread) firstprivate(model)
{
int thread_id = omp_get_thread_num();
std::vector<Vertex> &processedVertices_per_thread =
processed_vertices_all_thread[thread_id];

#pragma omp for
for (const auto &v : model.GetVertices()) {
auto vertex = shader_->VertexShader(v);
processedVertices_per_thread.push_back(vertex);
}
}

for (const auto &processedVertices_per_thread :
processed_vertices_all_thread) {
processedVertices.insert(processedVertices.end(),
processedVertices_per_thread.begin(),
processedVertices_per_thread.end());
}
/* * * * * * * */

/* * * Rasterization * * */
std::vector<std::unique_ptr<float[]>> depthBuffer_all_thread(kNProc);
std::vector<std::unique_ptr<uint32_t[]>> colorBuffer_all_thread(kNProc);

for (size_t thread_id = 0; thread_id < kNProc; thread_id++) {
depthBuffer_all_thread[thread_id] =
std::make_unique<float[]>(width_ * height_);
colorBuffer_all_thread[thread_id] =
std::make_unique<uint32_t[]>(width_ * height_);

std::fill_n(depthBuffer_all_thread[thread_id].get(), width_ * height_,
std::numeric_limits<float>::infinity());
std::fill_n(colorBuffer_all_thread[thread_id].get(), width_ * height_, 0);
}

#pragma omp parallel num_threads(kNProc) default(none) \
shared(processedVertices, rasterizer_, shader_, width_, height_, \
depthBuffer_all_thread, colorBuffer_all_thread) \
firstprivate(model)
{
int thread_id = omp_get_thread_num();
auto &depthBuffer_per_thread = depthBuffer_all_thread[thread_id];
auto &colorBuffer_per_thread = colorBuffer_all_thread[thread_id];
#pragma omp for
for (const auto &f : model.GetFaces()) {
auto v0 = processedVertices[f.GetIndex(0)];
auto v1 = processedVertices[f.GetIndex(1)];
auto v2 = processedVertices[f.GetIndex(2)];

const Material *material = &f.GetMaterial();

auto fragments = rasterizer_->Rasterize(v0, v1, v2);

for (auto &fragment : fragments) {
fragment.material = material;

size_t x = fragment.screen_coord[0];
size_t y = fragment.screen_coord[1];

if (x >= width_ || y >= height_) {
continue;
}

size_t index = x + y * width_;

if (fragment.depth < depthBuffer_per_thread[index]) {
depthBuffer_per_thread[index] = fragment.depth;

/* * * Fragment Shader * * */
auto color = shader_->FragmentShader(fragment);
colorBuffer_per_thread[index] = uint32_t(color);
}
}
}
}

// Merge
std::unique_ptr<float[]> depthBuffer =
std::make_unique<float[]>(width_ * height_);
std::unique_ptr<uint32_t[]> colorBuffer =
std::make_unique<uint32_t[]>(width_ * height_);

std::fill_n(depthBuffer.get(), width_ * height_,
std::numeric_limits<float>::infinity());
std::fill_n(colorBuffer.get(), width_ * height_, 0);

#pragma omp parallel for
for (size_t i = 0; i < width_ * height_; i++) {
float min_depth = std::numeric_limits<float>::infinity();
uint32_t color = 0;

for (size_t thread_id = 0; thread_id < kNProc; thread_id++) {
float depth = depthBuffer_all_thread[thread_id][i];
if (depth < min_depth) {
min_depth = depth;
color = colorBuffer_all_thread[thread_id][i];
}
}
depthBuffer[i] = min_depth;
colorBuffer[i] = color;
}

std::memcpy(buffer, colorBuffer.get(), width_ * height_ * sizeof(uint32_t));
}

/*
Organizes processing to simulate how OpenGL works with GPUs by collecting all
fragments per pixel before processing, closely mimicking the GPU pipeline but
leading to increased memory usage and slower performance.
组织处理方式模拟 OpenGL 在 GPU
上的工作原理,先收集每个像素的所有片段再并行处理屏幕上的每个像素,模仿 GPU
管线,但导致内存使用增加和渲染速度变慢
*/
void SimpleRenderer::DrawModelSlower(const Model &model, uint32_t *buffer) {
SPDLOG_INFO("draw {}", model.GetModelPath());

/* * * Vertex Shader * * */
std::vector<Vertex> processedVertex;
std::vector<std::vector<Vertex>> processed_vertices_per_thread(kNProc);
Expand Down Expand Up @@ -152,6 +272,6 @@ void SimpleRenderer::DrawModel(const Model &model, uint32_t *buffer) {
}
}
/* * * * * * * */
} // namespace simple_renderer
}

} // namespace simple_renderer

0 comments on commit 02caa9e

Please sign in to comment.