Skip to content

Commit

Permalink
Added AVX512 support for SkRasterPipeline_opts.h
Browse files Browse the repository at this point in the history
Hi,
This patch adds AVX-512 support for certain functions in both the highp and lowp raster pipelines. It was tested and verified in the PDFium repository, where it passes pdfium_embeddertests.exe. Performance-wise, the AVX-512 code path is significantly faster than the standard SSE and AVX2 paths; this was confirmed by testing with PDF files taken from the resources folder of the PDFium library.

This is an imported pull request from
google#149

GitOrigin-RevId: 3dfeb3b
Change-Id: I91f95a69d914ed57707239b7d2257a6c8f0c3ffa

This is an imported pull request from
google#151

GitOrigin-RevId: 354e9e0
Change-Id: Ia674977e3c1a083938bbfda1e9d785595896cb88
  • Loading branch information
pingladd committed Jan 2, 2024
1 parent c9c9ff7 commit a97503a
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 74 deletions.
62 changes: 37 additions & 25 deletions BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ config("skia_public") {
defines = [
"SK_DEFAULT_TYPEFACE_IS_EMPTY",
"SK_DISABLE_LEGACY_DEFAULT_TYPEFACE",

# TODO(305780908) re-enable after fixing Android and G3
#"SK_DISABLE_LEGACY_FONTMGR_FACTORY",
#"SK_DISABLE_LEGACY_FONTMGR_REFDEFAULT",
]
cflags_objcc = []
if (is_component_build) {
Expand Down Expand Up @@ -182,6 +186,19 @@ opts("hsw") {
}
}

# Builds the sources listed in skia_opts.skx_sources with AVX-512 code
# generation turned on. The target is only enabled when is_x86 is true.
opts("skx") {
enabled = is_x86
sources = skia_opts.skx_sources
if (is_win) {
# On Windows, AVX-512 is requested through the /arch switch.
cflags = [ "/arch:AVX512" ]
} else {
# Elsewhere, target the skylake-avx512 instruction set via -march.
cflags = [ "-march=skylake-avx512" ]
if (is_mac && is_debug) {
cflags += [ "-fno-stack-check" ] # Work around skia:9709
}
}
}

# Any feature of Skia that requires third-party code should be optional and use this template.
template("optional") {
if (invoker.enabled) {
Expand Down Expand Up @@ -426,7 +443,7 @@ optional("fontmgr_FontConfigInterface_factory") {

optional("fontmgr_fuchsia") {
enabled = skia_enable_fontmgr_fuchsia

public_defines = [ "SK_FONTMGR_FUCHSIA_AVAILABLE" ]
deps = []

if (is_fuchsia && using_fuchsia_sdk) {
Expand Down Expand Up @@ -910,7 +927,6 @@ optional("gpu_shared") {

if (skia_use_dawn) {
public_defines += [ "SK_DAWN" ]
sources += skia_shared_dawn_sources

# When building for WASM, the WebGPU headers are provided by Emscripten. For native builds we
# have to depend on Dawn directly.
Expand All @@ -921,8 +937,8 @@ optional("gpu_shared") {
"//third_party/externals/dawn/src/dawn:proc",
]

if (dawn_enable_d3d12 || dawn_enable_desktop_gl || dawn_enable_metal ||
dawn_enable_opengles || dawn_enable_vulkan) {
if (dawn_enable_d3d12 || dawn_enable_d3d11 || dawn_enable_desktop_gl ||
dawn_enable_metal || dawn_enable_opengles || dawn_enable_vulkan) {
public_deps += [ "//third_party/externals/dawn/src/dawn/native" ]
}
if (dawn_enable_d3d12) {
Expand Down Expand Up @@ -959,6 +975,10 @@ optional("gpu_shared") {
public_defines += [ "SK_METAL" ]
sources += skia_shared_mtl_sources
}

if (is_android) {
sources += skia_shared_android_sources
}
}

optional("gpu") {
Expand Down Expand Up @@ -1230,7 +1250,8 @@ optional("graphite") {
}

optional("pdf") {
enabled = skia_use_zlib && skia_enable_pdf
enabled = skia_use_zlib && skia_enable_pdf && skia_use_libjpeg_turbo_decode &&
skia_use_libjpeg_turbo_encode
public_defines = [ "SK_SUPPORT_PDF" ]

deps = [ "//third_party/zlib" ]
Expand All @@ -1245,21 +1266,10 @@ optional("pdf") {
defines = [ "SK_PDF_USE_SFNTLY" ]
}

if (skia_use_libjpeg_turbo_decode) {
deps += [
":jpeg_decode",

# This is not a public_dep so we need to directly depend on it
# to use jpeg headers
"//third_party/libjpeg-turbo:libjpeg",
]
sources += [ "src/pdf/SkJpegInfo_libjpegturbo.cpp" ]
} else {
sources += [ "src/pdf/SkJpegInfo_none.cpp" ]
}
if (skia_use_libjpeg_turbo_encode) {
deps += [ ":jpeg_encode" ]
}
deps += [
":jpeg_decode",
":jpeg_encode",
]
}

optional("xps") {
Expand Down Expand Up @@ -1494,6 +1504,7 @@ skia_component("skia") {
":fontmgr_factory",
":heif",
":hsw",
":skx",
":jpeg_decode",
":jpegxl_decode",
":minify_sksl",
Expand Down Expand Up @@ -2023,7 +2034,11 @@ if (skia_enable_tools) {
"X11",
]
} else if (is_win) {
sources += [ "tools/gpu/gl/win/CreatePlatformGLTestContext_win.cpp" ]
sources += [
"tools/gpu/gl/win/CreatePlatformGLTestContext_win.cpp",
"tools/gpu/gl/win/SkWGL.h",
"tools/gpu/gl/win/SkWGL_win.cpp",
]
libs += [ "Gdi32.lib" ]
if (target_cpu != "arm64") {
libs += [ "OpenGL32.lib" ]
Expand Down Expand Up @@ -2680,10 +2695,7 @@ if (skia_enable_tools) {
"tools/skdiff/skdiff_main.cpp",
"tools/skdiff/skdiff_utils.cpp",
]
deps = [
":skia",
":tool_utils",
]
deps = [ ":skia" ]
}

test_app("skp_parser") {
Expand Down
1 change: 1 addition & 0 deletions gn/opts.gni
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
_src = get_path_info("../src", "abspath")

hsw = [ "$_src/opts/SkOpts_hsw.cpp" ]
skx = [ "$_src/opts/SkOpts_skx.cpp" ]
1 change: 1 addition & 0 deletions gn/shared_sources.gni
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ import("xml.gni")
import("xps.gni")
skia_opts = {
hsw_sources = hsw
skx_sources = skx
}
12 changes: 10 additions & 2 deletions src/core/SkOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,23 @@ namespace SkOpts {
StageFn ops_highp[] = { SK_RASTER_PIPELINE_OPS_ALL(M) };
StageFn just_return_highp = (StageFn)SK_OPTS_NS::just_return;
void (*start_pipeline_highp)(size_t, size_t, size_t, size_t, SkRasterPipelineStage*,
SkSpan<SkRasterPipeline_MemoryCtxPatch>) =
SkSpan<SkRasterPipeline_MemoryCtxPatch>,
uint8_t*) =
SK_OPTS_NS::start_pipeline;
#undef M

#define M(st) (StageFn)SK_OPTS_NS::lowp::st,
StageFn ops_lowp[] = { SK_RASTER_PIPELINE_OPS_LOWP(M) };
StageFn just_return_lowp = (StageFn)SK_OPTS_NS::lowp::just_return;
void (*start_pipeline_lowp)(size_t, size_t, size_t, size_t, SkRasterPipelineStage*,
SkSpan<SkRasterPipeline_MemoryCtxPatch>) =
SkSpan<SkRasterPipeline_MemoryCtxPatch>,
uint8_t*) =
SK_OPTS_NS::lowp::start_pipeline;
#undef M

// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
void Init_hsw();
void Init_skx();

static bool init() {
#if defined(SK_ENABLE_OPTIMIZE_SIZE)
Expand All @@ -51,6 +54,11 @@ namespace SkOpts {
#if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_AVX2
if (SkCpu::Supports(SkCpu::HSW)) { Init_hsw(); }
#endif

#if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SKX
if (SkCpu::Supports(SkCpu::SKX)) { Init_skx(); }
#endif

#endif
return true;
}
Expand Down
73 changes: 41 additions & 32 deletions src/core/SkRasterPipelineOpContexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ namespace SkSL { class TraceHook; }
// by stages that have no lowp implementation. They can therefore use the (smaller) highp value to
// save memory in the arena.
inline static constexpr int SkRasterPipeline_kMaxStride = 16;
inline static constexpr int SkRasterPipeline_kMaxStride_highp = 16;
inline static constexpr int SkRasterPipeline_kMaxStride_highp = 8;
inline static constexpr int SkRasterPipeline_kMaxStride_highp_skx = 16;

// How much space to allocate for each MemoryCtx scratch buffer, as part of tail-pixel handling.
inline static constexpr size_t SkRasterPipeline_MaxScratchPerPatch =
std::max(SkRasterPipeline_kMaxStride_highp * 16, // 16 == largest highp bpp (RGBA_F32)
std::max(SkRasterPipeline_kMaxStride_highp_skx * 16, // 16 == largest highp bpp (RGBA_F32)
SkRasterPipeline_kMaxStride * 4); // 4 == largest lowp bpp (RGBA_8888)

// These structs hold the context data for many of the Raster Pipeline ops.
Expand Down Expand Up @@ -74,17 +75,17 @@ struct SkRasterPipeline_GatherCtx {

// State shared by save_xy, accumulate, and bilinear_* / bicubic_*.
struct SkRasterPipeline_SamplerCtx {
float x[SkRasterPipeline_kMaxStride_highp];
float y[SkRasterPipeline_kMaxStride_highp];
float fx[SkRasterPipeline_kMaxStride_highp];
float fy[SkRasterPipeline_kMaxStride_highp];
float scalex[SkRasterPipeline_kMaxStride_highp];
float scaley[SkRasterPipeline_kMaxStride_highp];
float x[SkRasterPipeline_kMaxStride_highp_skx];
float y[SkRasterPipeline_kMaxStride_highp_skx];
float fx[SkRasterPipeline_kMaxStride_highp_skx];
float fy[SkRasterPipeline_kMaxStride_highp_skx];
float scalex[SkRasterPipeline_kMaxStride_highp_skx];
float scaley[SkRasterPipeline_kMaxStride_highp_skx];

// for bicubic_[np][13][xy]
float weights[16];
float wx[4][SkRasterPipeline_kMaxStride_highp];
float wy[4][SkRasterPipeline_kMaxStride_highp];
float wx[4][SkRasterPipeline_kMaxStride_highp_skx];
float wy[4][SkRasterPipeline_kMaxStride_highp_skx];
};

struct SkRasterPipeline_TileCtx {
Expand All @@ -111,14 +112,14 @@ struct SkRasterPipeline_DecalTileCtx {
// State used by mipmap_linear_*
struct SkRasterPipeline_MipmapCtx {
// Original coords, saved before the base level logic
float x[SkRasterPipeline_kMaxStride_highp];
float y[SkRasterPipeline_kMaxStride_highp];
float x[SkRasterPipeline_kMaxStride_highp_skx];
float y[SkRasterPipeline_kMaxStride_highp_skx];

// Base level color
float r[SkRasterPipeline_kMaxStride_highp];
float g[SkRasterPipeline_kMaxStride_highp];
float b[SkRasterPipeline_kMaxStride_highp];
float a[SkRasterPipeline_kMaxStride_highp];
float r[SkRasterPipeline_kMaxStride_highp_skx];
float g[SkRasterPipeline_kMaxStride_highp_skx];
float b[SkRasterPipeline_kMaxStride_highp_skx];
float a[SkRasterPipeline_kMaxStride_highp_skx];

// Scale factors to transform base level coords to lower level coords
float scaleX;
Expand All @@ -138,22 +139,22 @@ struct SkRasterPipeline_CallbackCtx {

// When called, fn() will have our active pixels available in rgba.
// When fn() returns, the pipeline will read back those active pixels from read_from.
float rgba[4*SkRasterPipeline_kMaxStride_highp];
float rgba[4*SkRasterPipeline_kMaxStride_highp_skx];
float* read_from = rgba;
};

// state shared by stack_checkpoint and stack_rewind
struct SkRasterPipelineStage;

struct SkRasterPipeline_RewindCtx {
float r[SkRasterPipeline_kMaxStride_highp];
float g[SkRasterPipeline_kMaxStride_highp];
float b[SkRasterPipeline_kMaxStride_highp];
float a[SkRasterPipeline_kMaxStride_highp];
float dr[SkRasterPipeline_kMaxStride_highp];
float dg[SkRasterPipeline_kMaxStride_highp];
float db[SkRasterPipeline_kMaxStride_highp];
float da[SkRasterPipeline_kMaxStride_highp];
float r[SkRasterPipeline_kMaxStride_highp_skx];
float g[SkRasterPipeline_kMaxStride_highp_skx];
float b[SkRasterPipeline_kMaxStride_highp_skx];
float a[SkRasterPipeline_kMaxStride_highp_skx];
float dr[SkRasterPipeline_kMaxStride_highp_skx];
float dg[SkRasterPipeline_kMaxStride_highp_skx];
float db[SkRasterPipeline_kMaxStride_highp_skx];
float da[SkRasterPipeline_kMaxStride_highp_skx];
std::byte* base;
SkRasterPipelineStage* stage;
};
Expand Down Expand Up @@ -192,14 +193,18 @@ struct SkRasterPipeline_TablesCtx {

using SkRPOffset = uint32_t;

// Context carrying a single `tail` pointer.
// NOTE(review): going by the struct name, this is presumably consumed by the init-lane-masks
// op to learn how many lanes are live; confirm against the op's implementation.
struct SkRasterPipeline_InitLaneMasksCtx {
uint8_t* tail;
};

// Pairs a float constant with a destination expressed as an SkRPOffset (a uint32_t).
// NOTE(review): `dst` is an offset rather than a pointer; confirm what base it is relative to.
struct SkRasterPipeline_ConstantCtx {
float value;
SkRPOffset dst;
};

struct SkRasterPipeline_UniformCtx {
float *dst;
const float *src;
float* dst;
const float* src;
};

struct SkRasterPipeline_BinaryOpCtx {
Expand Down Expand Up @@ -227,20 +232,20 @@ struct SkRasterPipeline_SwizzleCtx {
};

struct SkRasterPipeline_ShuffleCtx {
float *ptr;
float* ptr;
int count;
uint16_t offsets[16]; // values must be byte offsets (4 * highp-stride * component-index)
};

struct SkRasterPipeline_SwizzleCopyCtx {
float *dst;
float *src; // src values must _not_ overlap dst values
float* dst;
float* src; // src values must _not_ overlap dst values
uint16_t offsets[4]; // values must be byte offsets (4 * highp-stride * component-index)
};

struct SkRasterPipeline_CopyIndirectCtx {
float *dst;
const float *src;
float* dst;
const float* src;
const uint32_t *indirectOffset; // this applies to `src` or `dst` based on the op
uint32_t indirectLimit; // the indirect offset is clamped to this upper bound
uint32_t slots; // the number of slots to copy
Expand All @@ -254,6 +259,10 @@ struct SkRasterPipeline_BranchCtx {
int offset; // contains the label ID during compilation, and the program offset when compiled
};

// Branch context (inherits `offset` from SkRasterPipeline_BranchCtx) that additionally carries
// a pointer to the tail value. The pointer defaults to null until it is filled in.
struct SkRasterPipeline_BranchIfAllLanesActiveCtx : public SkRasterPipeline_BranchCtx {
uint8_t* tail = nullptr; // lanes past the tail are _never_ active, so we need to exclude them
};

struct SkRasterPipeline_BranchIfEqualCtx : public SkRasterPipeline_BranchCtx {
int value;
const int* ptr;
Expand Down
34 changes: 34 additions & 0 deletions src/opts/SkOpts_skx.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/

#include "src/core/SkOpts.h"

#if !defined(SK_ENABLE_OPTIMIZE_SIZE)

#define SK_OPTS_NS skx
#include "src/opts/SkRasterPipeline_opts.h"

namespace SkOpts {
// Repoints the SkOpts raster-pipeline dispatch state at the implementations in the SK_OPTS_NS
// namespace (defined as `skx` earlier in this file): the lowp/highp strides, every entry of the
// highp and lowp stage tables, the just_return stages, and the start_pipeline entry points.
void Init_skx() {
// Strides are taken from the SK_OPTS_NS build of the pipeline.
raster_pipeline_lowp_stride = SK_OPTS_NS::raster_pipeline_lowp_stride();
raster_pipeline_highp_stride = SK_OPTS_NS::raster_pipeline_highp_stride();

// M(st) overwrites the highp table slot indexed by SkRasterPipelineOp::st; the
// SK_RASTER_PIPELINE_OPS_ALL list applies it to each op it names.
#define M(st) ops_highp[(int)SkRasterPipelineOp::st] = (StageFn)SK_OPTS_NS::st;
SK_RASTER_PIPELINE_OPS_ALL(M)
just_return_highp = (StageFn)SK_OPTS_NS::just_return;
start_pipeline_highp = SK_OPTS_NS::start_pipeline;
#undef M

// Same again for lowp, using the SK_RASTER_PIPELINE_OPS_LOWP list and the nested
// SK_OPTS_NS::lowp namespace.
#define M(st) ops_lowp[(int)SkRasterPipelineOp::st] = (StageFn)SK_OPTS_NS::lowp::st;
SK_RASTER_PIPELINE_OPS_LOWP(M)
just_return_lowp = (StageFn)SK_OPTS_NS::lowp::just_return;
start_pipeline_lowp = SK_OPTS_NS::lowp::start_pipeline;
#undef M
}
} // namespace SkOpts

#endif // SK_ENABLE_OPTIMIZE_SIZE
Loading

0 comments on commit a97503a

Please sign in to comment.