diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..6313b56c57 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.github/ISSUE_TEMPLATE/1-browser.md b/.github/ISSUE_TEMPLATE/1-browser.md new file mode 100644 index 0000000000..9a12f1c372 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1-browser.md @@ -0,0 +1,26 @@ +--- +name: Browser/implementation bug +about: 'For bugs in a WebGPU implementation, please file against the relevant browser/implementation instead' +title: '' +labels: 'invalid' +assignees: '' + +--- + +**Please do not file browser/implementation bugs on this GitHub repository.** +Instead, file a bug against the relevant browser. + +Chrome (or Dawn/Tint): + Known issues: + https://issues.chromium.org/savedsearches/6760928 + To file a new issue: + https://issues.chromium.org/issues/new?noWizard=true&component=1456980 + +WebKit: + https://bugs.webkit.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=WebGPU + +Firefox: + https://bugzilla.mozilla.org/buglist.cgi?product=Core&component=Graphics%3A%20WebGPU + +See also: + [Implementation Status](https://github.com/gpuweb/gpuweb/wiki/Implementation-Status) wiki page diff --git a/.github/ISSUE_TEMPLATE/2-question.md b/.github/ISSUE_TEMPLATE/2-question.md new file mode 100644 index 0000000000..73da8a67eb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-question.md @@ -0,0 +1,11 @@ +--- +name: Question about WebGPU +about: 'Question? Please open a GitHub "Discussion" instead' +title: '' +labels: 'invalid' +assignees: '' + +--- + +If you have a Q&A style question about using WebGPU, please use a GitHub "Discussion": +https://github.com/gpuweb/gpuweb/discussions diff --git a/.github/ISSUE_TEMPLATE/3-webgpu.md b/.github/ISSUE_TEMPLATE/3-webgpu.md new file mode 100644 index 0000000000..25261061b4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3-webgpu.md @@ -0,0 +1,8 @@ +--- +name: WebGPU API spec issue (not WGSL) +about: 'API standardization/specification issue' +title: '' +labels: 'api' +assignees: '' + +--- diff --git a/.github/ISSUE_TEMPLATE/4-wgsl.md b/.github/ISSUE_TEMPLATE/4-wgsl.md new file mode 100644 index 0000000000..62bcbb4b63 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/4-wgsl.md @@ -0,0 +1,8 @@ +--- +name: WGSL spec issue +about: 'WebGPU Shading Language standardization/specification issue' +title: '' +labels: 'wgsl' +assignees: '' + +--- diff --git a/.github/ISSUE_TEMPLATE/5-other.md b/.github/ISSUE_TEMPLATE/5-other.md new file mode 100644 index 0000000000..448b14d779 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/5-other.md @@ -0,0 +1,8 @@ +--- +name: Misc, reference docs, wiki, etc. +about: 'Something other than WebGPU/WGSL spec - reference docs, wiki, GitHub infra, etc.' +title: '' +labels: '' +assignees: '' + +--- diff --git a/.github/ISSUE_TEMPLATE/browser.md b/.github/ISSUE_TEMPLATE/browser.md deleted file mode 100644 index 6dd9786f5a..0000000000 --- a/.github/ISSUE_TEMPLATE/browser.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: Bug -about: 'Bug - please file against relevant browser instead' -title: '' -labels: '' -assignees: '' - ---- - -**Please do not file bugs on this GitHub repository.** -Instead, file a bug against the relevant browser. - -Chrome: - Known issues: https://bugs.chromium.org/p/chromium/issues/list?q=component:Blink%3EWebGPU - To file a new issue: https://bugs.chromium.org/p/chromium/issues/entry?components=Blink%3EWebGPU - -WebKit: -https://bugs.webkit.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=WebGPU - -Firefox: -https://bugzilla.mozilla.org/buglist.cgi?product=Core&component=Graphics%3A%20WebGPU - -See also: [implementation status](https://github.com/gpuweb/gpuweb/wiki/Implementation-Status). diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000..ea56ea0f84 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,2 @@ +# "webgpu.md" is the catch-all template, we don't need a "blank issue" button +blank_issues_enabled: false diff --git a/.github/ISSUE_TEMPLATE/issue.md b/.github/ISSUE_TEMPLATE/issue.md deleted file mode 100644 index e9f2e1909b..0000000000 --- a/.github/ISSUE_TEMPLATE/issue.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: WebGPU spec issue -about: 'Standardization/specification issue' -title: '' -labels: '' -assignees: '' - ---- - - diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md deleted file mode 100644 index 8f6e199050..0000000000 --- a/.github/ISSUE_TEMPLATE/question.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -name: WebGPU question. -about: 'Browser-non-specific WebGPU question' -title: '' -labels: 'question' -assignees: '' - ---- - - -If you have a Q&A style question about using WebGPU, consider using a GitHub "Discussion". diff --git a/.github/ISSUE_TEMPLATE/wgsl.md b/.github/ISSUE_TEMPLATE/wgsl.md deleted file mode 100644 index 61dd038832..0000000000 --- a/.github/ISSUE_TEMPLATE/wgsl.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -name: WebGPU Shading Language Issue. -about: 'WebGPU Shading Language Issues' -title: '' -labels: 'wgsl' -assignees: '' - ---- diff --git a/.github/workflows/build-push-custom-image.yml b/.github/workflows/build-push-custom-image.yml index dd1356b53d..f87c067f9f 100644 --- a/.github/workflows/build-push-custom-image.yml +++ b/.github/workflows/build-push-custom-image.yml @@ -51,7 +51,8 @@ jobs: - name: Build and push Docker image uses: docker/build-push-action@v5.1.0 with: - context: tools/custom-action + context: . + file: tools/custom-action/Dockerfile platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} diff --git a/.github/workflows/build-validate-publish.yml b/.github/workflows/build-validate-publish.yml index be00c85116..88862bb420 100644 --- a/.github/workflows/build-validate-publish.yml +++ b/.github/workflows/build-validate-publish.yml @@ -10,13 +10,13 @@ name: build-validate-publish on: pull_request: paths-ignore: [ "tools/custom-action/Dockerfile" ] - + push: branches: [main] paths-ignore: - "tools/custom-action/Dockerfile" - "tools/custom-action/entrypoint.sh" - + # Allows admins to trigger the workflow manually from GitHub's UI. workflow_dispatch: diff --git a/.github/workflows/preview-pull-request.yml b/.github/workflows/preview-pull-request.yml index 335615f402..3d38e3a197 100644 --- a/.github/workflows/preview-pull-request.yml +++ b/.github/workflows/preview-pull-request.yml @@ -53,7 +53,7 @@ jobs: uses: ./tools/custom-action/ with: check-repo-clean: 'OFF' - + # Adjusts Bikeshed specs - name: Adjust Bikeshed if: ${{ github.event.workflow_run.event == 'pull_request' && env.PR }} diff --git a/.github/workflows/publish-TR-webgpu.yml b/.github/workflows/publish-TR-webgpu.yml index 44deb0f047..3dc6da5ddd 100644 --- a/.github/workflows/publish-TR-webgpu.yml +++ b/.github/workflows/publish-TR-webgpu.yml @@ -34,5 +34,4 @@ jobs: W3C_ECHIDNA_TOKEN: ${{ secrets.ECHIDNA_TOKEN_WEBGPU }} W3C_WG_DECISION_URL: https://lists.w3.org/Archives/Public/public-gpu/2021Apr/0004.html W3C_BUILD_OVERRIDE: | - group: gpuwg - status: WD + status: CRD diff --git a/.github/workflows/publish-TR-wgsl.yml b/.github/workflows/publish-TR-wgsl.yml index a0481d5ff0..50ac413d46 100644 --- a/.github/workflows/publish-TR-wgsl.yml +++ b/.github/workflows/publish-TR-wgsl.yml @@ -34,5 +34,4 @@ jobs: W3C_ECHIDNA_TOKEN: ${{ secrets.ECHIDNA_TOKEN_WGSL }} W3C_WG_DECISION_URL: https://lists.w3.org/Archives/Public/public-gpu/2021Apr/0004.html W3C_BUILD_OVERRIDE: | - group: gpuwg - status: WD + status: CRD diff --git a/.gitignore b/.gitignore index f3bb780865..6089155d43 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,29 @@ out/ spec/index.html spec/index.pre.html spec/webgpu.idl -wgsl/grammar/ +wgsl/grammar/** +!wgsl/grammar/bindings +!wgsl/grammar/bindings/c +!wgsl/grammar/bindings/c/tree-sitter-wgsl.pc.in +!wgsl/grammar/bindings/go +!wgsl/grammar/bindings/go/binding.go +!wgsl/grammar/bindings/python +!wgsl/grammar/bindings/python/tree_sitter_wgsl +!wgsl/grammar/bindings/python/tree_sitter_wgsl/__init__.py +!wgsl/grammar/bindings/rust +!wgsl/grammar/bindings/rust/build.rs +!wgsl/grammar/src +!wgsl/grammar/src/scanner.c +!wgsl/grammar/.gitignore +!wgsl/grammar/binding.gyp +!wgsl/grammar/Cargo.toml +!wgsl/grammar/go.mod +!wgsl/grammar/Makefile +!wgsl/grammar/package.json +!wgsl/grammar/Package.swift +!wgsl/grammar/pyproject.toml +!wgsl/grammar/README.md +!wgsl/grammar/setup.py wgsl/index.html wgsl/index.pre.html wgsl/index.bs.pre diff --git a/.vscode/settings.json b/.vscode/settings.json index a743ef65b5..4757593b6f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,6 +9,9 @@ "files.associations": { "*.bs.include": "bikeshed", }, + "files.eol": "\n", + "files.trimTrailingWhitespace": true, + "files.insertFinalNewline": true, "[bikeshed]": { "editor.detectIndentation": false, "editor.indentSize": 4, diff --git a/correspondence/img/.gitignore b/correspondence/img/.gitignore index e69de29bb2..8b13789179 100644 --- a/correspondence/img/.gitignore +++ b/correspondence/img/.gitignore @@ -0,0 +1 @@ + diff --git a/correspondence/index.bs b/correspondence/index.bs index 86643cbd18..eb4092d07f 100644 --- a/correspondence/index.bs +++ b/correspondence/index.bs @@ -2,7 +2,7 @@ Title: WebGPU Correspondence Reference Shortname: webgpu-correspondence Level: None -Status: LD +Status: UD Group: webgpu URL: https://gpuweb.github.io/gpuweb/correspondence/ !Participate: File an issue (open issues) @@ -151,6 +151,17 @@ User agents are not required to use these formulas and may expose whatever they When not using argument buffers, `maxStorageBuffersPerShaderStage` Issue: When using argument buffers: ? + + `maxBindGroupsPlusVertexBuffers` + [#2749](https://github.com/gpuweb/gpuweb/issues/2749) + No limit. Choose e.g. `max(default, maxBindGroups + maxVertexBuffers)`. + *Strategy-dependent.* When using argument buffers, `Maximum number of _____ you can access, per stage, from an argument buffer` + No limit. Choose e.g. `max(default, maxBindGroups + maxVertexBuffers)`. + + `maxBindingsPerBindGroup` + [#3279](https://github.com/gpuweb/gpuweb/issues/3279), + [#3864](https://github.com/gpuweb/gpuweb/issues/3864) + Limit is arbitrary to allow implementations to treat binding space as an array. `maxSamplersPerShaderStage` [#409](https://github.com/gpuweb/gpuweb/issues/409) @@ -166,12 +177,14 @@ User agents are not required to use these formulas and may expose whatever they `maxStorageTexturesPerShaderStage` [#409](https://github.com/gpuweb/gpuweb/issues/409) - `maxPerStageDescriptorStorageImages` + *Strategy-dependent.* Choose a value ≤ `maxPerStageDescriptorStorageImages` + while adhering to [[#vulkan-maxFragmentCombinedOutputResources]]. *Strategy-dependent.* Allocate `Maximum number of Unordered Access Views in all descriptor tables across all stages` (guaranteed to be 64) across stages across these two limits. For example, 32 for each shader stage, split as 16 textures and 16 buffers per shader stage. `maxStorageBuffersPerShaderStage` [#409](https://github.com/gpuweb/gpuweb/issues/409) - `maxPerStageDescriptorStorageBuffers` + *Strategy-dependent.* Choose a value ≤ `maxPerStageDescriptorStorageBuffers` + while adhering to [[#vulkan-maxFragmentCombinedOutputResources]]. *Strategy-dependent.* Allocate `Maximum number of entries in the buffer argument table, per graphics or kernel function` across these three limits. `maxUniformBuffersPerShaderStage` @@ -218,25 +231,29 @@ User agents are not required to use these formulas and may expose whatever they `maxVertexInputBindingStride` *No documented limit?* 2048 B = `D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES` - - `maxInterStageShaderComponents` - [#1962](https://github.com/gpuweb/gpuweb/issues/1962) - `min(maxVertexOutputComponents, maxFragmentInputComponents)` - `Maximum number of input components to a fragment function, declared with the stage_in qualifier`, subtract 4 for non-Apple GPUs - 120 = `maxInterStageShaderVariables * 4` `maxInterStageShaderVariables` - [#1962](https://github.com/gpuweb/gpuweb/issues/1962) + [#1962](https://github.com/gpuweb/gpuweb/issues/1962#issuecomment-1136316791) `min(maxVertexOutputComponents // 4, maxFragmentInputComponents // 4)` - `Maximum number of inputs (scalars or vectors) to a fragment function, declared with the stage_in qualifier`, subtract 2 for non-Apple GPUs + Min of: + + - `Maximum scalar or vector inputs to a fragment function`, subtract 2 for non-Apple GPUs + - `(Maximum number of input components to a fragment function) / 4`, subtract 1 for non-Apple GPUs 30 = `min(D3D12_VS_OUTPUT_REGISTER_COUNT - 1, D3D12_PS_INPUT_REGISTER_COUNT - 2)` `maxColorAttachments` [#2820](https://github.com/gpuweb/gpuweb/issues/2820) - `min(maxColorAttachments, maxFragmentOutputAttachments, maxFragmentCombinedOutputResources)` - `Maximum number of color render targets per render - pass descriptor` + *Strategy-dependent.* Choose a value ≤ `min(maxColorAttachments, maxFragmentOutputAttachments)` + while adhering to [[#vulkan-maxFragmentCombinedOutputResources]]. + `Maximum number of color render targets per render pass descriptor` 8 = `D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT` + + `maxColorAttachmentBytesPerSample` + [#2965](https://github.com/gpuweb/gpuweb/issues/2965) +

*No documented limit?* + Mostly `Maximum total render target size, per pixel, when using multiple color render targets`, + but it's [a bit more complicated than that](https://github.com/gpuweb/gpuweb/issues/2965#issuecomment-1223792432) +

*No documented limit?* `maxComputeWorkgroupStorageSize` [#1863](https://github.com/gpuweb/gpuweb/issues/1863) @@ -271,3 +288,16 @@ User agents are not required to use these formulas and may expose whatever they

*No documented limit?* 65535 = `D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION` + +## Vulkan `maxFragmentCombinedOutputResources` ## {#vulkan-maxFragmentCombinedOutputResources} + +Choose `maxStorageBuffersPerShaderStage`, `maxStorageTexturesPerShaderStage`, and `maxColorAttachments` +such that their sum is ≤ Vulkan's `maxFragmentCombinedOutputResources`. + +

+Warning: +`maxFragmentCombinedOutputResources` is incorrectly reported on many +[Intel, AMD, NVIDIA](https://github.com/gpuweb/gpuweb/issues/4018#issuecomment-1499725189), and +[Imagination](https://github.com/gpuweb/gpuweb/issues/3631#issuecomment-1498747606) drivers. +On these drivers, the combined limit may need to be ignored. +

diff --git a/design/AdapterIdentifiers.md b/design/AdapterIdentifiers.md index 9a90e4c098..dd913c6711 100644 --- a/design/AdapterIdentifiers.md +++ b/design/AdapterIdentifiers.md @@ -1,217 +1,222 @@ -# WebGPU Adapter Identifiers - -## Introduction - -The WebGL extension [WEBGL_debug_renderer_info](https://www.khronos.org/registry/webgl/extensions/WEBGL_debug_renderer_info/) reports identifying information about a device's graphics driver for the purposes of debugging or detection and avoidance of bugs or performance pitfalls on a particular driver or piece of hardware. - -These identifiers have proven to be a valuable tool for developers over the years (See [Appendix B: Motivating real-world use cases](#Appendix-B-Motivating-real-world-use-cases)), but have also been observed to be frequently used as a source of high-entropy fingerprinting data. Additionally, the format that WebGL returns the identifiers in (a string of undefined structure) is difficult to work with, akin to the user agent string. - -For WebGPU we need mechanisms which report similar data about the GPU hardware (called an "adapter" in WebGPU) to enable legitimate development use cases, such as driver bug workarounds, while minimizing the amount of fingerprintable data that is exposed without user consent. This document will refer to that data as "adapter identifiers". - -## Use Cases: - -### Bug workarounds -A WebGPU developer wants to ensure that their content works on all devices, but is aware of a bug on a specific family of GPUs that causes corrupted rendering. Using a minimal subset of adapter identifiers they can identify when a user's GPUs is part of a group which includes the known-buggy hardware and switch to a slower code path that doesn't provoke the issue. - -### Filing issue reports -A WebGPU developer has included a "Report an issue" button on their page. Normally they have found that they need very little adapter information to operate, but when users experience a problem they want to gather as much data as they can about the problem. On the report filing page they include UI to allow the user to include their GPU information in the report, which when checked causes the browser to confirm that they want to let the page know their full adapter details. - -### Performance optimization -A WebGPU developer wants all users to experience good performance on their page, but has developed some effects that are not practical on mobile GPUs. They check the adapter identifiers on page load to get a broad idea of what family of GPU the user has to start them off with a reasonable set of defaults. On a settings page, however, they can include a button which detects the best settings for their device. Clicking it may prompt the user for consent to see more detailed GPU information so that ideal settings for their device can be selected. - -### WebGPU developer community assets -A common and useful asset for developers is sites such as https://gpuinfo.org/, which visualize your current devices capabilities in an easy to read format and (with user's consent) can collect information about GPU capabilities to report in aggregate to other developers, giving them a sense of how widespread various capabilities are. Offering a way for users to opt-in to contributing to such a database is desirable. - -## Goals: - - Offer a mechanism to report GPU adapter identifiers in a scalable way. - - Allow for reporting no information (tracking prevention modes, privacy-oriented UAs). - - Enable UAs to decide for themselves how much information to expose by default. - - Allow developers to have some input on how much information they need, especially with respect to triggering user prompts. - - Any such feature needs to be invokable late in the device lifetime, to allow for cases like filing bug reports. - - Developers need to know when a call may cause a user prompt to be shown so that they can avoid that path if desired. - - Offer control over how much data is exposed to embedded content/iframes. - - Minimize string parsing for accuracy and developer convenience. - -## API usage - -> Full details of how to use WebGPU will not be covered here. Please refer to the [WebGPU explainer](https://gpuweb.github.io/gpuweb/explainer/) or [WebGPU spec](https://gpuweb.github.io/gpuweb) for further information. - -### Masked adapter identifiers - -The first step when using WebGPU is to query a `GPUAdapter`, of which there may be several available to the system. This will typically correspond to a physical or software emulated GPU. - -```js -const gpuAdapter = await navigator.gpu.requestAdapter(); -``` - -WebGPU applications often require a significant amount of resource initialization at startup, and it's possible that the resources being initialized may need to be altered depending on the adapter in use. For example: Shader sources may need to be re-written to avoid a known bug or lower detail meshes and textures may need to be fetched to avoid overtaxing a slower device. In these cases some amount of adapter identifiers need to be queried very early in the application's lifetime, and preferably without invoking a user consent prompt. (Nobody likes to be asked for permission immediately on navigation, at which point they likely have little to know context for why the permission is needed.) - -In this case, the developer would call the `requestAdapterInfo()` method of the `GPUAdapter`, which returns a `GPUAdapterInfo` interface containing several potential identifiers for the adapter, and may contain values similar to the following: - -```js -const adapterInfo = await gpuAdapter.requestAdapterInfo(); -console.log(adapterInfo); - -// Output: -{ - vendor: 'nvidia', - architecture: 'turing', - device: '', - description: '' -} -``` - -Note that some values of the interface are the empty string, because the UA deemed that they were too high-entropy to return without explicit user consent. If the UA wished, it would have the ability to return empty string for all values. This would be most commonly expected in "enhanced privacy" modes like [Edge's strict tracking prevention](https://support.microsoft.com/en-us/microsoft-edge/learn-about-tracking-prevention-in-microsoft-edge-5ac125e8-9b90-8d59-fa2c-7f2e9a44d869) or [Firefox's Enhanced Tracking Protection](https://support.mozilla.org/en-US/kb/enhanced-tracking-protection-firefox-desktop). Ideally returning little to no identifiers is common enough that user agents that wish to expose very little information by default can do so without severe compatibility concerns. - -The information that _is_ returned should be helpful in identifying broad buckets of adapters with similar capabilities and performance characteristics. For example, Nvidia's "Turing" architecture [covers a range of nearly 40 different GPUs](https://en.wikipedia.org/wiki/Turing_(microarchitecture)#Products_using_Turing) across a wide range of prices and form factors. Identifing the adapter as an Turing device is enough to allow developers to activate broad workarounds aimed at that family of hardware and make some assumptions about baseline performance, but is also broad enough to not give away too much identifiable information about the user. - -Additionally, in some cases the UA may find it beneficial to return a value that is not the most accurate one that could be reported but still gives developers a reasonable reference point with a lower amount of entropy. - -Finally, it may not always be possible or practical to detemine a value for some fields (like a GPU's architecture) and in those cases returning empty string is acceptible even if the user agent would have considered the information low-entropy. - -### Unmasked adapter identifiers - -At some point during the lifetime of the application the developer may determine that they need more information about the user's specific adapter. A common scenario would be filing a bug report. The developer will be able to best respond to the user's issue if they know exactly what device is being used. In this case, they can request an "unmasked" version any fields of the `GPUAdapterInfo`: - -```js -feedbackButton.addEventListener('click', async ()=> { - const unmaskHints = ['architecture', 'device', 'description']; - const unmaskedAdapterInfo = await gpuAdapter.requestAdapterInfo(unmaskHints); - generateUserFeedback(unmaskedAdapterInfo); -}); -``` - -The resolved value is the adapter's `GPUAdapterInfo` with any fields specified by `unmaskHints` that were previously omitted or reported with a less accurate value now populated with the most accurate information the UA will deliver. For example: - -```js -console.log(unmaskedAdapterInfo); - -// Output: -{ - vendor: 'nvidia', - architecture: 'turing', - device: '0x8644', - description: 'NVIDIA GeForce GTX 1660 SUPER' -} -``` - -Because the unmasked values may contain higher entropy identifying information, the bar for querying it is quite a bit higher. Calling `requestAdapterInfo()` with any `unmaskHints` requires user activation, and will reject the promise otherwise. If the `unmaskHints` array contains any previously masked value it also requires that user consent be given before returning, and as such may display a prompt to the user asking if the page can access the newly requested GPU details before allowing the promise to resolve. If the user declines to give consent then the promise is rejected. - -Once the user has given their consent any future calls to `requestAdapterInfo()` should return the unmasked fields even if no `unmaskHints` are specified, and future instances of the same underlying adapter returned from `navigator.gpu.requestAdapter()` on that page load should also return unmasked data without requiring hints to be passed. - -Even after `unmaskHints` have been passed to `requestAdapterInfo()` the UA is still allowed to return empty string for attributes requested in the `unmaskHints` array if the UA cannot determine the value in question or decides not to reveal it. (UAs should not request user consent when unmasking is requested for attributes that will be left empty.) - -### Identifier formatting - -To minimize developer work and reduce the chances of fingerprinting via casing differences between platforms, and string values reported as part of the `GPUAdapterInfo` conform to strict formatting rules. They must be lowercase ASCII strings containing no spaces, with separate words concatenated with a hyphen ("-") character. - -The exception to this is `description`, which may be a string reported directly from the driver without modification. As a result, however, `description` should always be omitted from masked adapters. Additionally, enough information should be offered via other fields that developers don't feel the need to attempt parsing the `description` string. - -User agents should also make an effort to normalize the strings returned, ideally through a public registery. This especially applies to fields like `vendor` which are presumed to have a relatively low number of possible values. - -Some values, such as `architecture`, are unlikely to be directly provided by the driver. As such, User Agents are expected to make a best-effort at identifying and reporting common architectures, and report empty string otherwise. - -### Iframe controls - -In addition to using the above mechanisms to hit a balance between offering developers useful information and mitigating fingerprinting concerns, [Permissions Policy](https://w3c.github.io/webappsec-permissions-policy/) should be used to control whether or not WebGPU features are exposed to iframes. - -The recommended feature identifier is `"webgpu"`, and the [default allowlist](https://w3c.github.io/webappsec-permissions-policy/#default-allowlist) for this feature would be `["self"]`. This allows documents from the top level browsing context use the feature by default, but requires documents included in iframes to be explicitly granted permission from the top level context in order to use WebGPU, like so: - -```html - -``` - -If the `"webgpu"` feature is not granted to a page, all calls that page makes to `navigator.gpu.requestAdapter()` will resolve to `null`. - -This helps strike a balance between enabling powerful rendering and computation capabilities on the web and a desire to mitigate abuse by bad actors. - -## Proposed IDL - -```webidl -partial interface GPUAdapter { - Promise requestAdapterInfo(optional sequence unmaskHints = []); -}; - -interface GPUAdapterInfo { - DOMString vendor; - DOMString architecture; - DOMString device; - DOMString description; -}; -``` - -## Appendix A: Alternatives considered - -### A single identifier string -Previously the WebGPU spec had a single string identifier, `GPUAdapter.name`, which would have reported a string very similar to the values reported by `WEBGL_debug_renderer_info`. [Concerns were raised about this approach](https://github.com/gpuweb/gpuweb/issues/2191), and the group generally agreed that we wanted something with finer grained control over the values reported and that was less problematic to parse for developers. - -### Force reliance on feature detection -It was suggested that, similar to other web platform features, no identifiers should be exposed at all and instead developers should rely on feature tests to determine if they need to take a different code path. Unfortunately this is impractical for GPU APIs such as WebGPU or WebGL. There have been multiple documented bugs in the past that are not trivially detectable, such as bugs which are only provoked under high memory usage situations or which only occur intermittently over long time periods. In addition, reading back information from the GPU in order to detect certain classes of issues is not trivial, and in some cases may actually change the driver's behavior. - -This means that realtime bug detection can be extremely constly, and may incur performance penalties or add significantly to startup time. As such it is not desirable or practical to ask developers to try and provoke any known driver issues on application startup. - -### Rely on the UA, etc. to fix bugs -It was also suggested that developers should generally not be the ones shouldering the burden of detecting and working around driver or hardware issues, and instead that responsibility should lie with the hardware manufacturer, OS, or User Agent. In general we agree with this sentiment! User agents, in particular, have a history of implementing workarounds for issues observed on a specific OS, GPU, or driver, as well as working with the appropriate parties to ensure that the problems are fixed upstream. (For example, you can see the [list of bugs that Chromium works around currently here](https://source.chromium.org/chromium/chromium/src/+/main:gpu/config/gpu_driver_bug_list.json). All modern browsers have some variation of this type of workaround list.) This is work we expect to continue in perpetuity. - -However, we have also observed that developers cannot rely on platform owners alone to resolve issues. For one, no matter how quickly a user agent or hardware manufacturer responds to bug reports there will always be some period of development, testing, and deployment before developers can rely on the fix, and even then they will likely have to contend with users on older software versions for a long time. This effect is exaggerated when considering that in some cases user agents only release new updates on a yearly cadence. - -In some other cases, the issue may not be one of correctness, but of performance. If a certain technique is performed by the GPU in a conformant manner but performs poorly compared to other devices it is generally not the User Agent's place to intervene. An individual developer, however, can make quality vs. performance tradeoffs that are appropriate for their application as long as they are given sufficent information to know when the tradeoff in necessary. - -### Inference from other signals -There are some other properties, such as a `GPUAdapter`'s limits and available features, that could be used in some cases to infer what kind of device a developer is using. Additionally, developers could use other platform signals (user agent string, screen resolution, etc) to infer that they are on a known device which has a certain class of GPU. (For example, a specific generation iPhone.) The concern with this approach is that it encourages developers to collect _more_ identifiable user information for a less reliable result. - -In practical terms it's likely that not providing adapter identifiers via WebGPU will simply encourage developers to initialize and tear down a WebGL context prior to initializing WebGPU simply to get the `WEBGL_debug_renderer_info` strings, which may return info from the incorrect adapter and is not a pattern we want to encourage. - -## Appendix B: Motivating real-world use cases - -These are some known use cases for GPU identifiers that we have heard of in the past. These refer to WebGL applications specifically, but we have every reason to expect that they will be applicable to WebGPU as well. - -### Developer feedback on WEBGL\_debug\_renderer\_info: -Ken Russell (@kenrussell) collected quotes from various WebGL developers and reported them to the WebGL Working Group in 2019. - -The following are some quoted reasons why various pages use `WEBGL_debug_renderer_info`: - -**Unity** - - Using exact GPU info+device+OS+browser to ... identify weak fillrate systems for whether to use "SD" or "HD" rendering - -**Uber** - - Use this feature to activate nVidia/Intel specific GLSL workarounds. - - Print the driver in the console when we create contexts, so that when remote operators (e.g. in Asia/Australia) report problems we can ... unblock them with minimal effort. - -**[Sketchfab](https://sketchfab.com)** - - Report user GPU in our automatic error reporting tools. When we need to reproduce shader bugs it's invaluable. - - Warn users when they are switched to software webgl acceleration. "Otherwise users might think the Sketchfab render is very slow, using their laptop batteries, and pushing laptop fan to the max where just restarting/reloading chrome fixes it." - -**[Scirra](https://www.construct.net/en)** - - identifying GPUs affected by driver bugs, and working around it - - analytics on the unmasked renderer to identify the impact of such bugs and help us decide how to respond - - identifying which GPU is really in use on dual-GPU systems - - displaying it to the user as a diagnostic (also for them to identify which GPU is in use)." - -**[Figma](https://www.figma.com/)** - - Rely on this feature to be able to track down and detect obscure GPU issues with users that have old unreliable hardware. - - "Without this information, we would have been unable to debug and fix these WebGL implementation bugs that we've been encountering." - - Use this information to enable workarounds for WebGL implementation bugs. "The workarounds are not enabled by default because they are slower, and in some cases actually even incorrect (but less incorrect than when the bug is triggered)." - -**[noclip.website](https://noclip.website/)** - - detect and work around known bugs in drivers - - provide better error messages to users - - "The immediate impact if this extension was removed would be that all Apple devices would fail to render." (Due to a driver bug at the time.) - -### Tweets replying to [Dean Jackson's](https://twitter.com/grorgwork/status/1062395616867700736) inquiry about removing WEBGL\_debug\_renderer\_info: - - - Google maps, [to identify poorly performing devices.](https://twitter.com/gfxprogrammer/status/1062422760662528000?s=20) - - [Active Theory](https://activetheory.net/), [to scale visual quality](https://twitter.com/michaeltheory/status/1062402110396874752?s=20) - - [2DKit](http://2dkit.com/), [to estimate available memory and scale quality](https://twitter.com/b_garcia/status/1062413508212600832?s=20) - - [Matterport](https://matterport.com/), [to identify when to serve higher resolution textures](https://twitter.com/haeric/status/1134155677411110913?s=20) - -## Appendix C: API Prior Art - -### Native equivalents: -The following structures are what expose similar information in the various native libraries, though they obviously don't have the same privacy considerations. Included here as reference. - - [VkPhysicalDeviceProperties](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPhysicalDeviceProperties.html) - - [DXGI_ADAPTER_DESC](https://docs.microsoft.com/en-us/windows/win32/api/dxgi/ns-dxgi-dxgi_adapter_desc) - - [MTLDevice](https://developer.apple.com/documentation/metal/mtldevice) - -### Prior art on the Web Platform: -[User-Agent client hints](https://web.dev/user-agent-client-hints/), and especially [NavigatorUAData.getHighEntropyValues()](https://developer.mozilla.org/en-US/docs/Web/API/NavigatorUAData/getHighEntropyValues), have been introduced previously as a more privacy preserving and developer friendly alternative to UA string parsing. +# WebGPU Adapter Identifiers + +**This document is outdated. `adapter.requestAdapterInfo()` has been replaced with +`adapter.info` and `unmaskHints` doesn't exist anymore. See: +[#4536](https://github.com/gpuweb/gpuweb/issues/4536), +[#4316](https://github.com/gpuweb/gpuweb/pull/4316). + +## Introduction + +The WebGL extension [WEBGL_debug_renderer_info](https://www.khronos.org/registry/webgl/extensions/WEBGL_debug_renderer_info/) reports identifying information about a device's graphics driver for the purposes of debugging or detection and avoidance of bugs or performance pitfalls on a particular driver or piece of hardware. + +These identifiers have proven to be a valuable tool for developers over the years (See [Appendix B: Motivating real-world use cases](#Appendix-B-Motivating-real-world-use-cases)), but have also been observed to be frequently used as a source of high-entropy fingerprinting data. Additionally, the format that WebGL returns the identifiers in (a string of undefined structure) is difficult to work with, akin to the user agent string. + +For WebGPU we need mechanisms which report similar data about the GPU hardware (called an "adapter" in WebGPU) to enable legitimate development use cases, such as driver bug workarounds, while minimizing the amount of fingerprintable data that is exposed without user consent. This document will refer to that data as "adapter identifiers". + +## Use Cases: + +### Bug workarounds +A WebGPU developer wants to ensure that their content works on all devices, but is aware of a bug on a specific family of GPUs that causes corrupted rendering. Using a minimal subset of adapter identifiers they can identify when a user's GPUs is part of a group which includes the known-buggy hardware and switch to a slower code path that doesn't provoke the issue. + +### Filing issue reports +A WebGPU developer has included a "Report an issue" button on their page. Normally they have found that they need very little adapter information to operate, but when users experience a problem they want to gather as much data as they can about the problem. On the report filing page they include UI to allow the user to include their GPU information in the report, which when checked causes the browser to confirm that they want to let the page know their full adapter details. + +### Performance optimization +A WebGPU developer wants all users to experience good performance on their page, but has developed some effects that are not practical on mobile GPUs. They check the adapter identifiers on page load to get a broad idea of what family of GPU the user has to start them off with a reasonable set of defaults. On a settings page, however, they can include a button which detects the best settings for their device. Clicking it may prompt the user for consent to see more detailed GPU information so that ideal settings for their device can be selected. + +### WebGPU developer community assets +A common and useful asset for developers is sites such as https://gpuinfo.org/, which visualize your current devices capabilities in an easy to read format and (with user's consent) can collect information about GPU capabilities to report in aggregate to other developers, giving them a sense of how widespread various capabilities are. Offering a way for users to opt-in to contributing to such a database is desirable. + +## Goals: + - Offer a mechanism to report GPU adapter identifiers in a scalable way. + - Allow for reporting no information (tracking prevention modes, privacy-oriented UAs). + - Enable UAs to decide for themselves how much information to expose by default. + - Allow developers to have some input on how much information they need, especially with respect to triggering user prompts. + - Any such feature needs to be invokable late in the device lifetime, to allow for cases like filing bug reports. + - Developers need to know when a call may cause a user prompt to be shown so that they can avoid that path if desired. + - Offer control over how much data is exposed to embedded content/iframes. + - Minimize string parsing for accuracy and developer convenience. + +## API usage + +> Full details of how to use WebGPU will not be covered here. Please refer to the [WebGPU explainer](https://gpuweb.github.io/gpuweb/explainer/) or [WebGPU spec](https://gpuweb.github.io/gpuweb) for further information. + +### Masked adapter identifiers + +The first step when using WebGPU is to query a `GPUAdapter`, of which there may be several available to the system. This will typically correspond to a physical or software emulated GPU. + +```js +const gpuAdapter = await navigator.gpu.requestAdapter(); +``` + +WebGPU applications often require a significant amount of resource initialization at startup, and it's possible that the resources being initialized may need to be altered depending on the adapter in use. For example: Shader sources may need to be re-written to avoid a known bug or lower detail meshes and textures may need to be fetched to avoid overtaxing a slower device. In these cases some amount of adapter identifiers need to be queried very early in the application's lifetime, and preferably without invoking a user consent prompt. (Nobody likes to be asked for permission immediately on navigation, at which point they likely have little to know context for why the permission is needed.) + +In this case, the developer would call the `requestAdapterInfo()` method of the `GPUAdapter`, which returns a `GPUAdapterInfo` interface containing several potential identifiers for the adapter, and may contain values similar to the following: + +```js +const adapterInfo = await gpuAdapter.requestAdapterInfo(); +console.log(adapterInfo); + +// Output: +{ + vendor: 'nvidia', + architecture: 'turing', + device: '', + description: '' +} +``` + +Note that some values of the interface are the empty string, because the UA deemed that they were too high-entropy to return without explicit user consent. If the UA wished, it would have the ability to return empty string for all values. This would be most commonly expected in "enhanced privacy" modes like [Edge's strict tracking prevention](https://support.microsoft.com/en-us/microsoft-edge/learn-about-tracking-prevention-in-microsoft-edge-5ac125e8-9b90-8d59-fa2c-7f2e9a44d869) or [Firefox's Enhanced Tracking Protection](https://support.mozilla.org/en-US/kb/enhanced-tracking-protection-firefox-desktop). Ideally returning little to no identifiers is common enough that user agents that wish to expose very little information by default can do so without severe compatibility concerns. + +The information that _is_ returned should be helpful in identifying broad buckets of adapters with similar capabilities and performance characteristics. For example, Nvidia's "Turing" architecture [covers a range of nearly 40 different GPUs](https://en.wikipedia.org/wiki/Turing_(microarchitecture)#Products_using_Turing) across a wide range of prices and form factors. Identifing the adapter as an Turing device is enough to allow developers to activate broad workarounds aimed at that family of hardware and make some assumptions about baseline performance, but is also broad enough to not give away too much identifiable information about the user. + +Additionally, in some cases the UA may find it beneficial to return a value that is not the most accurate one that could be reported but still gives developers a reasonable reference point with a lower amount of entropy. + +Finally, it may not always be possible or practical to detemine a value for some fields (like a GPU's architecture) and in those cases returning empty string is acceptible even if the user agent would have considered the information low-entropy. + +### Unmasked adapter identifiers + +At some point during the lifetime of the application the developer may determine that they need more information about the user's specific adapter. A common scenario would be filing a bug report. The developer will be able to best respond to the user's issue if they know exactly what device is being used. In this case, they can request an "unmasked" version any fields of the `GPUAdapterInfo`: + +```js +feedbackButton.addEventListener('click', async ()=> { + const unmaskHints = ['architecture', 'device', 'description']; + const unmaskedAdapterInfo = await gpuAdapter.requestAdapterInfo(unmaskHints); + generateUserFeedback(unmaskedAdapterInfo); +}); +``` + +The resolved value is the adapter's `GPUAdapterInfo` with any fields specified by `unmaskHints` that were previously omitted or reported with a less accurate value now populated with the most accurate information the UA will deliver. For example: + +```js +console.log(unmaskedAdapterInfo); + +// Output: +{ + vendor: 'nvidia', + architecture: 'turing', + device: '0x8644', + description: 'NVIDIA GeForce GTX 1660 SUPER' +} +``` + +Because the unmasked values may contain higher entropy identifying information, the bar for querying it is quite a bit higher. Calling `requestAdapterInfo()` with any `unmaskHints` requires user activation, and will reject the promise otherwise. If the `unmaskHints` array contains any previously masked value it also requires that user consent be given before returning, and as such may display a prompt to the user asking if the page can access the newly requested GPU details before allowing the promise to resolve. If the user declines to give consent then the promise is rejected. + +Once the user has given their consent any future calls to `requestAdapterInfo()` should return the unmasked fields even if no `unmaskHints` are specified, and future instances of the same underlying adapter returned from `navigator.gpu.requestAdapter()` on that page load should also return unmasked data without requiring hints to be passed. + +Even after `unmaskHints` have been passed to `requestAdapterInfo()` the UA is still allowed to return empty string for attributes requested in the `unmaskHints` array if the UA cannot determine the value in question or decides not to reveal it. (UAs should not request user consent when unmasking is requested for attributes that will be left empty.) + +### Identifier formatting + +To minimize developer work and reduce the chances of fingerprinting via casing differences between platforms, and string values reported as part of the `GPUAdapterInfo` conform to strict formatting rules. They must be lowercase ASCII strings containing no spaces, with separate words concatenated with a hyphen ("-") character. + +The exception to this is `description`, which may be a string reported directly from the driver without modification. As a result, however, `description` should always be omitted from masked adapters. Additionally, enough information should be offered via other fields that developers don't feel the need to attempt parsing the `description` string. + +User agents should also make an effort to normalize the strings returned, ideally through a public registery. This especially applies to fields like `vendor` which are presumed to have a relatively low number of possible values. + +Some values, such as `architecture`, are unlikely to be directly provided by the driver. As such, User Agents are expected to make a best-effort at identifying and reporting common architectures, and report empty string otherwise. + +### Iframe controls + +In addition to using the above mechanisms to hit a balance between offering developers useful information and mitigating fingerprinting concerns, [Permissions Policy](https://w3c.github.io/webappsec-permissions-policy/) should be used to control whether or not WebGPU features are exposed to iframes. + +The recommended feature identifier is `"webgpu"`, and the [default allowlist](https://w3c.github.io/webappsec-permissions-policy/#default-allowlist) for this feature would be `["self"]`. This allows documents from the top level browsing context use the feature by default, but requires documents included in iframes to be explicitly granted permission from the top level context in order to use WebGPU, like so: + +```html + +``` + +If the `"webgpu"` feature is not granted to a page, all calls that page makes to `navigator.gpu.requestAdapter()` will resolve to `null`. + +This helps strike a balance between enabling powerful rendering and computation capabilities on the web and a desire to mitigate abuse by bad actors. + +## Proposed IDL + +```webidl +partial interface GPUAdapter { + Promise requestAdapterInfo(optional sequence unmaskHints = []); +}; + +interface GPUAdapterInfo { + DOMString vendor; + DOMString architecture; + DOMString device; + DOMString description; +}; +``` + +## Appendix A: Alternatives considered + +### A single identifier string +Previously the WebGPU spec had a single string identifier, `GPUAdapter.name`, which would have reported a string very similar to the values reported by `WEBGL_debug_renderer_info`. [Concerns were raised about this approach](https://github.com/gpuweb/gpuweb/issues/2191), and the group generally agreed that we wanted something with finer grained control over the values reported and that was less problematic to parse for developers. + +### Force reliance on feature detection +It was suggested that, similar to other web platform features, no identifiers should be exposed at all and instead developers should rely on feature tests to determine if they need to take a different code path. Unfortunately this is impractical for GPU APIs such as WebGPU or WebGL. There have been multiple documented bugs in the past that are not trivially detectable, such as bugs which are only provoked under high memory usage situations or which only occur intermittently over long time periods. In addition, reading back information from the GPU in order to detect certain classes of issues is not trivial, and in some cases may actually change the driver's behavior. + +This means that realtime bug detection can be extremely constly, and may incur performance penalties or add significantly to startup time. As such it is not desirable or practical to ask developers to try and provoke any known driver issues on application startup. + +### Rely on the UA, etc. to fix bugs +It was also suggested that developers should generally not be the ones shouldering the burden of detecting and working around driver or hardware issues, and instead that responsibility should lie with the hardware manufacturer, OS, or User Agent. In general we agree with this sentiment! User agents, in particular, have a history of implementing workarounds for issues observed on a specific OS, GPU, or driver, as well as working with the appropriate parties to ensure that the problems are fixed upstream. (For example, you can see the [list of bugs that Chromium works around currently here](https://source.chromium.org/chromium/chromium/src/+/main:gpu/config/gpu_driver_bug_list.json). All modern browsers have some variation of this type of workaround list.) This is work we expect to continue in perpetuity. + +However, we have also observed that developers cannot rely on platform owners alone to resolve issues. For one, no matter how quickly a user agent or hardware manufacturer responds to bug reports there will always be some period of development, testing, and deployment before developers can rely on the fix, and even then they will likely have to contend with users on older software versions for a long time. This effect is exaggerated when considering that in some cases user agents only release new updates on a yearly cadence. + +In some other cases, the issue may not be one of correctness, but of performance. If a certain technique is performed by the GPU in a conformant manner but performs poorly compared to other devices it is generally not the User Agent's place to intervene. An individual developer, however, can make quality vs. performance tradeoffs that are appropriate for their application as long as they are given sufficent information to know when the tradeoff in necessary. + +### Inference from other signals +There are some other properties, such as a `GPUAdapter`'s limits and available features, that could be used in some cases to infer what kind of device a developer is using. Additionally, developers could use other platform signals (user agent string, screen resolution, etc) to infer that they are on a known device which has a certain class of GPU. (For example, a specific generation iPhone.) The concern with this approach is that it encourages developers to collect _more_ identifiable user information for a less reliable result. + +In practical terms it's likely that not providing adapter identifiers via WebGPU will simply encourage developers to initialize and tear down a WebGL context prior to initializing WebGPU simply to get the `WEBGL_debug_renderer_info` strings, which may return info from the incorrect adapter and is not a pattern we want to encourage. + +## Appendix B: Motivating real-world use cases + +These are some known use cases for GPU identifiers that we have heard of in the past. These refer to WebGL applications specifically, but we have every reason to expect that they will be applicable to WebGPU as well. + +### Developer feedback on WEBGL\_debug\_renderer\_info: +Ken Russell (@kenrussell) collected quotes from various WebGL developers and reported them to the WebGL Working Group in 2019. + +The following are some quoted reasons why various pages use `WEBGL_debug_renderer_info`: + +**Unity** + - Using exact GPU info+device+OS+browser to ... identify weak fillrate systems for whether to use "SD" or "HD" rendering + +**Uber** + - Use this feature to activate nVidia/Intel specific GLSL workarounds. + - Print the driver in the console when we create contexts, so that when remote operators (e.g. in Asia/Australia) report problems we can ... unblock them with minimal effort. + +**[Sketchfab](https://sketchfab.com)** + - Report user GPU in our automatic error reporting tools. When we need to reproduce shader bugs it's invaluable. + - Warn users when they are switched to software webgl acceleration. "Otherwise users might think the Sketchfab render is very slow, using their laptop batteries, and pushing laptop fan to the max where just restarting/reloading chrome fixes it." + +**[Scirra](https://www.construct.net/en)** + - identifying GPUs affected by driver bugs, and working around it + - analytics on the unmasked renderer to identify the impact of such bugs and help us decide how to respond + - identifying which GPU is really in use on dual-GPU systems + - displaying it to the user as a diagnostic (also for them to identify which GPU is in use)." + +**[Figma](https://www.figma.com/)** + - Rely on this feature to be able to track down and detect obscure GPU issues with users that have old unreliable hardware. + - "Without this information, we would have been unable to debug and fix these WebGL implementation bugs that we've been encountering." + - Use this information to enable workarounds for WebGL implementation bugs. "The workarounds are not enabled by default because they are slower, and in some cases actually even incorrect (but less incorrect than when the bug is triggered)." + +**[noclip.website](https://noclip.website/)** + - detect and work around known bugs in drivers + - provide better error messages to users + - "The immediate impact if this extension was removed would be that all Apple devices would fail to render." (Due to a driver bug at the time.) + +### Tweets replying to [Dean Jackson's](https://twitter.com/grorgwork/status/1062395616867700736) inquiry about removing WEBGL\_debug\_renderer\_info: + + - Google maps, [to identify poorly performing devices.](https://twitter.com/gfxprogrammer/status/1062422760662528000?s=20) + - [Active Theory](https://activetheory.net/), [to scale visual quality](https://twitter.com/michaeltheory/status/1062402110396874752?s=20) + - [2DKit](http://2dkit.com/), [to estimate available memory and scale quality](https://twitter.com/b_garcia/status/1062413508212600832?s=20) + - [Matterport](https://matterport.com/), [to identify when to serve higher resolution textures](https://twitter.com/haeric/status/1134155677411110913?s=20) + +## Appendix C: API Prior Art + +### Native equivalents: +The following structures are what expose similar information in the various native libraries, though they obviously don't have the same privacy considerations. Included here as reference. + - [VkPhysicalDeviceProperties](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPhysicalDeviceProperties.html) + - [DXGI_ADAPTER_DESC](https://docs.microsoft.com/en-us/windows/win32/api/dxgi/ns-dxgi-dxgi_adapter_desc) + - [MTLDevice](https://developer.apple.com/documentation/metal/mtldevice) + +### Prior art on the Web Platform: +[User-Agent client hints](https://web.dev/user-agent-client-hints/), and especially [NavigatorUAData.getHighEntropyValues()](https://developer.mozilla.org/en-US/docs/Web/API/NavigatorUAData/getHighEntropyValues), have been introduced previously as a more privacy preserving and developer friendly alternative to UA string parsing. diff --git a/design/RejectedErrorHandling.md b/design/RejectedErrorHandling.md index a0e5b3698f..230d9fca1f 100644 --- a/design/RejectedErrorHandling.md +++ b/design/RejectedErrorHandling.md @@ -30,7 +30,7 @@ interface GPUDeviceLostEvent : Event { }; ``` -If `GPUAdapter`'s `isReady` attribute is false, `createDevice` will fail. +If `GPUAdapter`'s `isReady` attribute is false, `createDevice` will fail. `isReady` may be set to `false` when a `"gpu-device-lost"` event fires. It will always be set to `true` when a `"gpu-adapter-ready"` event fires. diff --git a/explainer/index.bs b/explainer/index.bs index f0524c7c0b..4fbf30755b 100644 --- a/explainer/index.bs +++ b/explainer/index.bs @@ -189,12 +189,12 @@ issued through "child" objects. To get an adapter, an application calls `navigator.gpu.requestAdapter()`, optionally passing options which may influence what adapter is chosen, like a `powerPreference` (`"low-power"` or `"high-performance"`) or -`forceSoftware` to force a software implementation. +`forceFallbackAdapter` to force a software implementation. `requestAdapter()` never rejects, but may resolve to null if an adapter can't be returned with the specified options. -A returned adapter exposes a `name` (implementation-defined), a boolean `isSoftware` so +A returned adapter exposes `info` (`vendor`/`architecture`/etc., implementation-defined), a boolean `isFallbackAdapter` so applications with fallback paths (like WebGL or 2D canvas) can avoid slow software implementations, and the [[#optional-capabilities]] available on the adapter. @@ -1267,5 +1267,3 @@ However investigation in WebGL show that GPU timings can be used to leak from su # WebGPU Shading Language # {#wgsl} - - diff --git a/proposals/compatibility-mode.md b/proposals/compatibility-mode.md index 4feabd017f..991180e8ef 100644 --- a/proposals/compatibility-mode.md +++ b/proposals/compatibility-mode.md @@ -2,7 +2,7 @@ This proposal is **under active development, but has not been standardized for inclusion in the WebGPU specification**. WebGPU implementations **must not** expose this functionality; doing so is a spec violation. Note however, an implementation might provide an option (e.g. command line flag) to enable a draft implementation, for developers who want to test this proposal. -The changes merged into this document are those for which the GPU for the Web Community Group has achieved **tentative** consensus prior to official standardization of the whole propsal. New items will be added to this doc as tentative consensus on further issues is achieved. +The changes merged into this document are those for which the GPU for the Web Community Group has achieved **tentative** consensus prior to official standardization of the whole proposal. New items will be added to this doc as tentative consensus on further issues is achieved. ## Problem @@ -10,30 +10,23 @@ WebGPU is a good match for modern explicit graphics APIs such as Vulkan, Metal a ## Goals -The primary goal of WebGPU Compatibility mode is to increase the reach of WebGPU by providing an opt-in, slightly restricted subset of WebGPU which will run on older APIs such as D3D11 and OpenGL ES. The set of restrictions in Compatibility mode should be kept to a minimum in order to make it easy to port exsting WebGPU applications. This will increase adoption of WebGPU applications via a wider userbase. +The primary goal of WebGPU Compatibility mode is to increase the reach of WebGPU by providing an opt-in, slightly restricted subset of WebGPU which will run on older APIs such as D3D11 and OpenGL ES. The set of restrictions in Compatibility mode should be kept to a minimum in order to make it easy to port existing WebGPU applications. This will increase adoption of WebGPU applications via a wider userbase. Since WebGPU Compatibility mode is a subset of WebGPU, all valid Compatibility mode applications are also valid WebGPU applications. Consequently, Compatibility mode applications will also run on user agents which do not support Compatibility mode. Such user agents will simply ignore the option requesting a Compatibility mode Adapter and return a Core WebGPU Adapter instead. ## WebGPU Spec Changes -```webidl -partial dictionary GPURequestAdapterOptions { - boolean compatibilityMode = false; -} -``` - -When calling `GPU.RequestAdapter()`, passing `compatibilityMode = true` in the `GPURequestAdapterOptions` will indicate to the User Agent to select the Compatibility subset of WebGPU. Any Devices created from the resulting Adapter on supporting UAs will support only Compatibility mode. Calls to APIs unsupported by Compatibility mode will result in validation errors. +When calling `GPU.requestAdapter()`, passing `featureLevel = "compatibility"` in the `GPURequestAdapterOptions` will indicate to the User Agent to select the Compatibility subset of WebGPU. Any Devices created from the resulting Adapter on supporting UAs will support only Compatibility mode. Calls to APIs unsupported by Compatibility mode will result in validation errors. -Note that a supporting User Agent may return a `compatibilityMode = true` Adapter which is backed by a fully WebGPU-capable hardware adapter, such as D3D12, Metal or Vulkan, so long as it validates all subsequent API calls made on the Adapter and the objects it vends against the Compatibility subset. +Note that a supporting User Agent may return a `featureLevel = "compatibility"` Adapter which is backed by a fully WebGPU-capable hardware adapter, such as D3D12, Metal or Vulkan, so long as it validates all subsequent API calls made on the Adapter and the objects it vends against the Compatibility subset. ```webidl partial interface GPUAdapter { - readonly attribute boolean isCompatibilityMode; + readonly attribute DOMstring featureLevel; } ``` -As a convenience to the developer, the Adapter returned will have the `isCompatibilityMode` property set to `true`. - +As a convenience to the developer, the Adapter returned will have the `featureLevel` property set to `"compatibility"`. ```webidl partial dictionary GPUTextureDescriptor { @@ -45,7 +38,7 @@ See "Texture view dimension may be specified", below. ## Compatibility mode restrictions -### 1. Texture view dimension may be specified +### 1. Texture view dimension may be specified When specifying a texture, a `textureBindingViewDimension` property determines the views which can be bound from that texture for sampling (see "Proposed IDL changes", above). Binding a view of a different dimension for sampling than specified at texture creation time will cause a validation error. If `textureBindingViewDimension` is unspecified, use [the same algorithm as `createView()`](https://gpuweb.github.io/gpuweb/#abstract-opdef-resolving-gputextureviewdescriptor-defaults): ``` @@ -77,11 +70,11 @@ Each `GPUColorTargetState` in a `GPUFragmentState` must have the same `blend.alp **Justification**: OpenGL ES does not support Cube Array textures. -### 5. Views of the same texture used in a single draw may not differ in mip levels. +### 5. Views of the same texture used in a single draw may not differ in aspect or mip levels. -A draw call may not bind two views of the same texture differing in `baseMipLevel` or `mipLevelCount`. Only a single mip level range range per texture is supported. This is enforced via validation at draw time. +A draw call may not bind two views of the same texture differing in `aspect`, `baseMipLevel`, or `mipLevelCount`. Only a single aspect and mip level range per texture is supported. This is enforced via validation at draw time. -**Justification**: OpenGL ES does not support texture views, but one mip level subset may be specified per texture using `glTexParameter*()` via the `GL_TEXTURE_BASE_LEVEL` and `GL_TEXTURE_MAX_LEVEL` parameters. +**Justification**: OpenGL ES does not support texture views, but one set of these parameters per texture is supported via glTexParameteri(). In particular, one depth/stencil aspect may be specified via `GL_DEPTH_STENCIL_TEXTURE_MODE`, and one mip level subset via the `GL_TEXTURE_BASE_LEVEL` and `GL_TEXTURE_MAX_LEVEL` parameters. ### 6. Array texture views used in bind groups must consist of the entire array. That is, `baseArrayLayer` must be zero, and `arrayLayerCount` must be equal to the size of the texture array. @@ -91,7 +84,7 @@ A bind group may not reference a subset of array layers. Only views of the entir ### 7. Disallow `sample_mask` and `sample_index` builtins in WGSL. -Use of the `sample_mask` or `sample_index` builtins would cause a validation error at pipeline creation time. +Use of the `sample_mask` or `sample_index` builtins would cause a validation error at shader module creation time. **Justification**: OpenGL ES 3.1 does not support `gl_SampleMask`, `gl_SampleMaskIn`, or `gl_SampleID`. @@ -99,15 +92,15 @@ Use of the `sample_mask` or `sample_index` builtins would cause a validation err The `rg32uint`, `rg32sint`, and `rg32float` texture formats no longer support the `"write-only" or "read-only" STORAGE_BINDING` capability by default. -Calls to `createTexture()` or `createBindGroupLayout()` with this combination cause a validation error. Calls to pipeline creation functions with pipeline `layout` set to `"auto"` and a storage texture binding of those format types cause a validation error (in the internal call to `createBindGroupLayout()`). +Calls to `createTexture()` or `createBindGroupLayout()` with this combination cause a validation error. Calls to `createShaderModule()` will fail if these formats are referenced as storage textures. **Justification**: GLSL ES 3.1 (section 4.4.7, "Format Layout Qualifiers") does not permit any two-component (RG) texture formats in a format layout qualifier. ### 9. Depth bias clamp must be zero. -During createRenderPipeline(), GPUDepthStencilState.depthBiasClamp must be zero, or a validation error occurs. +During `createRenderPipeline()` and `createRenderPipelineAsync()`, `GPUDepthStencilState.depthBiasClamp` must be zero, or a validation error occurs. -**Justification**: GLSL ES 3.1 does not support glPolygonOffsetClamp(). +**Justification**: GLSL ES 3.1 does not support `glPolygonOffsetClamp()`. ### 10. Lower limits. @@ -148,8 +141,7 @@ In WGSL, an inter-stage variable can be marked with one of three interpolation t `'linear'`, `'flat'`, and one of three interpolation sampling modes: `'center'`, `'centroid'`, `'sample'` In compatibility mode, `'linear'` type and sampling mode `'sample'` are disallowed. -If used via an entry point in a shader module passed to `createRenderPipeline`, `createRenderPipelineAsync`, -`createComputePipeline`, or `createComputePipelineAsync` a validation error is generated. +If either are used in the code passed to `createShaderModule` a validation error is generated. **Justification**: OpenGL ES 3.1 does not support `linear` interpolation nor `sample` sampling. @@ -159,8 +151,112 @@ If used via an entry point in a shader module passed to `createRenderPipeline`, **Justification**: OpenGL ES 3.1 does not support copying of multisample textures. +### 13. Disallow texture format reinterpretation + +When calling `createTexture`, the `viewFormats`, if specified, must be the same format as the texture. + +**Justification**: OpenGL ES 3.1 does not support texture format reinterpretation. + +### 14. Require `depthOrArrayLayers` to be compatible with `textureBindingViewDimension` in `createTexture`. + +When creating a texture you can pass in a `textureBindingViewDimension`. + +* If `textureBindingViewDimension` is `"2d"` and `depthOrArrayLayers` is not 1, a validation error is generated. + +* If `textureBindingViewDimension` is `"cube"` and `depthOrArrayLayers` is not 6, a validation error is generated. + +**Justification**: OpenGL ES 3.1 cannot create 2d textures with more than 1 layer nor can it +create cube maps that are not exactly 6 layers. + +## 15. Disallow bgra8unorm-srgb textures + +**Justification**: OpenGL ES 3.1 does not support bgra8unorm-srgb textures. + +## 16. Disallow `textureLoad` with `texture_depth?` textures + +If a `texture_depth`, `texture_depth_2d_array`, or `texture_depth_cube` are used in a `textureLoad` call +in code passed to `createShaderModule` a validation error is generated. + +**Justification**: OpenGL ES 3.1 does not support `texelFetch` for depth textures. + +Note: this does not affect textures made with depth formats bound to `texture_2d`. + +## 17. Disallow `@interpolation(flat)` and `@interpolation(flat, first)` + +If code is passed to `createShaderModule` that uses `@interpolation(flat)` or `@interpolation(flat, first)` +generate a validation error. + +**Justification**: OpenGL ES 3.1 only supports the last vertex as the provoking vertex where as +other APIs only support the first vertex so only `@interpolation(flat, either)` is supported in +compatibility mode. + +## 18. Introduce new `maxStorageBuffersInVertexStage` and `maxStorageTexturesInVertexStage` limits. + +If the number of shader variables of type `storage_buffer` in a vertex shader exceeds the `maxStorageBuffersInVertexStage` limit, a validation error will occur at pipeline creation time. + +If the number of shader variables of type `texture_storage_1d`, `texture_storage_2d`, `texture_storage_2d_array` and `texture_storage_3d` in a vertex shader exceeds the `maxStorageTexturesInVertexStage` limit, a validation error will occur at pipeline creation time. + +In Compatibility mode, these new limits will have a default of zero. In Core mode, they will default to the maximum value of a GPUSize32. + +In addition to the new limits, the existing `maxStorageBuffersPerShaderStage` and `maxStorageTexturesPerShaderStage` limits continue to apply to all stages. E.g., the effective storage buffer limit in the vertex stage is `min(maxStorageBuffersPerShaderStage, maxStorageBuffersInVertexStage)`. + +**Justification**: OpenGL ES 3.1 allows `MAX_VERTEX_SHADER_STORAGE_BLOCKS` and `MAX_VERTEX_IMAGE_UNIFORMS` to be zero, and there are a significant number of devices in the field with that value. + +## 19. Introduce new `maxStorageBuffersInFragmentStage` and `maxStorageTexturesInFragmentStage` limits. + +If the number of shader variables of type `storage_buffer` in a fragment shader exceeds the `maxStorageBuffersInFragmentStage` limit, a validation error will occur at pipeline creation time. + +If the number of shader variables of type `texture_storage_1d`, `texture_storage_2d`, `texture_storage_2d_array` and `texture_storage_3d` in a fragment shader exceeds the `maxStorageTexturesInFragmentStage` limit, a validation error will occur at pipeline creation time. + +In Compatibility mode, these new limits will have a default of zero. In Core mode, they will default to the maximum value of a GPUSize32. + +In addition to the new limits, the existing `maxStorageBuffersPerShaderStage` and `maxStorageTexturesPerShaderStage` limits continue to apply to all stages. E.g., the effective storage buffer limit in the fragment stage is `min(maxStorageBuffersPerShaderStage, maxStorageBuffersInFragmentStage)`. + +**Justification**: OpenGL ES 3.1 allows `MAX_FRAGMENT_SHADER_STORAGE_BLOCKS` and `MAX_FRAGMENT_IMAGE_UNIFORMS` to be zero, and there are a significant number of devices in the field with that value. + +## 20. Disallow using a depth texture with a non-comparison sampler + +Using a depth texture `texture_depth_2d`, `texture_depth_cube`, `texture_depth_2d_array` with a non-comparison +sampler in a shader will generate a validation error at pipeline creation time. + +**Justification**: OpenGL ES 3.1 says such usage has undefined behavior. + +## 21. Limit the number of texture+sampler combinations in a stage. + +If the number of texture+sampler combinations used a in single stage in a pipeline exceeds +`min(maxSampledTexturesPerShaderStage, maxSamplersPerShaderStage)` a validation error is generated. + +The validation occurs as follows: + +``` +maxCombinationsPerStage = min(maxSampledTexturesPerShaderStage, maxSamplersPerShaderStage) +for each stage of the pipeline: + sum = 0 + for each texture binding in the pipeline layout which is visible to that stage: + sum += max(1, number of texture sampler combos for that texture binding) + for each external texture binding in the pipeline layout which is visible to that stage: + sum += 1 // for LUT texture + LUT sampler + sum += 3 * max(1, number of external_texture sampler combos) // for Y+U+V + if sum > maxCombinationsPerStage + generate a validation error. +``` + +**Justification**: In OpenGL ES 3.1 does not support more combinations. Sampler units and texture units are bound together. Texture unit X uses sampler unit X. + +## 22. Introduce new `float16-renderable` and `float32-renderable` features. + +When supported, `float16-renderable` allows the `RENDER_ATTACHMENT` usage on textures with format `"r16float"`, `"rg16float"`, and `"rgba16float"`. + +When supported, `float32-renderable` allows the `RENDER_ATTACHMENT` usage on textures with format `"r32float"`, `"rg32float"`, and `"rgba32float"`. + +Without support, an error will occur at texture creation time as described in section 6.1.3. + +Support for both features is mandatory in core WebGPU. + +**Justification**: OpenGL ES 3.1 does not require the relevant f16- or f32-based texture formats (`R16F`, `RG16F`, `RGBA16F`, `R32F`, `RG32F`, and `RGBA32F`) to be color-renderable. While there exist OpenGL ES extensions to enable renderability (`GL_EXT_COLOR_BUFFER_HALF_FLOAT` and `GL_EXT_COLOR_BUFFER_FLOAT`), there are a significant number of devices which lack support for these extensions. + ## Issues Q: OpenGL ES does not have "coarse" and "fine" variants of the derivative instructions (`dFdx()`, `dFdy()`, `fwidth()`). Should WGSL's "fine" derivatives (`dpdxFine()`, `dpdyFine()`, and `fwidthFine()`) be required to deliver high precision results? See [Issue 4325](https://github.com/gpuweb/gpuweb/issues/4325). -A: Unclear. In SPIR-V, Fine variants must include the value of P for the local fragment, while Coarse variants do not. WGSL is less constraining, and simply says that Coarse "may result in fewer unique positions that dpdxFine(e)." +A: Unclear. In SPIR-V, Fine variants must include the value of P for the local fragment, while Coarse variants do not. WGSL is less constraining, and simply says that Coarse "may result in fewer unique positions than `dpdxFine(e)`." diff --git a/proposals/push-constants.md b/proposals/push-constants.md new file mode 100644 index 0000000000..55ffc2012b --- /dev/null +++ b/proposals/push-constants.md @@ -0,0 +1,125 @@ +# ImmediateData + +**Roadmap:** This proposal is **under active development, but has not been standardized for inclusion in the WebGPU specification. The proposal is likely to change before it is standardized.** WebGPU implementations **must not** expose this functionality; doing so is a spec violation. Note however, an implementation might provide an option (e.g. command line flag) to enable a draft implementation, for developers who want to test this proposal. + +Last modified: 2024-05-30 + +Issue: #75 + +# Requirements + +No special requirements. + +# WGSL + + +## Address Spaces + +| Address space | Sharing among invocations | Default access mode | Notes | +| --- | --- | --- | --- | +| `immediate_data` | Invocations in [shader stage](https://www.w3.org/TR/WGSL/#shader-stages) | [read](https://www.w3.org/TR/WGSL/#access-read) | For [uniform buffer](https://www.w3.org/TR/WGSL/#uniform-buffer) variables exclude [array types](https://www.w3.org/TR/WGSL/#array-types) variable or [structure types](https://www.w3.org/TR/WGSL/#struct-types) variable contains [array types](https://www.w3.org/TR/WGSL/#array-types) attributes | + + +## Variable and Value Declarations + +| Declaration | Mutability | Scope | Effective-value-type | Initializer Support | Initializer Expression | Part of Resource Interface | +| --- | --- | --- | --- | --- | --- | --- | +| `var` | Immutable | [Module](https://www.w3.org/TR/WGSL/#module-scope) | [Concrete](https://www.w3.org/TR/WGSL/#type-concrete) [constructible](https://www.w3.org/TR/WGSL/#constructible) [host-shareable](https://www.w3.org/TR/WGSL/#host-shareable) excludes [array types](https://www.w3.org/TR/WGSL/#array-types) and [structure types](https://www.w3.org/TR/WGSL/#struct-types) contains array members | Disallowed | | Yes. [uniform buffer](https://www.w3.org/TR/WGSL/#uniform-buffer) | + +NOTE: Each [entry point](https://www.w3.org/TR/WGSL/#entry-point) can statically use at most one immediate data variable. + +Sample Code: +``` +struct Constants { + inner: i32; +} + +var a : Constants; +var b : i32; +var c : i32; // unused + +fn uses_a() { + let foo = a.inner; +} + +fn uses_uses_a() { + uses_a(); +} + +fn uses_b() { + let foo = b; +} + +// Each entry point can statically use at most one immediate data variable. +@compute @workgroup_size(1) +fn main1() { + uses_a(); +} + +@compute @workgroup_size(1) +fn main2() { + uses_uses_a(); +} + +@compute @workgroup_size(1) +fn main3() { + uses_b(); +} + +@compute @workgroup_size(1) +fn main4() { +} + +``` + +# API + +## Limits + +One new limits: + +| Limit name | Description | Type | Limit class | Default | +| --- | --- | --- | --- | --- | +| immediateDataRangeMaxByteSize | The maximum bytes allowed value for the immediateDataRangeMaxSize | [GPUSize32](https://www.w3.org/TR/webgpu/#typedefdef-gpusize32) | [maximum](https://www.w3.org/TR/webgpu/#limit-class-maximum) | 64 | + +NOTE: 64 bytes is the sizeof(mat4x4). + +## Pipeline Layouts +One new member in `GPUPipelineLayoutDescriptor`. + +```javascript +dictionary GPUPipelineLayoutDescriptor + : GPUObjectDescriptorBase { + required sequence bindGroupLayouts; + uint32_t immediateDataRangeByteSize = 0; +}; +``` +`immediateDataRangeByteSize`: Size of immediate data range used in pipeline, type is bytes. + +NOTE: `immediateDataRangeByteSize` = sizeof(variables) + sizeof(paddings). Follow [ Aligment rules ](https://www.w3.org/TR/WGSL/#alignment-and-size) in wgsl spec. + +NOTE: two pipeline layouts are defined to be “compatible for immediate data” if they were created with identical immediate data byte size. It means immediate data values can share between pipeline layouts that are compatible for immediate data. + +NOTE: Immediate data range follow [out-of-bounds access](https://www.w3.org/TR/WGSL/#out-of-bounds-access) rules in wgsl spec. + +## GPUCommandEncoder + +Four new functions in `GPUCommandEncoder`. + +```javascript +interface mixin GPUBindingCommandsMixin { + void setImmediateDataRange(uint32_t rangeOffset, AllowSharedBufferSource data, optional dataOffset, optional size); +} +``` +NOTE: rangeOffset: Offset in bytes into immediate data range to begin writing at. Requires multiple of 4 bytes. +NOTE: dataOffset: Offset in into data to begin writing from. Given in elements if data is a TypedArray and bytes otherwise. + +# Open Questions: + +- Should pipelineLayout defines immediate range compatible? + - Implementation internal immediate data usage could easily break compatibility. Implementation needs extra + effort to ensure such compatibility. + +- Should it be allowed to use multiple `var` in one entry point? + + Currently this proposal allows only one, for simplicity. diff --git a/proposals/subgroups.md b/proposals/subgroups.md index f5dcfea37c..912ce40a8b 100644 --- a/proposals/subgroups.md +++ b/proposals/subgroups.md @@ -2,9 +2,11 @@ Status: **Draft** -Last modified: 2023-11-07 +Last modified: 2024-12-12 -Issue: #4306 +Issue: [#4306](https://github.com/gpuweb/gpuweb/issues/4306) + +Spec PR: [#4963](https://github.com/gpuweb/gpuweb/pulls/4963) # Requirements @@ -42,17 +44,18 @@ Add two new enable extensions. | Enable | Description | | --- | --- | | **subgroups** | Adds built-in values and functions for subgroups | -| **subgroups-f16** | Allows f16 to be used in subgroups operations | +| ~subgroups_f16~ | Allows f16 to be used in subgroups operations | -Note: Metal can always provide subgroups-f16, Vulkan requires +Note: Metal can always provide subgroups_f16, Vulkan requires VK_KHR_shader_subgroup_extended_types ([~61%](https://vulkan.gpuinfo.org/listdevicescoverage.php?extension=VK_KHR_shader_subgroup_extended_types&platform=all) of devices), and D3D12 requires SM6.2. -**TODO**: Can we drop **subgroups-f16**? +**TODO**: Can we drop **subgroups_f16**? According to this [analysis](https://github.com/teoxoy/gpuinfo-vulkan-query/blob/8681e0074ece1b251177865203d18b018e05d67a/subgroups.txt#L1071-L1466) Only 4% of devices that support both f16 and subgroups could not support subgroup extended types. +**RESOLVED** at F2F: remove subgroups_f16 **TODO**: Should this feature be broken down further? According to [gpuinfo.org](https://vulkan.gpuinfo.org/displaycoreproperty.php?core=1.1&name=subgroupSupportedOperations&platform=all), @@ -74,6 +77,8 @@ Some possibilities: | `subgroup_size` | u32 | Input | The size of the current subgroup | | `subgroup_invocation_id` | u32 | Input | The index of the invocation in the current subgroup | +When used in `compute` shader stage, `subgroup_size` should be considered uniform for uniformity analysis. + Note: HLSL does not expose a subgroup_id or num_subgroups equivalent. **TODO**: Can subgroup_id and/or num_subgroups be emulated efficiently and portably? @@ -81,35 +86,37 @@ Note: HLSL does not expose a subgroup_id or num_subgroups equivalent. ## Built-in Functions All built-in function can only be used in `compute` or `fragment` shader stages. -Using f16 as a parameter in any of these functions requires `subgroups-f16` to be enabled. +Using f16 as a parameter in any of these functions requires `subgroups_f16` to be enabled. | Function | Preconditions | Description | | --- | --- | --- | | `fn subgroupElect() -> bool` | | Returns true if this invocation has the lowest subgroup_invocation_id among active invocations in the subgroup | | `fn subgroupAll(e : bool) -> bool` | | Returns true if `e` is true for all active invocations in the subgroup | | `fn subgroupAny(e : bool) -> bool` | | Returns true if `e` is true for any active invocation in the subgroup | -| `fn subgroupBroadcast(e : T, id : I) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types
`I` must be i32 or u32 | Broadcasts `e` from subgroup_invocation_id `id` to all active invocations. `id` must be dynamically uniform1 | +| `fn subgroupBroadcast(e : T, id : I) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types
`I` must be i32 or u32 | Broadcasts `e` from the invocation whose subgroup_invocation_id matches `id`, to all active invocations.
`id` must be a constant-expression. Use `subgroupShuffle` if you need a non-constant `id`. | | `fn subgroupBroadcastFirst(e : T) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Broadcasts `e` from the active invocation with the lowest subgroup_invocation_id in the subgroup to all other active invocations | | `fn subgroupBallot(pred : bool) -> vec4` | | Returns a set of bitfields where the bit corresponding to subgroup_invocation_id is 1 if `pred` is true for that active invocation and 0 otherwise. | | `fn subgroupShuffle(v : T, id : I) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types
`I` must be u32 or i32 | Returns `v` from the active invocation whose subgroup_invocation_id matches `id` | -| `fn subgroupShuffleXor(v : T, mask : u32) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Returns `v` from the active invocation whose subgroup_invocation_id matches `subgroup_invocation_id ^ mask`.
`mask` must be dynamically uniform. | -| `fn subgroupShuffleUp(v : T, delta : u32) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Returns `v` from the active invocation whose subgroup_invocation_id matches `subgroup_invocation_id - delta` | -| `fn subgroupShuffleDown(v : T, delta : u32) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Returns `v` from the active invocation whose subgroup_invocation_id matches `subgroup_invocation_id + delta` | -| `fn subgroupSum(e : T) -> T` | `T` must be u32, i32, f32, or a vector of those types | Reduction
Adds `e` among all active invocations and returns that result | -| `fn subgroupExclusiveSum(e : T) -> T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Exclusive scan
Returns the sum of `e` for all active invocations with subgroup_invocation_id less than this invocation | -| `fn subgroupProduct(e : T) -> T` | `T` must be u32, i32, f32, or a vector of those types | Reduction
Multiplies `e` among all active invocations and returns that result | -| `fn subgroupExclusiveProduct(e : T) -> T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Exclusive scan
Returns the product of `e` for all active invocations with subgroup_invocation_id less than this invocation | +| `fn subgroupShuffleXor(v : T, mask : u32) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Returns `v` from the active invocation whose subgroup_invocation_id matches `subgroup_invocation_id ^ mask`.
`mask` must be dynamically uniform1 | +| `fn subgroupShuffleUp(v : T, delta : u32) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Returns `v` from the active invocation whose subgroup_invocation_id matches `subgroup_invocation_id - delta`
`delta` must be dynamically uniform1 | +| `fn subgroupShuffleDown(v : T, delta : u32) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Returns `v` from the active invocation whose subgroup_invocation_id matches `subgroup_invocation_id + delta`
`delta` must be dynamically uniform1 | +| `fn subgroupAdd(e : T) -> T` | `T` must be u32, i32, f32, or a vector of those types | Reduction
Adds `e` among all active invocations and returns that result | +| `fn subgroupExclusiveAdd(e : T) -> T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Exclusive scan
Returns the sum of `e` for all active invocations with subgroup_invocation_id less than this invocation | +| `fn subgroupInclusiveAdd(e : T) -> T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Inclusive scan
Returns the sum of `e` for all active invocations with subgroup_invocation_id less than or equal to this invocation | +| `fn subgroupMul(e : T) -> T` | `T` must be u32, i32, f32, or a vector of those types | Reduction
Multiplies `e` among all active invocations and returns that result | +| `fn subgroupExclusiveMul(e : T) -> T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Exclusive scan
Returns the product of `e` for all active invocations with subgroup_invocation_id less than this invocation | +| `fn subgroupInclusiveMul(e : T) -> T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Inclusive scan
Returns the product of `e` for all active invocations with subgroup_invocation_id less than or equal to this invocation | | `fn subgroupAnd(e : T) -> T` | `T` must be u32, i32, or a vector of those types | Reduction
Performs a bitwise and of `e` among all active invocations and returns that result | | `fn subgroupOr(e : T) -> T` | `T` must be u32, i32, or a vector of those types | Reduction
Performs a bitwise or of `e` among all active invocations and returns that result | | `fn subgroupXor(e : T) -> T` | `T` must be u32, i32, or a vector of those types | Reduction
Performs a bitwise xor of `e` among all active invocations and returns that result | | `fn subgroupMin(e : T) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Reduction
Performs a min of `e` among all active invocations and returns that result | | `fn subgroupMax(e : T) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Reduction
Performs a max of `e` among all active invocations and returns that result | -| `fn quadBroadcast(e : T, id : I)` | `T` must be u32, i32, f32, f16 or a vector of those types
`I` must be u32 or i32 | Broadcasts `e` from the quad invocation with id equal to `id`
`e` must be a constant-expression2 | -| `fn quadSwapX(e : T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Swaps `e` between invocations in the quad in the X direction | -| `fn quadSwapY(e : T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Swaps `e` between invocations in the quad in the Y direction | -| `fn quadSwapDiagonal(e : T)` | `T` must be u32, i32, f32, f16 or a vector of those types | Swaps `e` between invocations in the quad diagnoally | +| `fn quadBroadcast(e : T, id : I)` | `T` must be u32, i32, f32, f16 or a vector of those types
`I` must be u32 or i32 | Broadcasts `e` from the quad invocation with id equal to `id`
`id` must be a constant-expression2 | +| `fn quadSwapX(e : T) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Swaps `e` between invocations in the quad in the X direction | +| `fn quadSwapY(e : T) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Swaps `e` between invocations in the quad in the Y direction | +| `fn quadSwapDiagonal(e : T) -> T` | `T` must be u32, i32, f32, f16 or a vector of those types | Swaps `e` between invocations in the quad diagnoally | 1. This is the first instance of dynamic uniformity. See the portability and uniformity section for more details. -2. Unlike `subgroupBroadcast`, SPIR-V does not have a shuffle operation to fall back on, so this requirement must be surfaced. +2. Unlike `subgroupBroadcast`, there is no alternative if the author wants a non-constant `id`: SPIR-V does not have a quad shuffle operation to fall back on. **TODO**: Are quad operations worth it? Quad operations present even less portability than subgroup operations due to @@ -153,12 +160,18 @@ Add new diagnostic controls: | Filterable Triggering Rule | Default Severity | Triggering Location | Description | | --- | --- | --- | --- | | **subgroup_uniformity** | Error | Call site of a subgroup builtin function | A call to a subgroup builtin that the uniformity analysis cannot prove occurs in uniform control flow (or with uniform parameter values in some cases) | -| **subgroup_branching** | Error | Call site of a subgroup builtin function | A call to a subgroup builtin that uniformity analysis cannot prove is preceeded only by uniform branches | +| ~subgroup_branching~ | Error | Call site of a subgroup builtin function | A call to a subgroup builtin that uniformity analysis cannot prove is preceeded only by uniform branches | **TODO**: Are these defaults appropriate? They attempt to default to the most portable behavior, but that means it would be an error to have a subgroup operation preceeded by divergent control flow. +Issue: after internal testing, we found subgroup_branching to be very onerous. +Disabling subgroup_uniformity on a builtin would require also disabling subgroup_branching in +almost all cases. +Additionally, simple, extremely common patterns would be rejected by the diagnostic +(e.g. initializing a workgroup variable with a subset of invocations). + # API ## GPU Feature @@ -167,7 +180,7 @@ New GPU features: | Feature | Description | | --- | --- | | **subgroups** | Allows the WGSL feature and adds new limits | -| **subgroups-f16** | Allows WGSL feature. Requires **subgroups** and **shader-f16** | +| ~subgroups-f16~ | Allows WGSL feature. Requires **subgroups** and **shader-f16** | **TODO**: Can we expose a feature to require a specific subgroup size? No facility exists in Metal so it would have to be a separate feature. @@ -176,9 +189,9 @@ In Vulkan, pipelines can specify a required size between min and max using subgroup size control. This is a requested feature (see #3950). -## Limits +## Adapter Info -Two new limits: +Two new entries in GPUAdapterInfo: | Limit | Description | Vulkan | Metal | D3D12 | --- | --- | --- | --- | --- | | subgroupMinSize | Minimum subgroup size | minSubgroupSize from VkPhysicalDeviceSubgroupSizeProperties[EXT] | 4 | WaveLaneCountMin from D3D12_FEATURE_DATA_D3D12_OPTIONS1 | @@ -229,20 +242,69 @@ D3D12 would have to be proven empricially. | `subgroupShuffleXor` | OpGroupNonUniformShuffleXor | simd_shuffle_xor | WaveReadLaneAt with index equal `subgroup_invocation_id ^ mask` | | `subgroupShuffleUp` | OpGroupNonUniformShuffleUp | simd_shuffle_up | WaveReadLaneAt with index equal `subgroup_invocation_id - delta` | | `subgroupShuffleDown` | OpGroupNonUniformShuffleDown | simd_shuffle_down | WaveReadLaneAt with index equal `subgroup_invocation_id + delta` | -| `subgroupSum` | OpGroupNonUniform[IF]Add with Reduce operation | simd_sum | WaveActiveSum | -| `subgroupExclusiveSum` | OpGroupNonUniform[IF]Add with ExclusiveScan operation | simd_prefix_exclusive_sum | WavePrefixSum | -| `subgroupProduct` | OpGroupNonUniform[IF]Mul with Reduce operation | simd_product | WaveActiveProduct | -| `subgroupExclusiveProduct` | OpGroupNonUniform[IF]Add with ExclusiveScan operation | simd_prefix_exclusive_product | WavePrefixProduct | +| `subgroupAdd` | OpGroupNonUniform[IF]Add with Reduce operation | simd_sum | WaveActiveSum | +| `subgroupExclusiveAdd` | OpGroupNonUniform[IF]Add with ExclusiveScan operation | simd_prefix_exclusive_sum | WavePrefixSum | +| `subgroupInclusiveAdd` | OpGroupNonUniform[IF]Add with InclusiveScan operation | simd_prefix_inclusive_sum | WavePrefixSum(x) + x | +| `subgroupMul` | OpGroupNonUniform[IF]Mul with Reduce operation | simd_product | WaveActiveProduct | +| `subgroupExclusiveMul` | OpGroupNonUniform[IF]Add with ExclusiveScan operation | simd_prefix_exclusive_product | WavePrefixProduct | +| `subgroupInclusiveMul` | OpGroupNonUniform[IF]Add with InclusiveScan operation | simd_prefix_inclusive_product | WavePrefixProduct(x) * x | | `subgroupAnd` | OpGroupNonUniformBitwiseAnd with Reduce operation | simd_and | WaveActiveBitAnd | | `subgroupOr` | OpGroupNonUniformBitwiseOr with Reduce operation | simd_or | WaveActiveBitOr | | `subgroupXor` | OpGroupNonUniformBitwiseXor with Reduce operation | simd_xor | WaveActiveBitXor | | `subgroupMin` | OpGroupNonUniform[SUF]Min with Reduce operation | simd_min | WaveActiveMin | | `subgroupMax` | OpGroupNonUniform[SUF]Max with Reduce operation | simd_max | WaveActiveMax | | `quadBroadcast` | OpGroupNonUniformQuadBroadcast | quad_broadcast | QuadReadLaneAt | -| `quadSwapX` | OpGroupNonUniformQuadSwap with Direction=0 | quad_shuffle with `quad_lane_id=thread_index_in_quad_group ^ 0x1` | QuadReadAcrossX | -| `quadSwapY` | OpGroupNonUniformQuadSwap with Direction=1 | quad_shuffle with `quad_lane_id=thread_index_in_quad_group ^ 0x10` | QuadReadAcrossY | -| `quadSwapDiagonal` | OpGroupNonUniformQuadSwap with Direction=2 | quad_shuffle with `quad_lane_id=thread_index_in_quad_group ^ 0x11` | QuadReadAcrossDiagonal | +| `quadSwapX` | OpGroupNonUniformQuadSwap with Direction=0 | quad_shuffle with `quad_lane_id=thread_index_in_quad_group ^ 1` (xor bits `01`) | QuadReadAcrossX | +| `quadSwapY` | OpGroupNonUniformQuadSwap with Direction=1 | quad_shuffle with `quad_lane_id=thread_index_in_quad_group ^ 2` (xor bits `10`) | QuadReadAcrossY | +| `quadSwapDiagonal` | OpGroupNonUniformQuadSwap with Direction=2 | quad_shuffle with `quad_lane_id=thread_index_in_quad_group ^ 3` (xor bits `11`) | QuadReadAcrossDiagonal | 1. All group non-uniform instructions use the `Subgroup` scope. 2. To avoid constant-expression requirement, use SPIR-V 1.5 or OpGroupNonUniformShuffle. + +# Appendix C: CTS Status + +Last updated: 2024-12-18 + +| Built-in value | Validation | Compute | Fragment | +| --- | --- | --- | --- | +| `subgroup_invocation_id` | ✓ | ✓ | ✓ | +| `subgroup_size` | ✓ | ✓ | ✓ | + +| Built-in function | Validation | Compute | Fragment | +| --- | --- | --- | --- | +| `subgroupElect` | ✓ | ✓ | ✓ | +| `subgroupAll` | ✓ | ✓ | ✓ | +| `subgroupAny` | ✓ | ✓ | ✓ | +| `subgroupBroadcast` | ✓ | ✓ | ✓ | +| `subgroupBroadcastFirst` | ✓ | ✓ | ✓ | +| `subgroupBallot` | ✓ | ✓ | ✓ | +| `subgroupShuffle` | ✓ | ✓ | ✓ | +| `subgroupShuffleXor` | ✓ | ✓ | ✓ | +| `subgroupShuffleUp` | ✓ | ✓ | ✓ | +| `subgroupShuffleDown` | ✓ | ✓ | ✓ | +| `subgroupAdd` | ✓ | ✓ | ✓ | +| `subgroupExclusiveAdd` | ✓ | ✓ | ✓ | +| `subgroupInclusiveAdd` | ✓ | ✓ | ✓ | +| `subgroupMul` | ✓ | ✓ | ✓ | +| `subgroupExclusiveMul` | ✓ | ✓ | ✓ | +| `subgroupInclusiveMul` | ✓ | ✓ | ✓ | +| `subgroupAnd` | ✓ | ✓ | ✓ | +| `subgroupOr` | ✓ | ✓ | ✓ | +| `subgroupXor` | ✓ | ✓ | ✓ | +| `subgroupMin` | ✓ | ✓ | ✓ | +| `subgroupMax` | ✓ | ✓ | ✓ | +| `quadBroadcast` | ✓ | ✓ | ✓ | +| `quadSwapX` | ✓ | ✓ | ✓ | +| `quadSwapY` | ✓ | ✓ | ✓ | +| `quadSwapDiagonal` | ✓ | ✓ | ✓ | + +| Diagnostic | Validation | +| --- | --- | +| `subgroup_uniformity` | ✓ | + +| Uniformity analysis | Validation | +| --- | --- | +| `subgroup_size` uniform in compute | ✓ | +| Built-in functions require uniformity | ✓ | +| Shuffle delta/mask params require uniformity | ✓ | diff --git a/proposals/texel-buffers.md b/proposals/texel-buffers.md new file mode 100644 index 0000000000..bd8d31a802 --- /dev/null +++ b/proposals/texel-buffers.md @@ -0,0 +1,415 @@ +# Texel Buffers + +**Roadmap:** This proposal is **under active development, but has not been standardized for inclusion in the WebGPU specification. The proposal is likely to change before it is standardized.** WebGPU implementations **must not** expose this functionality; doing so is a spec violation. Note however, an implementation might provide an option (e.g. command line flag) to enable a draft implementation, for developers who want to test this proposal. + +Last modified: 2024-10-07 + +Issue: [#162](https://github.com/gpuweb/gpuweb/issues/162) + +# WGSL + + +## Extension Names + +Add `'texel_buffer'` as a new language extension name. + + +## Language Extensions + +[[Add new table entry to *Language-extensions*]] + +| WGSL language extension | Description | +| ----------------------- | ------------------------------------------------------------------------ | +| **texel_buffer** | Allows the use of the `texel_buffer` type and related builtin functions. | + + +## Texel Buffer Types + +[[New subsection of **Texture and Sampler Types**]] + +A **texel buffer** supports accessing texels stored in a 1D buffer using texture load and store functions. + +Unlike other WGSL texture types, the texels of a texel buffer are stored in a `GPUBuffer`, and bound to the pipeline via a `GPUTexelBufferView`. +Additionally, the maximum number of texels in a texel buffer is often much larger than for storage textures. See https://gpuweb.github.io/gpuweb/#supported-limits + +A texel buffer type must be parameterized by one of the [texel formats](https://w3.org/TR/WGSL/#texel-formats) for storage textures. +The texel format determines the conversion function as specified in [Texel Formats](https://w3.org/TR/WGSL/#texel-formats). + +For a `textureStore` operation, the inverse of the conversion function is used to convert the shader value to the stored texel. + +| Type | Description | +| ------------------------------------ | ------------------------------ | +| **texel_buffer**<_Format_, _Access_> | A texel buffer type that accesses buffer data using texture functions. | + +- _Format_ must be an enumerant for one of the texel formats for storage textures +- _Access_ must be `read` or `read_write` + +Writes to texel buffers are visible to the same invocation, and can be synchronized with other invocations from the same workgroup using a `textureBarrier`. + + +## Restrictions on Functions + +Add `texel_buffer` to the list of valid function parameter types. + + +## Texture Built-in Functions + +[[Add new overloads]] + +| Parameterization | Overload | +| ------------------------------ | ------------------------ | +| _AM_ is `read` or `read_write` | `@must_use fn textureDimensions(t : texel_buffer) -> u32` | +| _C_ is `i32` or `u32`
_AM_ is `read` or `read_write`
_CF_ depends on the storage texel format _F_. [See the texel format table](https://w3.org/TR/WGSL/#storage-texel-formats) for the mapping of texel format to channel format. | `@must_use fn textureLoad(t : texel_buffer, coords : C) -> vec4` | +| _C_ is `i32` or `u32`
_CF_ depends on the storage texel format _F_. [See the texel format table](https://w3.org/TR/WGSL/#storage-texel-formats) for the mapping of texel format to channel format. | `@must_use fn textureStore(t : texel_buffer, coords : C, value: vec4)` | + + +# API + + +## Limits + +| Limit name | Type | Limit class | Default | +| ---------------------- | ----------- | ----------- | ------------------------- | +| **maxTexelBufferSize** | `GPUSize64` | maximum | 134217728 bytes (128 MiB) | + + +## Adapter Capability Guarantees + +Add "`maxTexelBufferSize` must be <= `maxBufferSize`". + + +## Resource Usages + +[[Modify description of internal usages]] + +**storage**
+Read/write storage resource binding. Allowed by buffer `STORAGE`, texture `STORAGE_BINDING`, or buffer `TEXEL_BUFFER`. + +**storage-read**
+Read-only storage resource bindings. Preserves the contents. Allowed by buffer `STORAGE`, texture `STORAGE_BINDING`, or buffer `TEXEL_BUFFER`. + + +## Buffer Usages + +[[Add new const to `GPUBufferUsage` namespace]] + +```javascript + const GPUFlagsConstant TEXEL_BUFFER = 0x0400; +``` + + +## GPUTexelBufferView + +[[New subsection of **Textures and Texture Views**]] + +A `GPUTexelBufferView` is a view onto some subset of the buffer subresources defined by a particular `GPUBuffer`. + +```javascript +[Exposed=(Window, Worker), SecureContext] +interface GPUTexelBufferView { +}; +GPUTexelBufferView includes GPUObjectBase; +``` + +`GPUTexelBufferView` has the following immutable properties: + +> **[[buffer]], readonly**
+> The `GPUBuffer` into which this is a view. +> +> **[[descriptor]], readonly**
+> The `GPUTexelBufferViewDescriptor` describing this texel buffer view. +> +> All optional fields of `GPUTexelBufferViewDescriptor` are defined. + + +### Texel Buffer View Creation + +```javascript +dictionary GPUTexelBufferViewDescriptor : GPUObjectDescriptorBase { + GPUTextureFormat format; + GPUSize64 offset = 0; + GPUSize64 size; +}; +``` + +`GPUTexelBufferViewDescriptor` has the following members: + +> **format, of type GPUTextureFormat**
+> The format of the texel buffer view. +> +> **offset, of type GPUSize64, defaulting to 0**
+> The offset, in bytes, from the beginning of the buffer to the range exposed by the texel buffer view. +> +> **size, of type GPUSize64**
+> The size, in bytes, of the texel buffer view. If not provided, specifies the range starting at `offset` and ending at the end of the buffer. + +**createView(descriptor)**
+Creates a `GPUTexelBufferView`. + +> **Called on:** `GPUBuffer` _this_. +> +> **Arguments:** +> +> | Parameter | Type | `Nullable` | `Optional` | Description | +> | ------------ | ------------------------------ | ---------- | ---------- | ---------------- | +> | `descriptor` | `GPUTexelBufferViewDescriptor` | ✘ | ✔ | Description of the `GPUTexelBufferView` to create. | +> +> **Returns:** _view_, of type `GPUTexelBufferView`. +> +> [Content timeline](https://w3.org/TR/WGSL/#content-timeline) steps: +> +> 1. ? Validate +> 2. Let _view_ be ! [create a new WebGPU object](https://w3.org/TR/WGSL/#abstract-opdef-create-a-new-webgpu-object)(_this_, `GPUTexelBufferView`, _descriptor_) +> 3. Issue the _initialization steps_ on the Device timeline of _this_. +> 4. Return _view_. +> +> [Device timeline](https://w3.org/TR/WGSL/#device-timeline) steps: +> +> 1. If any of the following conditions are unsatisfied generate a validation error, invalidate _view_ and return. +> - _this_ is valid to use with _this_.[[device]]. +> - _this_.usage must contain the `TEXEL_BUFFER` bit +> - _descriptor_.`offset` + _descriptor_.`size` must be <= _this_.`size` +> - _descriptor_.`size` must be <= _limits_.`maxTexelBufferSize`. +> - _descriptor_.`size` must be a multiple of the texel size of _descriptor_.`format`. +> - _descriptor_.`offset` must be a multiple of `256`. +> 2. Let _view_ be a new `GPUTexelBufferView` object. +> 3. Set _view_.[[buffer]] to _this_. +> 4. Set _view_.[[descriptor]] to _descriptor_. + + +## Bind Group Layout Creation + +[[Add new field to **GPUBindGroupLayoutEntry**]] + +```javascript +GPUTexelBufferBindingLayout texelBuffer; +``` + +**texelBuffer, of type [GPUTexelBufferBindingLayout]**
+When provided, indicates the binding resource type for this `GPUBindGroupLayoutEntry` is `GPUTexelBufferBindingLayout`. + +[[Add new entry to table of `GPUBindGroupLayoutEntry` members]] + +| Binding member | Resource type | Binding type | Binding usage | +| -------------- | ---------------------- | --------------------------------- | ------------- | +| texelBuffer | `GPUTexelBufferView` | `storage`
`read-only-storage` | storage
storage-read | + +**TODO:** Do these use buffer slots, texture slots, storage texture slots, or a new type of slot? + +[[Add new enum and dictionary]] + +```javascript +enum GPUTexelBufferAccess { + "read-only", + "read-write", +}; + +dictionary GPUTexelBufferBindingLayout { + GPUTexelBufferAccess access = "read-write"; + GPUTextureFormat format; +}; +``` + +`GPUTexelBufferBindingLayout` dictionaries have the following members: + +**access, of type GPUTexelBufferAccess, defaulting to "read-write"**
+Indicates the access mode that will be used for texel buffer views bound to this binding. +**format, of type GPUTextureFormat**
+The required format of texel buffer views bound to this binding. + +[[Add new validation when *entry*.`visibility` includes `VERTEX` ]] + +* If *entry*.`texelBuffer` is provided, *entry*.`texelBuffer`.`access` must be `"read-only"`. + + +## Bind Group Creation + +[[Add new validation rules for `GPUBindGroupEntry` in `createBindGroup`]] + +**texelBuffer** + +- _resource_ is a `GPUTexelBufferView`. +- _resource_ is valid to use with _this_. +- _layoutBinding_.texelBuffer.format is equal to _resource_.format. +- _resource_.[[buffer]].usage includes `TEXEL_BUFFER`. + + +## Default Pipeline Layout + +[[Add new steps for creating default pipeline layout]] + +> If _resource_ is for a texel buffer binding: +> +> - Let _texelBufferLayout_ be a new `GPUTexelBufferBindingLayout`. +> - Set _texelBufferLayout_.format to _resource_’s format. +> - If the access mode is:
+> -> **read**
+> Set _texelBufferLayout_.access to `"read-only"`.
+> -> **read_write**
+> Set _texelBufferLayout_.access to `"read-write"`. +> - Set _entry_.texelBuffer to _texelBufferLayout_. + + +## Bind Groups + +[[Add new aliasing limitations for texel buffers]] + +**Replace:** “writable buffer binding range” with “writable buffer binding range or texel buffer view” + +**Replace:** “of the same buffer” with “of the same buffer or texel buffer view” + + +## Plain color formats + +[[Add new column to format table for `TEXEL_BUFFER`]] + +| Format | `TEXEL_BUFFER` | +| ------------------------- | -------------- | +| **8-bit per component** | | +| `r8unorm` | | +| `r8snorm` | | +| `r8uint` | | +| `r8sint` | | +| `rg8unorm` | | +| `rg8snorm` | | +| `rg8uint` | | +| `rg8sint` | | +| `rgba8unorm` | ✔ | +| `rgba8unorm-srgb` | | +| `rgba8snorm` | | +| `rgba8uint` | ✔ | +| `rgba8sint` | ✔ | +| `bgra8unorm` | | +| `bgra8unorm-srgb` | | +| **16-bit per component** | | +| `r16uint` | | +| `r16sint` | | +| `r16float` | | +| `rg16uint` | | +| `rg16sint` | | +| `rg16float` | | +| `rgba16uint` | ✔ | +| `rgba16sint` | ✔ | +| `rgba16float` | ✔ | +| **32-bit per component** | | +| `r32uint` | ✔ | +| `r32sint` | ✔ | +| `r32float` | ✔ | +| `rg32uint` | | +| `rg32sint` | | +| `rg32float` | | +| `rgba32uint` | ✔ | +| `rgba32sint` | ✔ | +| `rgba32float` | ✔ | +| **mixed component width** | | +| `rgb10a2uint` | | +| `rgb10a2unorm` | | +| `rg11b10ufloat` | | + + +# Appendix A: Implementation details + + +### Vulkan + +In Vulkan, a `read_write` texel buffer would map to a storage texel buffer decorated as `Coherent`, and shader accesses would be performed with the `OpImageRead` and `OpImageWrite` instructions. +A texel buffer with a `read-only` access mode could use a uniform texel buffer instead, which would use `OpImageFetch` instead `OpImageRead`. + +When the `TEXEL_BUFFER` usage flag is set on buffer creation, both of the Vulkan texel buffer bits would be set:
+`VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT` + +The [required image formats](https://registry.khronos.org/vulkan/specs/1.2-extensions/html/vkspec.html#features-required-format-support) for storage texel buffers includes: + +``` +R8G8B8A8_UNORM +R8G8B8A8_UINT +R8G8B8A8_SINT +R16G16B16A16_UINT +R16G16B16A16_SINT +R16G16B16A16_SFLOAT +R32_UINT +R32_SINT +R32_SFLOAT +R32G32_UINT +R32G32_SINT +R32G32_SFLOAT +R32G32B32A32_UINT +R32G32B32A32_SINT +R32G32B32A32_SFLOAT +``` + +For the other formats, [gpuinfo.org](https://vulkan.gpuinfo.org/listbufferformats.php) has information on how widespread support is. +For 1- and 2-channel `R{8,16}_{SINT,UINT,SFLOAT}`, support is currently around 80% for storage texel buffers. + +**TODO:** We should do this query against WebGPU's baseline requirements, as the percentage for devices we actually support may be higher. + +Vulkan has a `maxTexelBufferElements` limit for the maximum size of a texel buffer. +[gpuinfo.org shows that](https://vulkan.gpuinfo.org/displaydevicelimit.php?name=maxTexelBufferElements&platform=all) more than 85% of devices support 128MB texel buffers. + + +### Metal + +Metal has a `texel_buffer` type that provides similar functionality, which was introduced in Metal 2.1. +The [Metal Feature Set Tables](https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf) show the supported formats for each access mode. +Unnormalized integer and floating point formats are supported for all access modes, as is `RGBA8Unorm`. +Using a `read-write` access mode also requires support for the Tier 2 `MTLReadWriteTextureTier`. +A `mem_texture` fence would be needed to make texel buffer writes visible within an invocation. + +To get coverage on older Metal versions, it would be possible to polyfill by using a regular device buffer and doing the format conversions inside the shader. +This requires that the storage format is specified inside the shader. + +The maximum texel buffer size is 64 M pixels for the Apple2 GPU family, and 256 M pixels for Apple3 and above. +The texel buffer size is also bounded above by the generic buffer size constraint. + +**TODO**: Get data for non-Apple GPUs. + + +### D3D12 + +In D3D12, a texel buffer can map to an Unordered Access View (UAV) for a buffer with a `DXGI_FORMAT`, and that UAV can be accessed in the shader with 32-bit result types. +See [Typed unordered access view (UAV) loads](https://docs.microsoft.com/en-us/windows/win32/direct3d12/typed-unordered-access-view-loads). +The `RWBuffer` should be prefixed with `globallycoherent`, and the element type needs to be prefixed with `unorm` or `snorm` if a normalized format is being used. + +Format support for typed UAV loads and stores in D3D12 can be checked [here](https://docs.microsoft.com/en-us/windows/win32/direct3ddxgi/hardware-support-for-direct3d-12-0-formats). +The set of required formats includes: + +``` +R8G8B8A8_UNORM +R8G8B8A8_UINT +R8G8B8A8_SINT +R16G16B16A16_UINT +R16G16B16A16_SINT +R16G16B16A16_FLOAT +R8_UINT +R8_SINT +R16_UINT +R16_SINT +R16_FLOAT +R32_UINT +R32_SINT +R32_SFLOAT +R32G32B32A32_UINT +R32G32B32A32_SINT +R32G32B32A32_FLOAT +``` + + +# Open and Resolved Questions + +1. Should this be an extension, or a core feature? + - To make it core, implementations would need to polyfill for Metal <2.1. We would also need to drop the formats that are not required everywhere (e.g. `R8_UINT`), or make them optional. + - Decision at F2F: + - Make it core. + - Drop the formats that are not widespread (leaving them for a [future texture format tier extension](https://github.com/gpuweb/gpuweb/issues/3837)). + - We do not need to support Metal <2.1 (Metal 2.2 is our minimum requirement now). +2. In the original issue it was mentioned [#162 (comment)](https://github.com/gpuweb/gpuweb/issues/162#issuecomment-452771668) + that uniform texel buffers support more formats. This proposal is only for storage texel buffers but uses the name "texture buffer" throughout. + + - Is it worth adding uniform texel buffers? Besides wider format support, are they faster? + - If the answer is yes or not sure, we should probably use "storage texture/texel buffer" for this proposal instead. + - Decision at F2F: + - Implementations may map read-only texel buffer to uniform texel buffers in the underlying API. + This unifies things on the WebGPU side. + The texture format table can then enable usage of more formats for read-only texel buffers. +3. Do we need the new createView method on the buffer? Implementations could create the view at bind time, assuming they are light weight. + - Corentin Wallez raised this at the F2F. diff --git a/spec/diagrams/buffer-map-failure.mmd b/spec/diagrams/buffer-map-failure.mmd index fa80f6b377..9fabf8d04c 100644 --- a/spec/diagrams/buffer-map-failure.mmd +++ b/spec/diagrams/buffer-map-failure.mmd @@ -1,6 +1,6 @@ sequenceDiagram Note over Content timeline: [[mapping]] is null
[[pending_map]] is null - Note over Device timeline: [[internals]].[[state]] is "available" + Note over Device timeline: [[internal state]] is "available" Content timeline ->> Device timeline: mapAsync() Note over Content timeline: [[mapping]] is null
[[pending_map]] is non-null Note over Device timeline: (failure, state unchanged) diff --git a/spec/diagrams/buffer-map-unmap.mmd b/spec/diagrams/buffer-map-unmap.mmd index 1046396c40..078602bf50 100644 --- a/spec/diagrams/buffer-map-unmap.mmd +++ b/spec/diagrams/buffer-map-unmap.mmd @@ -1,12 +1,12 @@ sequenceDiagram Note over Content timeline: [[mapping]] is null
[[pending_map]] is null - Note over Device timeline: [[internals]].[[state]] is "available" + Note over Device timeline: [[internal state]] is "available" Content timeline ->> Device timeline: mapAsync() Note over Content timeline: [[mapping]] is null
[[pending_map]] is non-null - Note over Device timeline: [[internals]].[[state]] is "unavailable" + Note over Device timeline: [[internal state]] is "unavailable" Device timeline ->> Content timeline: mapAsync() response - Note over Device timeline: [[internals]].[[state]] is "unavailable" + Note over Device timeline: [[internal state]] is "unavailable" Note over Content timeline: [[mapping]] is non-null
[[pending_map]] is null Content timeline ->> Device timeline: unmap() Note over Content timeline: [[mapping]] is null
[[pending_map]] is null - Note over Device timeline: [[internals]].[[state]] is "available" + Note over Device timeline: [[internal state]] is "available" diff --git a/spec/img/buffer-map-failure.mmd.svg b/spec/img/buffer-map-failure.mmd.svg index aae83a462e..ae1f621b11 100644 --- a/spec/img/buffer-map-failure.mmd.svg +++ b/spec/img/buffer-map-failure.mmd.svg @@ -1 +1 @@ -Device timelineContent timelineDevice timelineContent timeline[[mapping]] is null[[pending_map]] is null[[internals]].[[state]] is "available"[[mapping]] is null[[pending_map]] is non-null(failure, state unchanged)[[mapping]] is null[[pending_map]] is nullmapAsync()mapAsync() response \ No newline at end of file +Device timelineContent timelineDevice timelineContent timeline[[mapping]] is null[[pending_map]] is null[[internal state]] is "available"[[mapping]] is null[[pending_map]] is non-null(failure, state unchanged)[[mapping]] is null[[pending_map]] is nullmapAsync()mapAsync() response \ No newline at end of file diff --git a/spec/img/buffer-map-unmap.mmd.svg b/spec/img/buffer-map-unmap.mmd.svg index c258034bcb..cc3f2cc76c 100644 --- a/spec/img/buffer-map-unmap.mmd.svg +++ b/spec/img/buffer-map-unmap.mmd.svg @@ -1 +1 @@ -Device timelineContent timelineDevice timelineContent timeline[[mapping]] is null[[pending_map]] is null[[internals]].[[state]] is "available"[[mapping]] is null[[pending_map]] is non-null[[internals]].[[state]] is "unavailable"[[internals]].[[state]] is "unavailable"[[mapping]] is non-null[[pending_map]] is null[[mapping]] is null[[pending_map]] is null[[internals]].[[state]] is "available"mapAsync()mapAsync() responseunmap() \ No newline at end of file +Device timelineContent timelineDevice timelineContent timeline[[mapping]] is null[[pending_map]] is null[[internal state]] is "available"[[mapping]] is null[[pending_map]] is non-null[[internal state]] is "unavailable"[[internal state]] is "unavailable"[[mapping]] is non-null[[pending_map]] is null[[mapping]] is null[[pending_map]] is null[[internal state]] is "available"mapAsync()mapAsync() responseunmap() \ No newline at end of file diff --git a/spec/img/cubemap.svg b/spec/img/cubemap.svg new file mode 100644 index 0000000000..19c8d99915 --- /dev/null +++ b/spec/img/cubemap.svg @@ -0,0 +1,45 @@ + + + + + + + + + +U+V + + + + + + + + [0]+X + [1]-X + [2]+Y + [3]-Y + [4]+Z + [5]-Z + + + diff --git a/spec/img/favicon-32x32.png b/spec/img/favicon-32x32.png new file mode 100644 index 0000000000..3ca4c8e5e5 Binary files /dev/null and b/spec/img/favicon-32x32.png differ diff --git a/spec/img/favicon-96x96.png b/spec/img/favicon-96x96.png new file mode 100644 index 0000000000..95ba36dfed Binary files /dev/null and b/spec/img/favicon-96x96.png differ diff --git a/spec/img/framebuffer-coordinates.svg b/spec/img/framebuffer-coordinates.svg new file mode 100644 index 0000000000..8b03dea240 --- /dev/null +++ b/spec/img/framebuffer-coordinates.svg @@ -0,0 +1,31 @@ + + + + + \ No newline at end of file diff --git a/spec/img/logo.png b/spec/img/logo.png new file mode 100644 index 0000000000..f077a706fb Binary files /dev/null and b/spec/img/logo.png differ diff --git a/spec/img/normalized-device-coordinates.svg b/spec/img/normalized-device-coordinates.svg new file mode 100644 index 0000000000..3a685c2eec --- /dev/null +++ b/spec/img/normalized-device-coordinates.svg @@ -0,0 +1,31 @@ + + + + + \ No newline at end of file diff --git a/spec/img/uv-coordinates.svg b/spec/img/uv-coordinates.svg new file mode 100644 index 0000000000..2d02198a29 --- /dev/null +++ b/spec/img/uv-coordinates.svg @@ -0,0 +1,31 @@ + + + + + \ No newline at end of file diff --git a/spec/index.bs b/spec/index.bs index 724c6e9d2b..1a1f4a1ce5 100644 --- a/spec/index.bs +++ b/spec/index.bs @@ -3,11 +3,12 @@ Title: WebGPU Shortname: webgpu Level: None Status: w3c/ED -Group: webgpu +Group: gpuwg ED: https://gpuweb.github.io/gpuweb/ TR: https://www.w3.org/TR/webgpu/ Repository: gpuweb/gpuweb !Participate: File an issue (open issues) +!Test Suite: WebGPU CTS Editor: Kai Ninomiya, Google https://www.google.com, kainino@google.com, w3cid 99487 Editor: Brandon Jones, Google https://www.google.com, bajones@google.com, w3cid 87824 @@ -21,19 +22,7 @@ Markup Shorthands: dfn yes Markup Shorthands: idl yes Markup Shorthands: css no Assume Explicit For: yes - - -
-{
-    "SourceMap": {
-        "authors": [
-            "John Lenz",
-            "Nick Fitzgerald"
-        ],
-        "href": "https://sourcemaps.info/spec.html",
-        "title": "Source Map Revision 3 Proposal"
-    }
-}
+Deadline: 2025-02-28
 
+ +
+{
+  "vulkan": {
+    "authors": [
+      "The Khronos Vulkan Working Group"
+    ],
+    "href": "https://registry.khronos.org/vulkan/specs/1.3/html/vkspec.html",
+    "title": "Vulkan 1.3",
+    "publisher": "Khronos",
+    "deliveredBy": [
+      "https://www.khronos.org/"
+    ]
+  }
+}
 
+ + +