-
Notifications
You must be signed in to change notification settings - Fork 648
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add compact string encoding library #320
Open
d1ll0n
wants to merge
5
commits into
transmissions11:main
Choose a base branch
from
d1ll0n:packed-strings
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
// SPDX-License-Identifier: AGPL-3.0-only | ||
pragma solidity >=0.8.0; | ||
|
||
import {DSTestPlus} from "./utils/DSTestPlus.sol"; | ||
|
||
import {PackedStringLib} from "../utils/PackedStringLib.sol"; | ||
|
||
contract PackedStringLibTest is DSTestPlus { | ||
function testPackString(string calldata data, bytes calldata brutalizeWith) | ||
external | ||
packableString(data) | ||
brutalizeMemory(brutalizeWith) | ||
{ | ||
uint256 length = bytes(data).length; | ||
|
||
bytes32 packedString = PackedStringLib.packString(data); | ||
|
||
if (length == 0) { | ||
assertEq(packedString, bytes32(0), "Packed string not null with zero length"); | ||
} else { | ||
// First byte is length | ||
assertEq(uint256(packedString) >> 248, length, "First byte does not match string length"); | ||
// Last 31 bytes are body | ||
uint256 originalBody; | ||
assembly { | ||
originalBody := calldataload(data.offset) | ||
} | ||
assertEq(uint256(packedString) << 8, originalBody, "Last 31 bytes do not match string body"); | ||
} | ||
} | ||
|
||
function testPackString(bytes calldata brutalizeWith) external brutalizeMemory(brutalizeWith) { | ||
assertEq(PackedStringLib.packString(""), bytes32(0)); | ||
assertEq(PackedStringLib.packString(string(bytes(hex"ff"))), bytes32(uint256(0x01ff) << 240)); | ||
} | ||
|
||
    /// @dev Fuzz test: packing any string longer than 31 bytes must revert with | ||
    /// `PackedStringLib.UnpackableString`. | ||
    function testPackStringTooLong(string calldata data, bytes calldata brutalizeWith) | ||
        external | ||
        brutalizeMemory(brutalizeWith) | ||
    { | ||
        // Discard fuzz inputs that are short enough to be packed. | ||
        hevm.assume(bytes(data).length > 31); | ||
        // Register the expected revert directly before the call that should trigger it. | ||
        hevm.expectRevert(PackedStringLib.UnpackableString.selector); | ||
        PackedStringLib.packString(data); | ||
    } | ||
|
||
    /// @dev Concrete case: a 32-byte string, the shortest length above the 31-byte limit, | ||
    /// must revert with `PackedStringLib.UnpackableString`. | ||
    function testPackStringTooLong() external { | ||
        // 32 zero bytes: one byte more than fits next to the length byte in a single word. | ||
        bytes memory data = new bytes(32); | ||
        hevm.expectRevert(PackedStringLib.UnpackableString.selector); | ||
        PackedStringLib.packString(string(data)); | ||
    } | ||
|
||
    /// @dev Checks that `unpackString` advances the free memory pointer by exactly 0x40 bytes | ||
    /// (one word for the length, one word for the body), even when the packed string is empty. | ||
    function testUnpackStringAlwaysAllocatesTwoWords(bytes calldata brutalizeWith) | ||
        external | ||
        brutalizeMemory(brutalizeWith) | ||
    { | ||
        // Snapshot the free memory pointer before unpacking. | ||
        uint256 freeMemPtr; | ||
        assembly { | ||
            freeMemPtr := mload(0x40) | ||
        } | ||
        string memory output = PackedStringLib.unpackString(bytes32(0)); | ||
        // Measure how far the free memory pointer moved during the call. | ||
        uint256 growth; | ||
        assembly { | ||
            growth := sub(mload(0x40), freeMemPtr) | ||
        } | ||
        assertEq(growth, 0x40); | ||
        assertEq(bytes(output).length, 0); | ||
    } | ||
|
||
    /// @dev Demonstrates that `unpackString` does not validate its input: a packed word whose | ||
    /// length byte exceeds 31 produces a string that extends past the two words allocated for it | ||
    /// and therefore overlaps whatever is allocated next. | ||
    function testUnpackStringCanCorruptMemory(bytes calldata brutalizeWith) external brutalizeMemory(brutalizeWith) { | ||
        // Create a badly encoded packed string advertising a length of 64 bytes | ||
        bytes32 invalidPackedString = bytes32(uint256(0x40) << 248); | ||
        // Unpack invalid string | ||
        string memory output = PackedStringLib.unpackString(invalidPackedString); | ||
        // Allocate new dynamic variable - length will overlap second word of string body | ||
        bytes memory d = new bytes(32); | ||
        // Addresses compiler warnings about unused variables | ||
        // It also confirms `output` reports 64 bytes, twice the 32-byte length of `d`. | ||
        assertEq(d.length * 2, bytes(output).length); | ||
        // The 64-byte "body" is the zeroed body word followed by the length word of `d` (32). | ||
        assertEq(keccak256(bytes(output)), keccak256(abi.encodePacked(uint256(0), uint256(32)))); | ||
    } | ||
|
||
function testPackUnpackString(string calldata data, bytes calldata brutalizeWith) | ||
external | ||
packableString(data) | ||
brutalizeMemory(brutalizeWith) | ||
{ | ||
assertEqIncludingPadding(PackedStringLib.unpackString(PackedStringLib.packString(data)), data); | ||
} | ||
|
||
    /// @dev Concrete case: unpacking the zero word must yield the empty string. | ||
    function testPackUnpackString(bytes calldata brutalizeWith) external brutalizeMemory(brutalizeWith) { | ||
        assertEqIncludingPadding(PackedStringLib.unpackString(bytes32(0)), string("")); | ||
    } | ||
|
||
function testReturnUnpackedString(string calldata data, bytes calldata brutalizeWith) | ||
external | ||
packableString(data) | ||
{ | ||
assertEqIncludingPadding(this.returnUnpackedString(PackedStringLib.packString(data), brutalizeWith), data); | ||
} | ||
|
||
    /// @dev Concrete case: "Hello world" packed and handed back through the external | ||
    /// `returnUnpackedString` wrapper must decode to "Hello world". | ||
    function testReturnUnpackedString(bytes calldata brutalizeWith) external { | ||
        assertEqIncludingPadding( | ||
            this.returnUnpackedString(PackedStringLib.packString("Hello world"), brutalizeWith), | ||
            "Hello world" | ||
        ); | ||
    } | ||
|
||
/** | ||
* @dev Differential fuzzing for storage and retrieval of: | ||
* - Standard string storage, returned in returndata | ||
* - Packed string, unpacked with returnUnpackedString and returned in returndata | ||
* - Packed string, unpacked with unpackString and returned in returndata | ||
* - Packed string, unpacked with unpackString in same execution context | ||
*/ | ||
function testDifferentiallyFuzzStoreRead(string memory stringyFuzzBall, bytes calldata brutalizeWith) | ||
public | ||
brutalizeMemory(brutalizeWith) | ||
{ | ||
hevm.assume(bytes(stringyFuzzBall).length < 32); | ||
this.setFuzzyStrings(stringyFuzzBall); | ||
string memory fuzzy0 = PackedStringLib.unpackString(_fuzzyStringPacked); | ||
string memory fuzzy1 = this.fuzzyString1(brutalizeWith); | ||
string memory fuzzy2 = this.fuzzyString2(brutalizeWith); | ||
string memory fuzzy3 = this.fuzzyString3(brutalizeWith); | ||
|
||
assertEqIncludingPadding(stringyFuzzBall, fuzzy0); | ||
assertEqIncludingPadding(fuzzy0, fuzzy1); | ||
assertEqIncludingPadding(fuzzy1, fuzzy2); | ||
assertEqIncludingPadding(fuzzy2, fuzzy3); | ||
} | ||
|
||
/// @dev Assert that the two strings are identical, including their trailing zeros | ||
function assertEqIncludingPadding(string memory a, string memory b) internal { | ||
bytes32 hashA; | ||
bytes32 hashB; | ||
assembly { | ||
let lenA := mload(a) | ||
let lenB := mload(b) | ||
|
||
hashA := keccak256(add(a, 32), and(add(lenA, 31), not(31))) | ||
hashB := keccak256(add(b, 32), and(add(lenB, 31), not(31))) | ||
} | ||
assertEq(hashA, hashB); | ||
} | ||
|
||
    // Typical string storage: written by `setFuzzyStrings`, read back by `fuzzyString1` | ||
    string internal _fuzzyStringStandard; | ||
    // Packed string storage: the same string encoded as one word by `PackedStringLib.packString` | ||
    bytes32 internal _fuzzyStringPacked; | ||
|
||
    /// @dev External wrapper that lets tests receive the output of | ||
    /// `PackedStringLib.returnUnpackedString` as returndata. | ||
    /// @param packedString Packed string to unpack and return. | ||
    /// @param brutalizeWith Bytes passed to the `brutalizeMemory` modifier. | ||
    function returnUnpackedString(bytes32 packedString, bytes calldata brutalizeWith) | ||
        external | ||
        view | ||
        brutalizeMemory(brutalizeWith) | ||
        returns (string memory) | ||
    { | ||
        // No explicit `return`: the library call ends execution with an assembly `return` | ||
        // that carries the ABI-encoded string. | ||
        PackedStringLib.returnUnpackedString(packedString); | ||
    } | ||
|
||
    // Restrict test to allowed string sizes - test success cases | ||
    // Fuzz inputs of 32 bytes or more are discarded through `hevm.assume`. | ||
    modifier packableString(string calldata data) { | ||
        hevm.assume(bytes(data).length < 32); | ||
        _; | ||
    } | ||
|
||
    /// @dev Returns the string kept in standard `string` storage. | ||
    /// @param brutalizeWith Bytes passed to the `brutalizeMemory` modifier. | ||
    function fuzzyString1(bytes calldata brutalizeWith) | ||
        external | ||
        view | ||
        brutalizeMemory(brutalizeWith) | ||
        returns (string memory) | ||
    { | ||
        return _fuzzyStringStandard; | ||
    } | ||
|
||
    /// @dev Returns the packed storage string through `PackedStringLib.returnUnpackedString`. | ||
    /// @param brutalizeWith Bytes passed to the `brutalizeMemory` modifier. | ||
    function fuzzyString2(bytes calldata brutalizeWith) | ||
        external | ||
        view | ||
        brutalizeMemory(brutalizeWith) | ||
        returns (string memory) | ||
    { | ||
        // No explicit `return`: the library call ends execution with an assembly `return` | ||
        // that carries the ABI-encoded string. | ||
        PackedStringLib.returnUnpackedString(_fuzzyStringPacked); | ||
    } | ||
|
||
    /// @dev Unpacks the packed storage string into memory with `PackedStringLib.unpackString` | ||
    /// and returns it with a regular Solidity `return`. | ||
    /// @param brutalizeWith Bytes passed to the `brutalizeMemory` modifier. | ||
    function fuzzyString3(bytes calldata brutalizeWith) | ||
        external | ||
        view | ||
        brutalizeMemory(brutalizeWith) | ||
        returns (string memory) | ||
    { | ||
        return PackedStringLib.unpackString(_fuzzyStringPacked); | ||
    } | ||
|
||
    /// @dev Stores `stringyFuzzBall` twice: as a standard storage string and as a packed word. | ||
    /// Reverts through `packString` if the string is longer than 31 bytes. | ||
    function setFuzzyStrings(string memory stringyFuzzBall) external { | ||
        _fuzzyStringStandard = stringyFuzzBall; | ||
        _fuzzyStringPacked = PackedStringLib.packString(stringyFuzzBall); | ||
    } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
// SPDX-License-Identifier: MIT | ||
pragma solidity >=0.8.0; | ||
|
||
/// @notice Efficient library for encoding/decoding strings shorter than 32 bytes as one word. | ||
/// @notice Solidity has built-in functionality for storing strings shorter than 32 bytes in | ||
/// a single word, but it must determine at runtime whether to treat each string as one word | ||
/// or several. This introduces a significant amount of bytecode and runtime complexity to | ||
/// any contract storing strings. | ||
/// @notice When it is known in advance that a string will never be longer than 31 bytes, | ||
/// telling the compiler to always treat strings as such can greatly reduce extraneous runtime | ||
/// code that would have never been executed. | ||
/// @notice https://docs.soliditylang.org/en/v0.8.17/types.html#bytes-and-string-as-arrays | ||
/// @author Solmate (https://github.com/transmissions11/solmate/blob/main/src/utils/PackedStringLib.sol) | ||
library PackedStringLib { | ||
error UnpackableString(); | ||
|
||
    /// @dev Pack a 0-31 byte string into a bytes32. | ||
    /// @dev Will revert if string exceeds 31 bytes. | ||
    /// @param unpackedString String in memory to pack; must be at most 31 bytes long. | ||
    /// @return packedString Word whose first byte is the string length and whose remaining | ||
    /// 31 bytes are the string body; zero when the string is empty. | ||
    function packString(string memory unpackedString) internal pure returns (bytes32 packedString) { | ||
        uint256 length = bytes(unpackedString).length; | ||
        // Verify string length and body will fit into one word | ||
        if (length > 31) { | ||
            revert UnpackableString(); | ||
        } | ||
        assembly { | ||
            // -------------------------------------------------------------------------// | ||
            // Layout in memory of input string (less than 32 bytes) // | ||
            // Note that "position" is relative to the pointer, not absolute // | ||
            // -------------------------------------------------------------------------// | ||
            // Bytes | Value | Description // | ||
            // -------------------------------------------------------------------------// | ||
            // 0:31 | 0 | Empty left-padding for string length // | ||
            // | | Not included in output // | ||
            // 31:32 | length | Single-byte length between 0 and 31 // | ||
            // 32:63 | body / unknown | Right-padded string body if length > 0 // | ||
            // | | Unknown if length is zero // | ||
            // 63:64 | 0 / unknown | Empty right-padding byte for string if // | ||
            // | | length > 0; otherwise, unknown data // | ||
            // | | This byte is never included in the output // | ||
            // -------------------------------------------------------------------------// | ||
|
||
            // Read one word starting at the last byte of the length, so that the first | ||
            // byte of the packed string will be its length (left-padded) and the | ||
            // following 31 bytes will contain the string's body (right-padded). | ||
            // NOTE(review): for lengths 1-30 the bytes after the body are copied unmasked; | ||
            // this assumes the input string is zero-padded in memory - confirm for strings | ||
            // that were built with inline assembly. | ||
            packedString := mul( | ||
                mload(add(unpackedString, 31)), | ||
                // If length is zero, the word after length will not be allocated for | ||
                // the body and may contain dirty bits. We multiply the packed value by | ||
                // length > 0 to ensure the body is null if the length is zero. | ||
                iszero(iszero(length)) | ||
            ) | ||
        } | ||
    } | ||
|
||
    /// @dev Return the unpacked form of `packedString`. | ||
    /// @notice Ends contract execution and returns the string - should only | ||
    /// be used in an external function with a string return type. | ||
    /// @notice Does not check `packedString` has valid encoding, assumes it was created | ||
    /// by `packString`. | ||
    /// @param packedString Word holding the length in its first byte and the body in the | ||
    /// remaining 31 bytes. | ||
    function returnUnpackedString(bytes32 packedString) internal pure { | ||
        assembly { | ||
            // ---------------------------------------------------------------------// | ||
            // Unpacked string layout in memory & returndata // | ||
            // ---------------------------------------------------------------------// | ||
            // Position | Value | Description // | ||
            // ---------------------------------------------------------------------// | ||
            // 0:32 | 32 | Offset to string length // | ||
            // 32:63 | 0 | Empty left-padding for string length // | ||
            // 63:64 | String length | Single-byte length of string // | ||
            // 64:95 | String body | 0-31 byte right-padded string body // | ||
            // 95:96 | 0 | Empty right-padding for string body // | ||
            // ---------------------------------------------------------------------// | ||
|
||
            // Write the offset to the string in the first word of scratch space. | ||
            mstore(0x00, 0x20) | ||
|
||
            // Note: We could shift the returndata right 32 bytes to avoid regions | ||
            // that Solidity's normal memory management would contaminate; starting at | ||
            // zero and manually clearing the padding bits protects against developer | ||
            // error where the developer is manipulating the zero slot and using very | ||
            // large numbers in the free memory pointer slot. | ||
|
||
            // Clear the 0x20 and 0x40 slots to ensure dirty bits do not contaminate | ||
            // the left-padding for length or right-padding for body. | ||
            // Overwriting slot 0x40 is harmless here because execution ends with the | ||
            // `return` below and no further allocation takes place. | ||
            mstore(0x20, 0x00) | ||
            mstore(0x40, 0x00) | ||
|
||
            // Write the packed string to memory starting at the last byte of the | ||
            // length buffer, writing the length byte to the end of the first word | ||
            // and the 0-31 byte body at the start of the second word. | ||
            mstore(0x3f, packedString) | ||
|
||
            // Return (offset, length, body) | ||
            // Three words (0x60 bytes) starting at memory position zero. | ||
            return(0x00, 0x60) | ||
        } | ||
    } | ||
|
||
    /// @dev Memory-safe string unpacking - updates the free memory pointer to | ||
    /// allocate space for the string. Useful for strings which are used within | ||
    /// the contract and not simply returned in metadata queries. | ||
    /// @notice Does not check `packedString` has valid encoding, assumes it was created | ||
    /// by `packString`. | ||
    /// Note that supplying an input not generated by this library can result in severe memory | ||
    /// corruption. The returned string can have an apparent length of up to 255 bytes and | ||
    /// overflow into adjacent memory regions if it is not encoded correctly. | ||
    /// @param packedString Word holding the length in its first byte and the body in the | ||
    /// remaining 31 bytes. | ||
    /// @return unpackedString Newly allocated memory string occupying exactly two words | ||
    /// (length word and body word). | ||
    function unpackString(bytes32 packedString) internal pure returns (string memory unpackedString) { | ||
        assembly { | ||
            // Set pointer for `unpackedString` to free memory pointer. | ||
            unpackedString := mload(0x40) | ||
            // Clear full buffer - it may contain dirty (unallocated) data. | ||
            // Normally this would not matter for the trailing zeroes of the body, | ||
            // but developers may assume that strings are padded to full words so | ||
            // we maintain that practice here. | ||
            mstore(unpackedString, 0) | ||
            mstore(add(unpackedString, 0x20), 0) | ||
            // Increase free memory pointer by 64 bytes to allocate space for | ||
            // the string's length and body - prevents Solidity's memory | ||
            // management from overwriting it. | ||
            mstore(0x40, add(unpackedString, 0x40)) | ||
            // Write the packed string to memory starting at the last byte of the | ||
            // length buffer. This places the single-byte length at the end of the | ||
            // length word and the 0-31 byte body at the start of the body word. | ||
            // The write covers bytes 0x1f through 0x3e of the buffer, so the final byte of | ||
            // the body word keeps the zero written above. | ||
            mstore(add(unpackedString, 0x1f), packedString) | ||
        } | ||
    } | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can't you do something like
bytes32(bytes(unpackedString))
? The conversion would truncate the string, so you'd probably need the
if (length > 31)
check if truncation is not good enough.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also would be nice to define a custom type
type ShortString is bytes32;