Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add compact string encoding library #320

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .gas-snapshot
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,17 @@ OwnedTest:testCallFunctionAsNonOwner(address) (runs: 256, μ: 16238, ~: 16257)
OwnedTest:testCallFunctionAsOwner() (gas: 10479)
OwnedTest:testSetOwner() (gas: 13035)
OwnedTest:testSetOwner(address) (runs: 256, μ: 13151, ~: 13170)
PackedStringLibTest:testDifferentiallyFuzzStoreRead(string,bytes) (runs: 256, μ: 57342, ~: 59405)
PackedStringLibTest:testPackString(bytes) (runs: 256, μ: 1378, ~: 1372)
PackedStringLibTest:testPackString(string,bytes) (runs: 256, μ: 4617, ~: 4628)
PackedStringLibTest:testPackStringTooLong() (gas: 3237)
PackedStringLibTest:testPackStringTooLong(string,bytes) (runs: 256, μ: 4602, ~: 4596)
PackedStringLibTest:testPackUnpackString(bytes) (runs: 256, μ: 1355, ~: 1349)
PackedStringLibTest:testPackUnpackString(string,bytes) (runs: 256, μ: 4835, ~: 4838)
PackedStringLibTest:testReturnUnpackedString(bytes) (runs: 256, μ: 3068, ~: 3058)
PackedStringLibTest:testReturnUnpackedString(string,bytes) (runs: 256, μ: 6338, ~: 6346)
PackedStringLibTest:testUnpackStringAlwaysAllocatesTwoWords(bytes) (runs: 256, μ: 1178, ~: 1170)
PackedStringLibTest:testUnpackStringCanCorruptMemory(bytes) (runs: 256, μ: 1583, ~: 1577)
ReentrancyGuardTest:invariantReentrancyStatusAlways1() (runs: 256, calls: 3840, reverts: 319)
ReentrancyGuardTest:testFailUnprotectedCall() (gas: 46147)
ReentrancyGuardTest:testNoReentrancy() (gas: 7515)
Expand Down
196 changes: 196 additions & 0 deletions src/test/PackedStringLib.t.sol
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
// SPDX-License-Identifier: AGPL-3.0-only
pragma solidity >=0.8.0;

import {DSTestPlus} from "./utils/DSTestPlus.sol";

import {PackedStringLib} from "../utils/PackedStringLib.sol";

contract PackedStringLibTest is DSTestPlus {
function testPackString(string calldata data, bytes calldata brutalizeWith)
    external
    packableString(data)
    brutalizeMemory(brutalizeWith)
{
    bytes32 packed = PackedStringLib.packString(data);
    uint256 packedWord = uint256(packed);
    uint256 dataLength = bytes(data).length;

    // An empty string must pack to the all-zero word.
    if (dataLength == 0) {
        assertEq(packed, bytes32(0), "Packed string not null with zero length");
        return;
    }

    // The most significant byte of the packed word carries the length.
    assertEq(packedWord >> 248, dataLength, "First byte does not match string length");

    // The remaining 31 bytes carry the body; compare them against the first
    // word of the string body as it appears in calldata.
    uint256 calldataBody;
    assembly {
        calldataBody := calldataload(data.offset)
    }
    assertEq(packedWord << 8, calldataBody, "Last 31 bytes do not match string body");
}

function testPackString(bytes calldata brutalizeWith) external brutalizeMemory(brutalizeWith) {
    // The empty string packs to the zero word.
    bytes32 packedEmpty = PackedStringLib.packString("");
    assertEq(packedEmpty, bytes32(0));

    // A single 0xff byte packs to length byte 0x01 followed by body byte 0xff.
    string memory singleByte = string(bytes(hex"ff"));
    bytes32 packedSingle = PackedStringLib.packString(singleByte);
    assertEq(packedSingle, bytes32(uint256(0x01ff) << 240));
}

function testPackStringTooLong(string calldata data, bytes calldata brutalizeWith)
    external
    brutalizeMemory(brutalizeWith)
{
    // Only inputs longer than 31 bytes are relevant to this test.
    uint256 dataLength = bytes(data).length;
    hevm.assume(dataLength > 31);

    // Packing such an input is expected to revert with UnpackableString.
    bytes4 expectedError = PackedStringLib.UnpackableString.selector;
    hevm.expectRevert(expectedError);
    PackedStringLib.packString(data);
}

function testPackStringTooLong() external {
    // 32 zero bytes: one byte more than a packed string can hold.
    string memory oversized = string(new bytes(32));
    hevm.expectRevert(PackedStringLib.UnpackableString.selector);
    PackedStringLib.packString(oversized);
}

function testUnpackStringAlwaysAllocatesTwoWords(bytes calldata brutalizeWith)
    external
    brutalizeMemory(brutalizeWith)
{
    // Snapshot the free memory pointer before unpacking.
    uint256 pointerBefore;
    assembly {
        pointerBefore := mload(0x40)
    }

    string memory unpacked = PackedStringLib.unpackString(bytes32(0));

    // Measure how far the free memory pointer moved.
    uint256 allocated;
    assembly {
        allocated := sub(mload(0x40), pointerBefore)
    }

    // Even an empty string reserves a length word and a body word.
    assertEq(allocated, 0x40);
    assertEq(bytes(unpacked).length, 0);
}

function testUnpackStringCanCorruptMemory(bytes calldata brutalizeWith) external brutalizeMemory(brutalizeWith) {
    // Hand-craft a packed word whose length byte claims 64 bytes, more than
    // packString could ever produce.
    bytes32 malformed = bytes32(uint256(64) << 248);
    string memory unpacked = PackedStringLib.unpackString(malformed);

    // The next allocation starts right after the two words unpackString
    // reserved, so its length word overlaps the second word of the claimed body.
    bytes memory neighbour = new bytes(32);

    // Using both variables also keeps the compiler from warning about them.
    uint256 claimedLength = bytes(unpacked).length;
    assertEq(neighbour.length * 2, claimedLength);

    // The 64-byte "body" reads as a zero word followed by the neighbour's length (32).
    bytes32 actualHash = keccak256(bytes(unpacked));
    bytes32 expectedHash = keccak256(abi.encodePacked(uint256(0), uint256(32)));
    assertEq(actualHash, expectedHash);
}

function testPackUnpackString(string calldata data, bytes calldata brutalizeWith)
    external
    packableString(data)
    brutalizeMemory(brutalizeWith)
{
    // Round-trip: pack, unpack, then compare against the input including padding.
    bytes32 packed = PackedStringLib.packString(data);
    string memory roundTripped = PackedStringLib.unpackString(packed);
    assertEqIncludingPadding(roundTripped, data);
}

function testPackUnpackString(bytes calldata brutalizeWith) external brutalizeMemory(brutalizeWith) {
    // The zero word must unpack to the empty string.
    string memory unpackedEmpty = PackedStringLib.unpackString(bytes32(0));
    assertEqIncludingPadding(unpackedEmpty, string(""));
}

function testReturnUnpackedString(string calldata data, bytes calldata brutalizeWith)
    external
    packableString(data)
{
    // Pack here, unpack through an external call so the string comes back as returndata.
    bytes32 packed = PackedStringLib.packString(data);
    string memory returned = this.returnUnpackedString(packed, brutalizeWith);
    assertEqIncludingPadding(returned, data);
}

function testReturnUnpackedString(bytes calldata brutalizeWith) external {
    // Fixed-input variant of the returndata round-trip.
    bytes32 packedGreeting = PackedStringLib.packString("Hello world");
    string memory returned = this.returnUnpackedString(packedGreeting, brutalizeWith);
    assertEqIncludingPadding(returned, "Hello world");
}

/**
 * @dev Differential fuzz test. The same input is stored once as a regular
 * string and once as a packed word, then read back four ways:
 *  - packed word, unpacked with unpackString in this execution context
 *  - regular string storage, returned in returndata (fuzzyString1)
 *  - packed word, returned through returnUnpackedString (fuzzyString2)
 *  - packed word, unpacked with unpackString and returned (fuzzyString3)
 * All results must match the input and each other, padding included.
 */
function testDifferentiallyFuzzStoreRead(string memory stringyFuzzBall, bytes calldata brutalizeWith)
    public
    brutalizeMemory(brutalizeWith)
{
    // Only strings that fit in a packed word are relevant.
    hevm.assume(bytes(stringyFuzzBall).length <= 31);
    this.setFuzzyStrings(stringyFuzzBall);

    string memory unpackedInPlace = PackedStringLib.unpackString(_fuzzyStringPacked);
    string memory fromStandardStorage = this.fuzzyString1(brutalizeWith);
    string memory fromDirectReturn = this.fuzzyString2(brutalizeWith);
    string memory fromUnpackThenReturn = this.fuzzyString3(brutalizeWith);

    // Chain the comparisons so every variant is tied back to the input.
    assertEqIncludingPadding(stringyFuzzBall, unpackedInPlace);
    assertEqIncludingPadding(unpackedInPlace, fromStandardStorage);
    assertEqIncludingPadding(fromStandardStorage, fromDirectReturn);
    assertEqIncludingPadding(fromDirectReturn, fromUnpackThenReturn);
}

/// @dev Assert that the two strings are identical: same length, same body and
/// same trailing bytes up to the next 32-byte boundary.
/// @param a First string to compare.
/// @param b Second string to compare.
function assertEqIncludingPadding(string memory a, string memory b) internal {
    bytes32 hashA;
    bytes32 hashB;
    assembly {
        // Hash each string starting at its length word and running through the
        // end of its body rounded up to a whole word. Including the length word
        // matters: hashing only the padded body would treat strings of different
        // lengths as equal whenever their padded bodies coincide
        // (for example "a" and "a\x00").
        hashA := keccak256(a, add(32, and(add(mload(a), 31), not(31))))
        hashB := keccak256(b, add(32, and(add(mload(b), 31), not(31))))
    }
    assertEq(hashA, hashB);
}

// Typical string storage
string internal _fuzzyStringStandard;
// Packed string storage
bytes32 internal _fuzzyStringPacked;

/// @dev External entry point that exposes PackedStringLib.returnUnpackedString
/// so the tests can receive the unpacked string as returndata of a call.
/// Memory is brutalized (via the modifier) before the library call runs.
function returnUnpackedString(bytes32 packedString, bytes calldata brutalizeWith)
external
view
brutalizeMemory(brutalizeWith)
returns (string memory)
{
// No `return` statement on purpose: the library function ends execution
// itself with an assembly `return` carrying the ABI-encoded string.
PackedStringLib.returnUnpackedString(packedString);
}

// Success-path tests only apply to strings that fit in a packed word,
// i.e. at most 31 bytes; other fuzz inputs are discarded.
modifier packableString(string calldata data) {
    uint256 dataLength = bytes(data).length;
    hevm.assume(dataLength <= 31);
    _;
}

function fuzzyString1(bytes calldata brutalizeWith)
    external
    view
    brutalizeMemory(brutalizeWith)
    returns (string memory)
{
    // Baseline: read the string from regular string storage.
    string memory standard = _fuzzyStringStandard;
    return standard;
}

/// @dev Returns the packed storage string through
/// PackedStringLib.returnUnpackedString, after brutalizing memory.
function fuzzyString2(bytes calldata brutalizeWith)
external
view
brutalizeMemory(brutalizeWith)
returns (string memory)
{
// No `return` statement on purpose: the library function ends execution
// itself with an assembly `return` carrying the ABI-encoded string.
PackedStringLib.returnUnpackedString(_fuzzyStringPacked);
}

function fuzzyString3(bytes calldata brutalizeWith)
    external
    view
    brutalizeMemory(brutalizeWith)
    returns (string memory)
{
    // Unpack into memory first, then let Solidity encode the return value.
    string memory unpacked = PackedStringLib.unpackString(_fuzzyStringPacked);
    return unpacked;
}

function setFuzzyStrings(string memory stringyFuzzBall) external {
    // Store the same input in both representations: packed word and regular string.
    bytes32 packed = PackedStringLib.packString(stringyFuzzBall);
    _fuzzyStringPacked = packed;
    _fuzzyStringStandard = stringyFuzzBall;
}
}
126 changes: 126 additions & 0 deletions src/utils/PackedStringLib.sol
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// SPDX-License-Identifier: MIT
pragma solidity >=0.8.0;

/// @notice Efficient library for encoding/decoding strings shorter than 32 bytes as one word.
/// @notice Solidity has built-in functionality for storing strings shorter than 32 bytes in
/// a single word, but it must determine at runtime whether to treat each string as one word
/// or several. This introduces a significant amount of bytecode and runtime complexity to
/// any contract storing strings.
/// @notice When it is known in advance that a string will never be longer than 31 bytes,
/// telling the compiler to always treat it as a single word can greatly reduce extraneous
/// runtime code that would never be executed.
/// @notice https://docs.soliditylang.org/en/v0.8.17/types.html#bytes-and-string-as-arrays
/// @author Solmate (https://github.com/transmissions11/solmate/blob/main/src/utils/PackedStringLib.sol)
library PackedStringLib {
error UnpackableString();

/// @dev Pack a 0-31 byte string into a bytes32.
/// @dev Will revert if string exceeds 31 bytes.
/// @dev Bytes that follow the string body in memory are masked out, so the
/// result does not depend on whether the input's padding in memory is clean.
/// @param unpackedString String to pack; must be at most 31 bytes long.
/// @return packedString Length in the first (most significant) byte, followed by
/// the body right-padded with zeros. Zero for the empty string.
function packString(string memory unpackedString) internal pure returns (bytes32 packedString) {
    uint256 length = bytes(unpackedString).length;

    // NOTE(review): suggestions raised on the pull request, not adopted here:
    //  - "Can't you do something like bytes32(bytes(unpackedString))? The
    //    conversion would truncate the string, so you'd probably need the
    //    `if (length > 31)` check if truncation is not good enough."
    //  - "Would be nice to define a custom type `type ShortString is bytes32;`."

    // Verify string length and body will fit into one word
    if (length > 31) {
        revert UnpackableString();
    }
    assembly {
        // -------------------------------------------------------------------------//
        // Layout in memory of input string (less than 32 bytes)                    //
        // Note that "position" is relative to the pointer, not absolute            //
        // -------------------------------------------------------------------------//
        // Bytes   | Value            | Description                                 //
        // -------------------------------------------------------------------------//
        // 0:31    | 0                | Empty left-padding for string length        //
        //         |                  | Not included in output                      //
        // 31:32   | length           | Single-byte length between 0 and 31         //
        // 32:32+n | body             | String body (n = length)                    //
        // 32+n:64 | unknown          | Bytes after the body; not assumed to be     //
        //         |                  | zero and never included in the output       //
        // -------------------------------------------------------------------------//

        // Read one word starting at the last byte of the length, so that the first
        // byte of the loaded word is the length and the following 31 bytes start
        // with the string's body.
        //
        // Only the first `length + 1` bytes of that word are meaningful. The
        // remaining `31 - length` bytes lie past the end of the string and may
        // hold dirty data, so they are cleared by shifting them out and back in.
        // For an empty string this keeps just the length byte (zero), so the
        // result is the zero word.
        let unusedBits := shl(3, sub(31, length))
        packedString := shl(unusedBits, shr(unusedBits, mload(add(unpackedString, 31))))
    }
}

/// @dev Return the unpacked form of `packedString`.
/// @notice Ends contract execution and returns the string - should only
/// be used in an external function with a string return type.
/// @notice Does not check `packedString` has valid encoding, assumes it was created
/// by `packString`. The length byte is emitted as-is and exactly 0x60 bytes are
/// always returned.
/// @notice Overwrites memory words 0x00, 0x20 and 0x40 (the last one is the slot
/// `unpackString` reads the free memory pointer from). Nothing runs in this call
/// after the assembly `return`, so the pointer is not restored.
/// @param packedString Word with the length in its first byte and the body after it.
function returnUnpackedString(bytes32 packedString) internal pure {
assembly {
// ---------------------------------------------------------------------//
// Unpacked string layout in memory & returndata //
// ---------------------------------------------------------------------//
// Position | Value | Description //
// ---------------------------------------------------------------------//
// 0:32 | 32 | Offset to string length //
// 32:63 | 0 | Empty left-padding for string length //
// 63:64 | String length | Single-byte length of string //
// 64:95 | String body | 0-31 byte right-padded string body //
// 95:96 | 0 | Empty right-padding for string body //
// ---------------------------------------------------------------------//

// Write the offset to the string in the first word of scratch space.
mstore(0x00, 0x20)

// Note: We could shift the returndata right 32 bytes to avoid regions
// that Solidity's normal memory management would contaminate; starting at
// zero and manually clearing the padding bits protects against developer
// error where the developer is manipulating the zero slot and using very
// large numbers in the free memory pointer slot.

// Clear the 0x20 and 0x40 slots to ensure dirty bits do not contaminate
// the left-padding for length or right-padding for body.
// These two stores must come before the write at 0x3f below, which only
// covers bytes 0x3f-0x5e; bytes 0x20-0x3e and byte 0x5f keep the zeros
// written here.
mstore(0x20, 0x00)
mstore(0x40, 0x00)

// Write the packed string to memory starting at the last byte of the
// length buffer, writing the length byte to the end of the first word
// and the 0-31 byte body at the start of the second word.
mstore(0x3f, packedString)

// Return (offset, length, body)
// This halts the current call with three words of returndata.
return(0x00, 0x60)
}
}

/// @dev Unpack `packedString` into a freshly allocated memory string. The free
/// memory pointer is advanced so the result survives later allocations, which
/// makes this the variant to use when the string is consumed inside the
/// contract rather than returned straight away.
/// @notice `packedString` is not validated; it is assumed to come from `packString`.
/// Note that an input not generated by this library can cause severe memory
/// corruption: the length byte may claim up to 255 bytes while only two words
/// are allocated, so the string would overlap adjacent memory regions.
function unpackString(bytes32 packedString) internal pure returns (string memory unpackedString) {
    assembly {
        // The string lives at the current free memory pointer.
        let ptr := mload(0x40)

        // Reserve 64 bytes (length word + body word) so that Solidity's
        // memory management does not hand this region out again.
        mstore(0x40, add(ptr, 0x40))

        // Zero both words first: unallocated memory may hold dirty data, and
        // callers may rely on string bodies being zero-padded to a full word.
        mstore(add(ptr, 0x20), 0)
        mstore(ptr, 0)

        // Store the packed word one byte before the body word: its first byte
        // becomes the last byte of the length word and the remaining 31 bytes
        // become the start of the body word.
        mstore(add(ptr, 0x1f), packedString)

        unpackedString := ptr
    }
}
}