Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

An attempt to share scratch slot assignments among subroutines #533

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions examples/application/abi/algobank_approval.teal
Original file line number Diff line number Diff line change
Expand Up @@ -202,23 +202,23 @@ retsub

// withdraw
withdraw_3:
store 8
store 7
store 6
store 5
txn Sender
byte "balance"
txn Sender
byte "balance"
app_local_get
load 7
load 5
-
app_local_put
itxn_begin
int pay
itxn_field TypeEnum
load 8
load 6
txnas Accounts
itxn_field Receiver
load 7
load 5
itxn_field Amount
int 0
itxn_field Fee
Expand Down
19 changes: 11 additions & 8 deletions pyteal/compiler/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def verifyOpsForMode(teal: List[TealComponent], mode: Mode):
def compileSubroutine(
ast: Expr,
options: CompileOptions,
subroutineGraph: Dict[SubroutineDefinition, Set[SubroutineDefinition]],
subroutine_graph: Dict[Optional[SubroutineDefinition], Set[SubroutineDefinition]],
subroutine_start_blocks: Dict[Optional[SubroutineDefinition], TealBlock],
subroutine_end_blocks: Dict[Optional[SubroutineDefinition], TealBlock],
) -> None:
Expand Down Expand Up @@ -202,15 +202,14 @@ def compileSubroutine(
for subroutine in stmt.getSubroutines():
referencedSubroutines.add(subroutine)

if currentSubroutine is not None:
subroutineGraph[currentSubroutine] = referencedSubroutines
subroutine_graph[currentSubroutine] = referencedSubroutines

newSubroutines = referencedSubroutines - subroutine_start_blocks.keys()
for subroutine in sorted(newSubroutines, key=lambda subroutine: subroutine.id):
compileSubroutine(
subroutine.get_declaration(),
options,
subroutineGraph,
subroutine_graph,
subroutine_start_blocks,
subroutine_end_blocks,
)
Expand Down Expand Up @@ -272,11 +271,13 @@ def compileTeal(

options = CompileOptions(mode=mode, version=version, optimize=optimize)

subroutineGraph: Dict[SubroutineDefinition, Set[SubroutineDefinition]] = dict()
subroutine_graph: Dict[
Optional[SubroutineDefinition], Set[SubroutineDefinition]
] = dict()
subroutine_start_blocks: Dict[Optional[SubroutineDefinition], TealBlock] = dict()
subroutine_end_blocks: Dict[Optional[SubroutineDefinition], TealBlock] = dict()
compileSubroutine(
ast, options, subroutineGraph, subroutine_start_blocks, subroutine_end_blocks
ast, options, subroutine_graph, subroutine_start_blocks, subroutine_end_blocks
)

# note: optimizations are off by default, in which case, apply_global_optimizations
Expand All @@ -291,14 +292,16 @@ def compileTeal(
for start in subroutine_start_blocks.values():
apply_global_optimizations(start, options.optimize)

localSlotAssignments = assignScratchSlotsToSubroutines(subroutine_start_blocks)
localSlotAssignments = assignScratchSlotsToSubroutines(
subroutine_start_blocks, subroutine_graph
)

subroutineMapping: Dict[
Optional[SubroutineDefinition], List[TealComponent]
] = sort_subroutine_blocks(subroutine_start_blocks, subroutine_end_blocks)

spillLocalSlotsDuringRecursion(
version, subroutineMapping, subroutineGraph, localSlotAssignments
version, subroutineMapping, subroutine_graph, localSlotAssignments
)

subroutineLabels = resolveSubroutines(subroutineMapping)
Expand Down
193 changes: 181 additions & 12 deletions pyteal/compiler/scratchslots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pyteal.ir import TealBlock, Op
from pyteal.errors import TealInternalError
from pyteal.config import NUM_SLOTS
from pyteal.compiler.subroutines import find_callstack_exclusive_subroutines


def collect_unoptimized_slots(
Expand Down Expand Up @@ -87,8 +88,103 @@ def collectSlotsFromBlock(block: TealBlock, slots: Set[ScratchSlot]):
return global_slots, local_slots


def combine_subroutine_slot_assignments_greedy_algorithm(
    combined_subroutine_groups: list[set[Optional[SubroutineDefinition]]],
    subroutine_graph: dict[Optional[SubroutineDefinition], set[SubroutineDefinition]],
) -> None:
    """Greedily merge groups of callstack exclusive subroutines so they can share scratch slots.

    This is an imperfect greedy algorithm to share scratch slot assignments between callstack
    exclusive subroutines.

    * Its granularity is at the subroutine level, meaning it decides that two subroutines must
      share all of their scratch slot assignments, or none of them.
    * It uses the "exclusivity" of a subroutine (i.e. how many other subroutines it's callstack
      exclusive with) as a heuristic to combine subroutines.
    * Two groups are only merged when every member of one group is callstack exclusive with every
      member of the other, according to the relation originally reported by
      `find_callstack_exclusive_subroutines`.
    * The loop terminates: every iteration either stops or merges two distinct groups, so at most
      `len(combined_subroutine_groups) - 1` merges can happen. No analysis has been done on how
      close to optimal the result is.
    * WARNING: this algorithm DOES NOT honor user-defined scratch slots. Those slots may be
      assigned to a numeric slot which IS NOT what the user specified.

    Args:
        combined_subroutine_groups: A list of sets of subroutines. Each set indicates subroutines
            which will share scratch slot assignments. This is a makeshift union-find data
            structure. It is modified in place.
        subroutine_graph: A graph of subroutines. Each key is a subroutine (the main routine should
            not be present), which represents a node in the graph. Each value is a set of all
            subroutines that specific subroutine calls, which represent directional edges in the
            graph.

    Raises:
        TealInternalError: if a subroutine taking part in the analysis does not belong to any
            group in `combined_subroutine_groups`.
    """
    callstack_exclusive_subroutines = find_callstack_exclusive_subroutines(
        subroutine_graph
    )

    if len(callstack_exclusive_subroutines) == 0:
        return

    # Snapshot of the relation before any narrowing. The working sets below are shrunk as groups
    # are merged, so this is the only reliable source for whole-group safety checks.
    original_exclusive = {
        s: frozenset(exclusive)
        for s, exclusive in callstack_exclusive_subroutines.items()
    }

    def are_exclusive(
        a: Optional[SubroutineDefinition], b: Optional[SubroutineDefinition]
    ) -> bool:
        # NOTE(review): callstack exclusivity is assumed to be a symmetric relation; either
        # direction being recorded is accepted in case only one direction is stored.
        return b in original_exclusive.get(
            a, frozenset()
        ) or a in original_exclusive.get(b, frozenset())

    def find_group_index(subroutine: Optional[SubroutineDefinition]) -> int:
        for i, group in enumerate(combined_subroutine_groups):
            if subroutine in group:
                return i
        # Previously a -1 sentinel was used as a list index here, which silently selected the
        # last group. Fail loudly instead.
        raise TealInternalError(
            "Subroutine {} does not belong to any scratch slot group".format(subroutine)
        )

    def can_merge_with(
        group: set[Optional[SubroutineDefinition]],
        candidate: Optional[SubroutineDefinition],
    ) -> bool:
        # Merging is only safe if *every* pair across the two groups is callstack exclusive,
        # not just the two subroutines that triggered the merge.
        candidate_group = combined_subroutine_groups[find_group_index(candidate)]
        return all(
            are_exclusive(member, other)
            for member in group
            for other in candidate_group
        )

    def exclusivity(s: Optional[SubroutineDefinition]) -> int:
        return len(callstack_exclusive_subroutines[s])

    # choose "most exclusive" (i.e. most compatible) subroutine to start
    current_subroutine = max(callstack_exclusive_subroutines.keys(), key=exclusivity)
    while True:
        group_index = find_group_index(current_subroutine)
        current_group = combined_subroutine_groups[group_index]

        # only look at subroutines we're not already grouped with, and whose whole group is safe
        # to merge with our whole group
        new_callstack_exclusive = [
            s
            for s in callstack_exclusive_subroutines[current_subroutine]
            if s not in current_group and can_merge_with(current_group, s)
        ]
        if len(new_callstack_exclusive) == 0:
            # nothing else to do
            break

        # choose the "most exclusive" subroutine that is exclusive to `current_subroutine`
        to_combine = max(new_callstack_exclusive, key=exclusivity)

        # Share scratch slot assignments between `current_subroutine` and `to_combine`.
        # `to_combine` is not in `current_group`, so the two indexes are distinct.
        to_combine_group_index = find_group_index(to_combine)
        current_group.update(combined_subroutine_groups[to_combine_group_index])
        combined_subroutine_groups.pop(to_combine_group_index)

        # BEWARE! Now that we've decided to share scratch slot assignments between the two
        # subroutines, this potentially limits the other subroutines that they can share
        # assignments with. Specifically, even if `current_subroutine` is callstack exclusive
        # with another subroutine `X`, if `to_combine` is not callstack exclusive with `X`, it's
        # no longer safe for `current_subroutine` to share assignments with `X`. We encode this
        # constraint by taking the intersection of `current_subroutine` and `to_combine`'s
        # callstack exclusive subroutines. This narrowing only steers the heuristic; safety is
        # enforced by `can_merge_with`.
        intersection = (
            callstack_exclusive_subroutines[current_subroutine]
            & callstack_exclusive_subroutines[to_combine]
        )
        callstack_exclusive_subroutines[current_subroutine] = intersection | {
            to_combine
        }
        callstack_exclusive_subroutines[to_combine] = intersection | {
            cast(SubroutineDefinition, current_subroutine)
        }

        current_subroutine = max(
            callstack_exclusive_subroutines.keys(), key=exclusivity
        )


def assignScratchSlotsToSubroutines(
subroutineBlocks: Dict[Optional[SubroutineDefinition], TealBlock],
subroutine_graph: dict[Optional[SubroutineDefinition], set[SubroutineDefinition]],
) -> Dict[Optional[SubroutineDefinition], Set[int]]:
"""Assign scratch slot values for an entire program.

Expand All @@ -97,6 +193,10 @@ def assignScratchSlotsToSubroutines(
blocks. The key None is taken to mean the main program routine. The values of this
map will be modified in order to assign specific slot values to all referenced scratch
slots.
subroutine_graph: A graph of subroutines. Each key is a subroutine (the main routine should
not be present), which represents a node in the graph. Each value is a set of all
subroutines that specific subroutine calls, which represent directional edges in the
graph.

Raises:
TealInternalError: if the scratch slots referenced by the program do not fit into 256 slots,
Expand All @@ -113,7 +213,54 @@ def assignScratchSlotsToSubroutines(
*local_slots.values()
)

slotAssignments: Dict[ScratchSlot, int] = dict()
# combined_subroutine_groups is a makeshift union-find data structure which identifies which
# subroutines will share scratch slot assignments.
# TODO: replace this with an actual union-find data structure
# TODO: it may make more sense to decide whether two subroutines can share an assignment on a
# slot-by-slot basis, instead of grouping all a subroutine's slots together.
combined_subroutine_groups: list[set[Optional[SubroutineDefinition]]] = [
{s} for s in subroutine_graph.keys()
]

# TODO: implement a way to opt into this optimization -- don't always run it
combine_subroutine_slot_assignments_greedy_algorithm(
combined_subroutine_groups, subroutine_graph
)

# the "spokesperson" for a group is the subroutine with the largest number of local slots
combined_subroutine_groups_spokesperson: list[Optional[SubroutineDefinition]] = []
# all other subroutines in the group will have their local slots mapped to their spokesperson's
local_slot_mappings_to_spokesperson: list[dict[ScratchSlot, ScratchSlot]] = []
for group in combined_subroutine_groups:
spokesperson = max(group, key=lambda s: len(local_slots[s]))
spokesperson_local_slots = list(local_slots[spokesperson])
local_slot_mappings = {slot: slot for slot in spokesperson_local_slots}

for subroutine in group:
if subroutine is spokesperson:
continue
for i, slot in enumerate(local_slots[subroutine]):
local_slot_mappings[slot] = spokesperson_local_slots[i]

combined_subroutine_groups_spokesperson.append(spokesperson)
local_slot_mappings_to_spokesperson.append(local_slot_mappings)

slots_to_assign: set[ScratchSlot] = global_slots | cast(
set[ScratchSlot], set()
).union(
*[
local_slots[spokesperson]
for spokesperson in combined_subroutine_groups_spokesperson
]
)

if len(slots_to_assign) > NUM_SLOTS:
raise TealInternalError(
"Too many slots in use: {}, maximum is {}".format(
len(slots_to_assign), NUM_SLOTS
)
)

slotIds: Set[int] = set()

for slot in allSlots:
Expand All @@ -127,12 +274,15 @@ def assignScratchSlotsToSubroutines(
)
slotIds.add(slot.id)

if len(allSlots) > NUM_SLOTS:
# TODO: identify which slots can be reused
# subroutines which never invoke each other can use the same slot ID for local slots
raise TealInternalError(
"Too many slots in use: {}, maximum is {}".format(len(allSlots), NUM_SLOTS)
)
# Run the above check on all slots (before subroutine combination optimization), but clear it out
# and populate slotIds again. We only do this because the optimization algorithm above doesn't
# honor user-defined slot IDs.
slotIds.clear()

for slot in slots_to_assign:
if not slot.isReservedSlot:
continue
slotIds.add(slot.id)

# verify that all local slots are assigned to before being loaded.
# TODO: for simplicity, the current implementation does not perform this check with global slots
Expand All @@ -145,8 +295,9 @@ def assignScratchSlotsToSubroutines(
)
raise TealInternalError(msg) from errors[0]

slotAssignments: Dict[ScratchSlot, int] = dict()
nextSlotIndex = 0
for slot in sorted(allSlots, key=lambda slot: slot.id):
for slot in sorted(slots_to_assign, key=lambda slot: slot.id):
# Find next vacant slot that compiler can assign to
while nextSlotIndex in slotIds:
nextSlotIndex += 1
Expand All @@ -158,14 +309,32 @@ def assignScratchSlotsToSubroutines(
slotAssignments[slot] = nextSlotIndex
slotIds.add(nextSlotIndex)

for start in subroutineBlocks.values():
for subroutine, start in subroutineBlocks.items():
group_index = -1
for i, group in enumerate(combined_subroutine_groups):
if subroutine in group:
group_index = i
break
assert group_index != -1

slot_mapping = local_slot_mappings_to_spokesperson[group_index]

for block in TealBlock.Iterate(start):
for op in block.ops:
for slot in op.getSlots():
op.assignSlot(slot, slotAssignments[slot])
if slot in slot_mapping:
# a local slot
op.assignSlot(slot, slotAssignments[slot_mapping[slot]])
else:
# a global slot
op.assignSlot(slot, slotAssignments[slot])

assignedLocalSlots: Dict[Optional[SubroutineDefinition], Set[int]] = dict()
for subroutine, slots in local_slots.items():
assignedLocalSlots[subroutine] = set(slotAssignments[slot] for slot in slots)
for i, group in enumerate(combined_subroutine_groups):
slot_mapping = local_slot_mappings_to_spokesperson[i]
for subroutine in group:
assignedLocalSlots[subroutine] = set(
slotAssignments[slot_mapping[slot]] for slot in local_slots[subroutine]
)

return assignedLocalSlots
Loading