Skip to content

Commit

Permalink
fix: DIA-1534: JSON values interpreted as format spec (#243)
Browse files Browse the repository at this point in the history
  • Loading branch information
matt-bernstein authored Nov 4, 2024
1 parent b33a87f commit 882ca68
Show file tree
Hide file tree
Showing 4 changed files with 227 additions and 60 deletions.
59 changes: 59 additions & 0 deletions adala/utils/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,65 @@ def format_field(self, value, format_spec):
if value.startswith("{") and value.endswith("}"):
return value[:-1] + ":" + format_spec + "}"

def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
             auto_arg_index=0):
    """Expand ``format_string``, leaving unformattable fields in place.

    This follows the parent class's ``_vformat`` (it matches CPython's
    ``string.Formatter._vformat``) with one deviation: when
    ``self.format_field`` returns ``None`` the replacement field is written
    back to the output as a ``{...}`` placeholder instead of being
    substituted.

    Args:
        format_string: Template text to expand.
        args: Positional arguments available to the template.
        kwargs: Keyword arguments available to the template.
        used_args: Set that collects the key/index of every argument
            referenced by the template (mutated in place).
        recursion_depth: Remaining levels of nested format-spec expansion.
        auto_arg_index: Next index for automatic ``{}`` numbering, or
            ``False`` once manual numbering has been used.

    Returns:
        A ``(formatted_text, auto_arg_index)`` tuple.

    Raises:
        ValueError: If ``recursion_depth`` is negative, or if the template
            mixes automatic and manual field numbering.
    """
    if recursion_depth < 0:
        raise ValueError('Max string recursion exceeded')
    result = []
    for literal_text, field_name, format_spec, conversion in \
            self.parse(format_string):

        # output the literal text
        if literal_text:
            result.append(literal_text)

        # no replacement field follows this literal; nothing more to do
        if field_name is None:
            continue

        # handle arg indexing when empty field_names are given.
        if field_name == '':
            if auto_arg_index is False:
                raise ValueError('cannot switch from manual field '
                                 'specification to automatic field '
                                 'numbering')
            field_name = str(auto_arg_index)
            auto_arg_index += 1
        elif field_name.isdigit():
            if auto_arg_index:
                raise ValueError('cannot switch from manual field '
                                 'specification to automatic field '
                                 'numbering')
            # disable auto arg incrementing, if it gets
            # used later on, then an exception will be raised
            auto_arg_index = False

        # given the field_name, find the object it references
        # and the argument it came from
        obj, arg_used = self.get_field(field_name, args, kwargs)
        used_args.add(arg_used)

        # do any conversion on the resulting object
        obj = self.convert_field(obj, conversion)

        # expand the format spec, if needed
        format_spec, auto_arg_index = self._vformat(
            format_spec, args, kwargs,
            used_args, recursion_depth - 1,
            auto_arg_index=auto_arg_index)

        # HACK: if the format_spec is invalid (format_field signals this by
        # returning None), assume this field_name was not meant to be a
        # variable, and don't substitute anything.
        formatted_field = self.format_field(obj, format_spec)
        if formatted_field is None:
            # Re-emit the placeholder, including any explicit conversion
            # (e.g. ``!r``) so the original field text is not silently
            # altered.
            conversion_part = '' if conversion is None else '!' + conversion
            result.append(
                '{' + field_name + conversion_part + ':' + format_spec + '}')
        else:
            result.append(formatted_field)

    return ''.join(result), auto_arg_index


# Shared PartialStringFormatter instance bound to the name PartialStringFormat.
PartialStringFormat = PartialStringFormatter()

Expand Down
118 changes: 59 additions & 59 deletions tests/cassettes/test_llm/test_llm_sync.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,34 @@ interactions:
response:
body:
string: !!binary |
H4sIAAAAAAAAA2xRwW7UMBS85ysevnBJUJKulHQvVRFSqVQuLKIghCKv43Xc2n7GfqGsqpX4DX6P
L0HOpruL2oslz3jGM+89ZgBM92wJTAychPWmuLz5+enLStHHSl3hsP284urD6mv59uzy9t0PlicF
ru+koCfVG4HWG0ka3Z4WQXKSybVq6vOmPa/aZiIs9tIkmfJULLCw2umiLutFUTZF1c7qAbWQkS3h
WwYA8DidKafr5S+2hDJ/QqyMkSvJlodHACygSQjjMepI3BHLj6RAR9JN0a9fW+hROwUP0pgcaODu
HrY4voL3+AB8jSOl6wXcDpz+/v4TAV0CAljteiDs+fbi1DzIzRh5KuhGY2Z8d0hrUPmA6zjzB3yj
nY5DFySP6FKySOjZxO4ygO/TVMb/ijIf0HrqCO+lS4bVYm/Hjrs4IduZJCRujvhZnb/g1vWSuDbx
ZKpMcDHI/qgss5Nqz/98yWJfTzv1zCWbnVjcRpK222inZPBB7/e08d2irUVd82YtWLbL/gEAAP//
AwCZ/QJBtQIAAA==
H4sIAAAAAAAAA2xRwWrcMBS8+ytedcnFLrbX4LCXUAqlySmXkNBSjFaSbTWSnpCeSUxY6G/09/ol
RV5nd0NyEWhGM5p57yUDYFqyLTAxchLWm+LLLTU3Y9X6+WG69fM83D3fieFb9ePrjfYsTwrc/VaC
XlWfBVpvFGl0B1oExUkl16rdlO1mUzftQliUyiTZ4KlosLDa6aIu66Yo26K6XNUjaqEi28LPDADg
ZTlTTifVM9tCmb8iVsXIB8W2x0cALKBJCOMx6kjcEctPpEBHyi3Rry8sSNRugCdlTA40cvcIM06f
4Ds+Ad/hROl6Bfcjp39//kZAl4AAVjsJhJLPV+fmQfVT5Kmgm4xZ8f0xrcHBB9zFlT/ivXY6jl1Q
PKJLySKhZwu7zwB+LVOZ3hRlPqD11BE+KpcMq+Zgx067OCMvV5KQuDnhmzr/wK2Tirg28WyqTHAx
KnlSltlZtfd/fmRxqKfd8M4lW51YnCMp2/XaDSr4oA976n1X7ngpq7rpK5bts/8AAAD//wMA63ls
OrUCAAA=
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8d7b84f06dcf22e4-ORD
- 8dd58d46fed642d1-EWR
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 24 Oct 2024 16:59:47 GMT
- Mon, 04 Nov 2024 15:14:07 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=30VOoxxFFnFIfoszGGPiK.d0YRhibLz9HMGU6Ov8uc4-1729789187-1.0.1.1-q3_tlPXfGO1erGWkfmCYDDw3XraJeWyheNxzdM7_vacleVv.0RlXh7dRiAWb2nDk5x0bpul5cT3GeJJgiIxm_w;
path=/; expires=Thu, 24-Oct-24 17:29:47 GMT; domain=.api.openai.com; HttpOnly;
- __cf_bm=KAd6a3pcZOhTOwgMUKrL9KYbhJMwtNkCrQ9Y..zdfjk-1730733247-1.0.1.1-Fr7.8fVaOBnPgS98bQMFzW6icIK2LXLAm7EubilXy0lm656o7_Qz1IsH_JafYYdByzJGrS2ATNOTvgCdLPFGEQ;
path=/; expires=Mon, 04-Nov-24 15:44:07 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=qrNT2dcRQYPgyDlvmFllJr47OO2xkifOl_y00DLjQwE-1729789187545-0.0.1.1-604800000;
- _cfuvid=l7iL4O7hW2C3VXg3EHHmHGMfG6h9GaDpM3R43nhxJAw-1730733247411-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
Expand All @@ -78,7 +78,7 @@ interactions:
openai-organization:
- heartex
openai-processing-ms:
- '499'
- '444'
openai-version:
- '2020-10-01'
strict-transport-security:
Expand All @@ -96,7 +96,7 @@ interactions:
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_09488789123c9033339676c9eb78b8cf
- req_fe0ad8004ac9aff9cbb93995b412bcd9
status:
code: 200
message: OK
Expand All @@ -116,8 +116,8 @@ interactions:
content-type:
- application/json
cookie:
- __cf_bm=30VOoxxFFnFIfoszGGPiK.d0YRhibLz9HMGU6Ov8uc4-1729789187-1.0.1.1-q3_tlPXfGO1erGWkfmCYDDw3XraJeWyheNxzdM7_vacleVv.0RlXh7dRiAWb2nDk5x0bpul5cT3GeJJgiIxm_w;
_cfuvid=qrNT2dcRQYPgyDlvmFllJr47OO2xkifOl_y00DLjQwE-1729789187545-0.0.1.1-604800000
- __cf_bm=KAd6a3pcZOhTOwgMUKrL9KYbhJMwtNkCrQ9Y..zdfjk-1730733247-1.0.1.1-Fr7.8fVaOBnPgS98bQMFzW6icIK2LXLAm7EubilXy0lm656o7_Qz1IsH_JafYYdByzJGrS2ATNOTvgCdLPFGEQ;
_cfuvid=l7iL4O7hW2C3VXg3EHHmHGMfG6h9GaDpM3R43nhxJAw-1730733247411-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
Expand All @@ -143,26 +143,26 @@ interactions:
response:
body:
string: !!binary |
H4sIAAAAAAAAA2yQUU+DMBSF3/kVtc9ggG2B8aYxGqMxe3CZiTGklAvUlbZpOzOz7L+bAhuY7aUP
5+s599x78BDCrMQZwrQhlraKB3evP+8fL296Vdnt82z9sXlgK0r3m6J4Wj9i3zlk8Q3Unly3VLaK
g2VS9JhqIBZcapTEyyRdRmnSgVaWwJ2tVjaYy6BlggVxGM+DMAmidHA3klEwOEOfHkIIHbrX9RQl
7HGGQv+ktGAMqQFn508IYS25UzAxhhlLhMX+CKkUFkRX/Z4IIsjNlGqodoa4hmLH+aAfz+O4rJWW
hRn4Wa+YYKbJNRAjhYs2Virc0aOH0Fe31u5fU6y0bJXNrdyCcIHRoo/D4zFHOBuYlZbwiSf1r4Tl
JVjCuJlcBVNCGyhHZ+hNNrsceS2i346J+iLFG5Kw+TUW2rxiogatNOvvXKm8WixJGs3jpMLe0fsD
AAD//wMAKtDZunUCAAA=
H4sIAAAAAAAAA2yQS0/DMBCE7/kVxucE5VWlzQ0kHuJEhYRACEWus0kMju3aWwRC/e8ojzZB7cWH
+Tyzs/vrEUJFSXNCecOQt0YGV4+YPtj1KzzdrF/Wq9t669DI5/uv8m4bUb9z6M0HcDy4LrlujQQU
Wg2YW2AIXWqUJWGWJHGa9aDVJcjOVhsMUh20QokgDuM0CLMgWo7uRgsOjubkzSOEkN/+7XqqEr5p
TkL/oLTgHKuB5sdPhFCrZadQ5pxwyBRSf4JcKwTVV79miil2MacWqp1jXUO1k3LU98dxUtfG6o0b
+VGvhBKuKSwwp1UX7VAb2tO9R8h7v9buX1NqrG4NFqg/QXWB0WKIo9MxJ5iMDDUyOfMs/TNhRQnI
hHSzq1DOeAPl5Ay92WanI89FDNsJVZ+keGMSdT8OoS0qoWqwxorhzpUpqsWKLaM0zirq7b0/AAAA
//8DAP9tac11AgAA
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8d7b84f68e9022e4-ORD
- 8dd58d4ceec242d1-EWR
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 24 Oct 2024 16:59:47 GMT
- Mon, 04 Nov 2024 15:14:07 GMT
Server:
- cloudflare
Transfer-Encoding:
Expand All @@ -176,7 +176,7 @@ interactions:
openai-organization:
- heartex
openai-processing-ms:
- '267'
- '271'
openai-version:
- '2020-10-01'
strict-transport-security:
Expand All @@ -194,20 +194,20 @@ interactions:
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_1ae0e296b425ec3a935b14fd8741b417
- req_c6a6c7507d645e9dc98d054751ed844b
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "user", "content": "My name is Carla and I am 25
years old with {brackets:.2f} and {brackets2:invalid_format_spec}."}], "model":
"gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0, "tool_choice":
{"type": "function", "function": {"name": "Output"}}, "tools": [{"type": "function",
"function": {"name": "Output", "description": "Correctly extracted `Output`
with all the required parameters with correct types", "parameters": {"properties":
{"name": {"description": "name:", "title": "Name", "type": "string"}, "age":
{"description": "age:", "title": "Age", "type": "string"}}, "required": ["age",
"name"], "type": "object"}}}]}'
years old with {brackets:.2f} and {brackets2:invalid_format_spec} and {input_name:invalid_format_spec}."}],
"model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0,
"tool_choice": {"type": "function", "function": {"name": "Output"}}, "tools":
[{"type": "function", "function": {"name": "Output", "description": "Correctly
extracted `Output` with all the required parameters with correct types", "parameters":
{"properties": {"name": {"description": "name:", "title": "Name", "type": "string"},
"age": {"description": "age:", "title": "Age", "type": "string"}}, "required":
["age", "name"], "type": "object"}}}]}'
headers:
accept:
- application/json
Expand All @@ -216,12 +216,12 @@ interactions:
connection:
- keep-alive
content-length:
- '665'
- '702'
content-type:
- application/json
cookie:
- __cf_bm=30VOoxxFFnFIfoszGGPiK.d0YRhibLz9HMGU6Ov8uc4-1729789187-1.0.1.1-q3_tlPXfGO1erGWkfmCYDDw3XraJeWyheNxzdM7_vacleVv.0RlXh7dRiAWb2nDk5x0bpul5cT3GeJJgiIxm_w;
_cfuvid=qrNT2dcRQYPgyDlvmFllJr47OO2xkifOl_y00DLjQwE-1729789187545-0.0.1.1-604800000
- __cf_bm=KAd6a3pcZOhTOwgMUKrL9KYbhJMwtNkCrQ9Y..zdfjk-1730733247-1.0.1.1-Fr7.8fVaOBnPgS98bQMFzW6icIK2LXLAm7EubilXy0lm656o7_Qz1IsH_JafYYdByzJGrS2ATNOTvgCdLPFGEQ;
_cfuvid=l7iL4O7hW2C3VXg3EHHmHGMfG6h9GaDpM3R43nhxJAw-1730733247411-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
Expand All @@ -247,28 +247,28 @@ interactions:
response:
body:
string: !!binary |
H4sIAAAAAAAAA2xSW2+bMBh951dY33OogLQN4a1qunZqtKnrNPXChBxjiDvfZJtuSZT/PnEJkLQ8
IOscn3O+i3ceQsBySBCQNXZEaO5fLd9/Pn99iSu9fLmdFuZmeZF/uzG/7qr3pyeY1Aq1eqPEHVRn
RAnNqWNKtjQxFDtau4azaD6L52EcN4RQOeW1rNTOP1e+YJL5URCd+8HMD+NOvVaMUAsJevUQQmjX
/Os6ZU7/QYKCyQER1FpcUkj6SwiBUbxGAFvLrMPSwWQgiZKOyrp0WXE+IpxSPCOY8yG4/Xaj8zAs
zHn2+Pyw/Ztf395vv2wXC7G4n//gD5vH6Sivtd7opqCikqQf0ojv8eQkDCGQWDTa75XTlTtRIgTY
lJWg0tVVwy5t7qeQpHCNDccpTFLAZYtEFyns4chg7312/j0ai6FFZTHv5tXh+34BXJXaqJU9mScU
TDK7zgzFtukLrFO6za5zmgSojnYH2iihXebUHyprwzCYtn4wvK8x25FOOcxHeHhQHfllOXWYNcvt
3xPBZE3zQRp4o+Y+hn5m0TbIZPnBxeucwG6soyIrmCyp0YY1jw8KncWrglziWU4i8PbefwAAAP//
AwBCQ8sMigMAAA==
H4sIAAAAAAAAA2xSTY+bMBC98yusOYcKCFVW3FY5tNJW6fbj0pYKOWYA7xrbaw/qRlH+e8VHAsku
B2S95/fezHiOAWMgS8gYiIaTaK0K7x8pffj8a9eIVDx2Lwfx74Hv/NZ++vF9u4NVrzD7JxR0Vn0Q
prUKSRo90sIhJ+xd48062qzXSXo3EK0pUfWy2lKYmrCVWoZJlKRhtAnju0ndGCnQQ8b+BIwxdhz+
fZ26xFfIWLQ6Iy16z2uE7HKJMXBG9Qhw76UnrglWMymMJtR96bpTakGQMaoQXKk5ePyOi/M8LK5U
Qb+5Rmya+uX+5xdKvsnExq/OVYu80fpgh4KqTovLkBb8Bc9uwhgDzdtB+7Uj29GNkjHgru5a1NRX
Dcd8uJ9DlsOWO8VzWOXA6xFJPuZwgiuDU/De+e9iLA6rznM1zWvCT5cHUKa2zuz9zTyhklr6pnDI
/dAXeDJ2zO5zhgTort4OrDOtpYLMM+reMI6T0Q/m/Vqw0xYAGeJqgSdn1ZVfUSJxOTzuZZ8EFw2W
szQKFs29DX3PYmxQ6vqNSzA5gT94wraopK7RWSeH5YPKFtGeR2WcpFUMwSn4DwAA//8DAOfuJ+KK
AwAA
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8d7b84f91a0522e4-ORD
- 8dd58d4f7ad142d1-EWR
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 24 Oct 2024 16:59:48 GMT
- Mon, 04 Nov 2024 15:14:08 GMT
Server:
- cloudflare
Transfer-Encoding:
Expand All @@ -282,7 +282,7 @@ interactions:
openai-organization:
- heartex
openai-processing-ms:
- '437'
- '359'
openai-version:
- '2020-10-01'
strict-transport-security:
Expand All @@ -294,13 +294,13 @@ interactions:
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149998974'
- '149998966'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_a0da2b8a7a016f7b72a260a873afb4cf
- req_a2dc0c0d4aede5c36555ef07c9ef3b6c
status:
code: 200
message: OK
Expand Down Expand Up @@ -356,22 +356,22 @@ interactions:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8d7b84fd785061fd-ORD
- 8dd58d530fd942ad-EWR
Connection:
- keep-alive
Content-Length:
- '262'
Content-Type:
- application/json; charset=utf-8
Date:
- Thu, 24 Oct 2024 16:59:48 GMT
- Mon, 04 Nov 2024 15:14:08 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=mWIPSP1jcAelqfb6KeGySlcGW6Zg.GqmDVPrbybrWl8-1729789188-1.0.1.1-NWztZ1HWnHc8cnYSOjCe74TC2Ma8QOikHAa0oWNgCn9XLiXBvQDySrQuO8b7g1twH0SH4o6JfwRgFNm8ouy3mQ;
path=/; expires=Thu, 24-Oct-24 17:29:48 GMT; domain=.api.openai.com; HttpOnly;
- __cf_bm=8BjE6koy0LToKFcs7pIgucndkUARFhNCjRzEsiXa3ck-1730733248-1.0.1.1-9DWjfxx66ZyFHpi_hBBjuY78ucZbEu2iSLQI5glryAJruhEaySb8n_lZ59XB8_Q4BpIoEsf2hjt7WuQUp9.Djg;
path=/; expires=Mon, 04-Nov-24 15:44:08 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=XVXbpEDTsosPnlpUpurpjN1wiejqi.IZXZCK0FJxpv8-1729789188820-0.0.1.1-604800000;
- _cfuvid=RxFASrWZo4AWBPRerc.fWI0YYOSk0r6.se0J1HjhSoc-1730733248645-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
X-Content-Type-Options:
- nosniff
Expand All @@ -382,7 +382,7 @@ interactions:
vary:
- Origin
x-request-id:
- req_2e39a553f5c2c365b6da3d8f2425ed2f
- req_62fdbb8af445ec1efec814a8c0e216c9
status:
code: 401
message: Unauthorized
Expand Down
2 changes: 1 addition & 1 deletion tests/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Output(BaseModel):

result = runtime.record_to_record(
record={"input_name": "Carla", "input_age": 25},
input_template="My name is {input_name} and I am {input_age:02d} years old with {brackets:.2f} and {brackets2:invalid_format_spec}.",
input_template="My name is {input_name} and I am {input_age:02d} years old with {brackets:.2f} and {brackets2:invalid_format_spec} and {input_name:invalid_format_spec}.",
instructions_template="",
response_model=Output,
)
Expand Down
Loading

0 comments on commit 882ca68

Please sign in to comment.