Skip to content

Commit

Permalink
Fix formula, add flow diagram
Browse files Browse the repository at this point in the history
  • Loading branch information
arturl committed Dec 27, 2024
1 parent 1df9502 commit b097b50
Show file tree
Hide file tree
Showing 3 changed files with 285 additions and 21 deletions.
Binary file added content/blog/codegen-learnings/flow.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
267 changes: 267 additions & 0 deletions content/blog/codegen-learnings/flow.tldr
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
{
"tldrawFileFormatVersion": 1,
"schema": {
"schemaVersion": 2,
"sequences": {
"com.tldraw.store": 4,
"com.tldraw.asset": 1,
"com.tldraw.camera": 1,
"com.tldraw.document": 2,
"com.tldraw.instance": 25,
"com.tldraw.instance_page_state": 5,
"com.tldraw.page": 1,
"com.tldraw.instance_presence": 6,
"com.tldraw.pointer": 1,
"com.tldraw.shape": 4,
"com.tldraw.asset.bookmark": 2,
"com.tldraw.asset.image": 5,
"com.tldraw.asset.video": 5,
"com.tldraw.shape.group": 0,
"com.tldraw.shape.text": 2,
"com.tldraw.shape.bookmark": 2,
"com.tldraw.shape.draw": 2,
"com.tldraw.shape.geo": 9,
"com.tldraw.shape.note": 8,
"com.tldraw.shape.line": 5,
"com.tldraw.shape.frame": 0,
"com.tldraw.shape.arrow": 5,
"com.tldraw.shape.highlight": 1,
"com.tldraw.shape.embed": 4,
"com.tldraw.shape.image": 4,
"com.tldraw.shape.video": 2,
"com.tldraw.binding.arrow": 0
}
},
"records": [
{
"gridSize": 10,
"name": "",
"meta": {},
"id": "document:document",
"typeName": "document"
},
{
"meta": {},
"id": "page:page",
"name": "Page 1",
"index": "a1",
"typeName": "page"
},
{
"id": "pointer:pointer",
"typeName": "pointer",
"x": 357.43359375,
"y": 132.7734375,
"lastActivityTimestamp": 1735267617634,
"meta": {}
},
{
"followingUserId": null,
"opacityForNextShape": 1,
"stylesForNextShape": {
"tldraw:size": "s",
"tldraw:font": "sans",
"tldraw:geo": "arrow-right"
},
"brush": null,
"scribbles": [],
"cursor": {
"type": "cross",
"rotation": 0
},
"isFocusMode": false,
"exportBackground": true,
"isDebugMode": false,
"isToolLocked": false,
"screenBounds": {
"x": 0,
"y": 0,
"w": 1128,
"h": 978
},
"insets": [
false,
false,
true,
false
],
"zoomBrush": null,
"isGridMode": false,
"isPenMode": false,
"chatMessage": "",
"isChatting": false,
"highlightedUserIds": [],
"isFocused": true,
"devicePixelRatio": 2,
"isCoarsePointer": false,
"isHoveringCanvas": true,
"openMenus": [],
"isChangingStyle": false,
"isReadonly": false,
"meta": {},
"duplicateProps": null,
"id": "instance:instance",
"currentPageId": "page:page",
"typeName": "instance"
},
{
"editingShapeId": null,
"croppingShapeId": null,
"selectedShapeIds": [
"shape:0vPmbhCIhdgPr7gYnOxm9"
],
"hoveredShapeId": "shape:0vPmbhCIhdgPr7gYnOxm9",
"erasingShapeIds": [],
"hintingShapeIds": [],
"focusedGroupId": null,
"meta": {},
"id": "instance_page_state:page:page",
"pageId": "page:page",
"typeName": "instance_page_state"
},
{
"x": 49.0078125,
"y": 97.7109375,
"z": 1,
"meta": {},
"id": "camera:page:page",
"typeName": "camera"
},
{
"x": 40.28125,
"y": 79.8203125,
"rotation": 0,
"isLocked": false,
"opacity": 1,
"meta": {},
"id": "shape:BJfJwAyLKVoUOGaixU2Xf",
"type": "text",
"props": {
"color": "black",
"size": "s",
"w": 252.359375,
"text": "User query:\n\n\"Generate code for S3 Bucket\"",
"font": "sans",
"textAlign": "start",
"autoSize": true,
"scale": 1
},
"parentId": "page:page",
"index": "a1",
"typeName": "shape"
},
{
"x": 374.58203125,
"y": 54.3125,
"rotation": 0,
"isLocked": false,
"opacity": 1,
"meta": {},
"id": "shape:z2gtvIBdf6_eCX2gzQ2L1",
"type": "text",
"props": {
"color": "black",
"size": "s",
"w": 310.2265625,
"text": "Search terms:\n\n\"AWS S3 bucket\",\n\"Pulumi AWS S3\",\n\"create S3 bucket Pulumi TypeScript\"",
"font": "sans",
"textAlign": "start",
"autoSize": true,
"scale": 1
},
"parentId": "page:page",
"index": "a22v7",
"typeName": "shape"
},
{
"x": 25.6796875,
"y": 58.140625,
"rotation": 0,
"isLocked": false,
"opacity": 1,
"meta": {},
"id": "shape:PGJKCoOonmat14xrifIp0",
"type": "geo",
"props": {
"w": 286.8671875,
"h": 132.9296875,
"geo": "rectangle",
"color": "black",
"labelColor": "black",
"fill": "none",
"dash": "draw",
"size": "s",
"font": "sans",
"text": "",
"align": "middle",
"verticalAlign": "middle",
"growY": 0,
"url": "",
"scale": 1
},
"parentId": "page:page",
"index": "a39Cr",
"typeName": "shape"
},
{
"x": 362.58203125,
"y": 35.85546875000003,
"rotation": 0,
"isLocked": false,
"opacity": 1,
"meta": {},
"id": "shape:hrkCX2QL0NyXDz_xl3n_i",
"type": "geo",
"props": {
"w": 342.9609375,
"h": 172.41015624999997,
"geo": "rectangle",
"color": "black",
"labelColor": "black",
"fill": "none",
"dash": "draw",
"size": "s",
"font": "sans",
"text": "",
"align": "middle",
"verticalAlign": "middle",
"growY": 0,
"url": "",
"scale": 1
},
"parentId": "page:page",
"index": "a40nT",
"typeName": "shape"
},
{
"x": 322.2734375,
"y": 100.125,
"rotation": 0,
"isLocked": false,
"opacity": 1,
"meta": {},
"id": "shape:0vPmbhCIhdgPr7gYnOxm9",
"type": "geo",
"props": {
"w": 35.16015625,
"h": 32.6484375,
"geo": "arrow-right",
"color": "black",
"labelColor": "black",
"fill": "none",
"dash": "draw",
"size": "s",
"font": "sans",
"text": "",
"align": "middle",
"verticalAlign": "middle",
"growY": 0,
"url": "",
"scale": 1
},
"parentId": "page:page",
"index": "a55Ml",
"typeName": "shape"
}
]
}
39 changes: 18 additions & 21 deletions content/blog/codegen-learnings/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,32 +82,16 @@ Because you were looking for the word "pie", you also retrieved a recipe for a S

Now let's formalize this a bit. Recall measures the ratio of the relevant documents retrieved to the total number of relevant documents in RAG:

TODO

old:

$$Recall = \frac{N(Retrieved\_documents \cap Relevant\_documents)}{N(Relevant\_documents)}$$

fixed1:

$$Recall = \frac{N(\text{Retrieved\_documents} \cap \text{Relevant\_documents})}{N(\text{Relevant\_documents})}$$

fixed2:

$$Recall = \frac{N(Retrieved\text{\_}documents \cap Relevant\text{\_}documents)}{N(Relevant\text{\_}documents)}$$

alternative

$$Recall = \frac{N(\text{RetrievedDocuments} \cap \text{RelevantDocuments})}{N(\text{RelevantDocuments})}$$
$$Recall = \frac{N(Retrieved \cap Relevant)}{N(Relevant)}$$

Where

- $N(Retrieved\_documents \cap Relevant\_documents)$ is the number of documents that are both retrieved and relevant.
- $N(Relevant\_documents)$ is the total number of relevant documents in the database.
- $N(Retrieved \cap Relevant)$ is the number of documents that are both retrieved and relevant.
- $N(Relevant)$ is the total number of relevant documents in the database.

Good recall means that many documents relevant to the query were retrieved.

$$Precision = \frac{N(Retrieved\_documents \cap Relevant\_documents)}{N(Retrieved\_documents)}$$
$$Precision = \frac{N(Retrieved \cap Relevant)}{N(Retrieved)}$$

Where $N(Retrieved)$ is the total number of documents that were retrieved.

Expand All @@ -122,12 +106,25 @@ Precision and recall are essential in understanding the information retrieval qu
Fortunately, other metrics that often can effectively estimate retrieval quality have been developed. We have found a metric that can predict, with some degree of accuracy, whether the generated code will successfully compile. For this metric, we compare the _tokens_ present in the prompt produced by the LLM with the number of tokens present in the actually generated code. (By token here we mean a compiler token - an identifier such as the name of a class, method or a field - and not the traditional LLM token concept.)
Intuitively, if a token present in the prompt also appears in the generated program, we can assume that the token effectively contributed to the generated program. Tokens in the generated program that were not part of the prompt are not necessarily wrong but they are less trusted (they can come from the LLM built-in knowledge or were, ahem, hallucinated).

$$prompt \ coverage = \frac{N(Tokens\_in\_prompt \cap Tokens\_in\_code)}{N(Tokens\_in\_code)} $$
1:
$$prompt \ coverage = \frac{N(Tokens\_in\_prompt \cap Tokens\_in\_code)}{N(Tokens\_in\_code)}$$

2:
$$prompt \ coverage = \frac{N(\text{Tokens\_in\_prompt} \cap \text{Tokens\_in\_code})}{N(\text{Tokens\_in\_code})}$$

<!-- Note: our documents call it Recall, which is not how industry uses this term (see above) -->

Prompt coverage is a metric we can observe in production, and it's one of several metrics we use when updating providers to ensure we haven't regressed the quality of the RAG.

<span style="width: 50%; float: left; margin-left: 20px;">
<span style="text-align:center">
<img src="flow.png" alt="">
<figcaption>
<i>Flow of blah</i>
</figcaption>
</span>
</span>

<!--raw material
1.1. "generate code for S3 bucket" -> get search terms:
Expand Down

0 comments on commit b097b50

Please sign in to comment.