Fix formula, add flow diagram

pulumi · Dec 27, 2024 · b097b50 · b097b50
1 parent 1df9502
commit b097b50
Show file tree

Hide file tree

Showing 3 changed files with 285 additions and 21 deletions.
diff --git a/content/blog/codegen-learnings/flow.png b/content/blog/codegen-learnings/flow.png
diff --git a/content/blog/codegen-learnings/flow.tldr b/content/blog/codegen-learnings/flow.tldr
@@ -0,0 +1,267 @@
+{
+	"tldrawFileFormatVersion": 1,
+	"schema": {
+		"schemaVersion": 2,
+		"sequences": {
+			"com.tldraw.store": 4,
+			"com.tldraw.asset": 1,
+			"com.tldraw.camera": 1,
+			"com.tldraw.document": 2,
+			"com.tldraw.instance": 25,
+			"com.tldraw.instance_page_state": 5,
+			"com.tldraw.page": 1,
+			"com.tldraw.instance_presence": 6,
+			"com.tldraw.pointer": 1,
+			"com.tldraw.shape": 4,
+			"com.tldraw.asset.bookmark": 2,
+			"com.tldraw.asset.image": 5,
+			"com.tldraw.asset.video": 5,
+			"com.tldraw.shape.group": 0,
+			"com.tldraw.shape.text": 2,
+			"com.tldraw.shape.bookmark": 2,
+			"com.tldraw.shape.draw": 2,
+			"com.tldraw.shape.geo": 9,
+			"com.tldraw.shape.note": 8,
+			"com.tldraw.shape.line": 5,
+			"com.tldraw.shape.frame": 0,
+			"com.tldraw.shape.arrow": 5,
+			"com.tldraw.shape.highlight": 1,
+			"com.tldraw.shape.embed": 4,
+			"com.tldraw.shape.image": 4,
+			"com.tldraw.shape.video": 2,
+			"com.tldraw.binding.arrow": 0
+		}
+	},
+	"records": [
+		{
+			"gridSize": 10,
+			"name": "",
+			"meta": {},
+			"id": "document:document",
+			"typeName": "document"
+		},
+		{
+			"meta": {},
+			"id": "page:page",
+			"name": "Page 1",
+			"index": "a1",
+			"typeName": "page"
+		},
+		{
+			"id": "pointer:pointer",
+			"typeName": "pointer",
+			"x": 357.43359375,
+			"y": 132.7734375,
+			"lastActivityTimestamp": 1735267617634,
+			"meta": {}
+		},
+		{
+			"followingUserId": null,
+			"opacityForNextShape": 1,
+			"stylesForNextShape": {
+				"tldraw:size": "s",
+				"tldraw:font": "sans",
+				"tldraw:geo": "arrow-right"
+			},
+			"brush": null,
+			"scribbles": [],
+			"cursor": {
+				"type": "cross",
+				"rotation": 0
+			},
+			"isFocusMode": false,
+			"exportBackground": true,
+			"isDebugMode": false,
+			"isToolLocked": false,
+			"screenBounds": {
+				"x": 0,
+				"y": 0,
+				"w": 1128,
+				"h": 978
+			},
+			"insets": [
+				false,
+				false,
+				true,
+				false
+			],
+			"zoomBrush": null,
+			"isGridMode": false,
+			"isPenMode": false,
+			"chatMessage": "",
+			"isChatting": false,
+			"highlightedUserIds": [],
+			"isFocused": true,
+			"devicePixelRatio": 2,
+			"isCoarsePointer": false,
+			"isHoveringCanvas": true,
+			"openMenus": [],
+			"isChangingStyle": false,
+			"isReadonly": false,
+			"meta": {},
+			"duplicateProps": null,
+			"id": "instance:instance",
+			"currentPageId": "page:page",
+			"typeName": "instance"
+		},
+		{
+			"editingShapeId": null,
+			"croppingShapeId": null,
+			"selectedShapeIds": [
+				"shape:0vPmbhCIhdgPr7gYnOxm9"
+			],
+			"hoveredShapeId": "shape:0vPmbhCIhdgPr7gYnOxm9",
+			"erasingShapeIds": [],
+			"hintingShapeIds": [],
+			"focusedGroupId": null,
+			"meta": {},
+			"id": "instance_page_state:page:page",
+			"pageId": "page:page",
+			"typeName": "instance_page_state"
+		},
+		{
+			"x": 49.0078125,
+			"y": 97.7109375,
+			"z": 1,
+			"meta": {},
+			"id": "camera:page:page",
+			"typeName": "camera"
+		},
+		{
+			"x": 40.28125,
+			"y": 79.8203125,
+			"rotation": 0,
+			"isLocked": false,
+			"opacity": 1,
+			"meta": {},
+			"id": "shape:BJfJwAyLKVoUOGaixU2Xf",
+			"type": "text",
+			"props": {
+				"color": "black",
+				"size": "s",
+				"w": 252.359375,
+				"text": "User query:\n\n\"Generate code for S3 Bucket\"",
+				"font": "sans",
+				"textAlign": "start",
+				"autoSize": true,
+				"scale": 1
+			},
+			"parentId": "page:page",
+			"index": "a1",
+			"typeName": "shape"
+		},
+		{
+			"x": 374.58203125,
+			"y": 54.3125,
+			"rotation": 0,
+			"isLocked": false,
+			"opacity": 1,
+			"meta": {},
+			"id": "shape:z2gtvIBdf6_eCX2gzQ2L1",
+			"type": "text",
+			"props": {
+				"color": "black",
+				"size": "s",
+				"w": 310.2265625,
+				"text": "Search terms:\n\n\"AWS S3 bucket\",\n\"Pulumi AWS S3\",\n\"create S3 bucket Pulumi TypeScript\"",
+				"font": "sans",
+				"textAlign": "start",
+				"autoSize": true,
+				"scale": 1
+			},
+			"parentId": "page:page",
+			"index": "a22v7",
+			"typeName": "shape"
+		},
+		{
+			"x": 25.6796875,
+			"y": 58.140625,
+			"rotation": 0,
+			"isLocked": false,
+			"opacity": 1,
+			"meta": {},
+			"id": "shape:PGJKCoOonmat14xrifIp0",
+			"type": "geo",
+			"props": {
+				"w": 286.8671875,
+				"h": 132.9296875,
+				"geo": "rectangle",
+				"color": "black",
+				"labelColor": "black",
+				"fill": "none",
+				"dash": "draw",
+				"size": "s",
+				"font": "sans",
+				"text": "",
+				"align": "middle",
+				"verticalAlign": "middle",
+				"growY": 0,
+				"url": "",
+				"scale": 1
+			},
+			"parentId": "page:page",
+			"index": "a39Cr",
+			"typeName": "shape"
+		},
+		{
+			"x": 362.58203125,
+			"y": 35.85546875000003,
+			"rotation": 0,
+			"isLocked": false,
+			"opacity": 1,
+			"meta": {},
+			"id": "shape:hrkCX2QL0NyXDz_xl3n_i",
+			"type": "geo",
+			"props": {
+				"w": 342.9609375,
+				"h": 172.41015624999997,
+				"geo": "rectangle",
+				"color": "black",
+				"labelColor": "black",
+				"fill": "none",
+				"dash": "draw",
+				"size": "s",
+				"font": "sans",
+				"text": "",
+				"align": "middle",
+				"verticalAlign": "middle",
+				"growY": 0,
+				"url": "",
+				"scale": 1
+			},
+			"parentId": "page:page",
+			"index": "a40nT",
+			"typeName": "shape"
+		},
+		{
+			"x": 322.2734375,
+			"y": 100.125,
+			"rotation": 0,
+			"isLocked": false,
+			"opacity": 1,
+			"meta": {},
+			"id": "shape:0vPmbhCIhdgPr7gYnOxm9",
+			"type": "geo",
+			"props": {
+				"w": 35.16015625,
+				"h": 32.6484375,
+				"geo": "arrow-right",
+				"color": "black",
+				"labelColor": "black",
+				"fill": "none",
+				"dash": "draw",
+				"size": "s",
+				"font": "sans",
+				"text": "",
+				"align": "middle",
+				"verticalAlign": "middle",
+				"growY": 0,
+				"url": "",
+				"scale": 1
+			},
+			"parentId": "page:page",
+			"index": "a55Ml",
+			"typeName": "shape"
+		}
+	]
+}
diff --git a/content/blog/codegen-learnings/index.md b/content/blog/codegen-learnings/index.md
@@ -82,32 +82,16 @@ Because you were looking for the word "pie", you also retrieved a recipe for a S
 
 Now let's formalize this a bit. In RAG, recall measures the ratio of the relevant documents retrieved to the total number of relevant documents:
 
-TODO
-
-old:
-
-$$Recall = \frac{N(Retrieved\_documents \cap Relevant\_documents)}{N(Relevant\_documents)}$$
-
-fixed1:
-
-$$Recall = \frac{N(\text{Retrieved\_documents} \cap \text{Relevant\_documents})}{N(\text{Relevant\_documents})}$$
-
-fixed2:
-
-$$Recall = \frac{N(Retrieved\text{\_}documents \cap Relevant\text{\_}documents)}{N(Relevant\text{\_}documents)}$$
-
-alternative
-
-$$Recall = \frac{N(\text{RetrievedDocuments} \cap \text{RelevantDocuments})}{N(\text{RelevantDocuments})}$$
+$$Recall = \frac{N(Retrieved \cap Relevant)}{N(Relevant)}$$
 
 Where
 
-- $N(Retrieved\_documents \cap Relevant\_documents)$ is the number of documents that are both retrieved and relevant.
-- $N(Relevant\_documents)$ is the total number of relevant documents in the database.
+- $N(Retrieved \cap Relevant)$ is the number of documents that are both retrieved and relevant.
+- $N(Relevant)$ is the total number of relevant documents in the database.
 
 Good recall means that many documents relevant to the query were retrieved.
 
-$$Precision = \frac{N(Retrieved\_documents \cap Relevant\_documents)}{N(Retrieved\_documents)}$$
+$$Precision = \frac{N(Retrieved \cap Relevant)}{N(Retrieved)}$$
 
 Where $N(Retrieved)$ is the total number of documents that were retrieved.
 
@@ -122,12 +106,25 @@ Precision and recall are essential in understanding the information retrieval qu
 Fortunately, other metrics that can often effectively estimate retrieval quality have been developed. We have found a metric that can predict, with some degree of accuracy, whether the generated code will successfully compile. For this metric, we compare the _tokens_ present in the prompt produced by the LLM with the tokens present in the actually generated code. (By token here we mean a compiler token - an identifier such as the name of a class, method, or field - and not the traditional LLM token concept.)
 Intuitively, if a token present in the prompt also appears in the generated program, we can assume that the token effectively contributed to the generated program. Tokens in the generated program that were not part of the prompt are not necessarily wrong, but they are less trusted (they can come from the LLM's built-in knowledge or were, ahem, hallucinated).
 
-$$prompt \ coverage = \frac{N(Tokens\_in\_prompt \cap Tokens\_in\_code)}{N(Tokens\_in\_code)} $$
+1:
+$$prompt \ coverage = \frac{N(Tokens\_in\_prompt \cap Tokens\_in\_code)}{N(Tokens\_in\_code)}$$
+
+2:
+$$prompt \ coverage = \frac{N(\text{Tokens\_in\_prompt} \cap \text{Tokens\_in\_code})}{N(\text{Tokens\_in\_code})}$$
 
 <!-- Note: our documents call this Recall, which is not how the industry uses this term (see above) -->
 
 Prompt coverage is a metric we can observe in production, and it's one of several metrics we use when updating providers to ensure we haven't regressed the quality of the RAG.
 
+<span style="width: 50%; float: left; margin-left: 20px;">
+<span style="text-align:center">
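+    <img src="flow.png" alt="Flow diagram: a user query (&quot;Generate code for S3 Bucket&quot;) is turned into a list of search terms">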
+    <figcaption>
+    <i>Flow of blah</i>
+    </figcaption>
+</span>
+</span>
+
 <!--raw material 
 
 1.1. "generate code for S3 bucket" -> get search terms: