Integrate Google Gemini in groundtruth_evals.ipynb #9

Open, wants to merge 1 commit into base: main
286 changes: 19 additions & 267 deletions trulens/examples/quickstart/groundtruth_evals.ipynb
@@ -1,267 +1,19 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Ground Truth Evaluations\n",
"\n",
    "In this quickstart you will create and evaluate an LLM app using ground truth. Ground truth evaluation can be especially useful during early LLM experiments when you have a small set of example queries that are critical to get right.\n",
"\n",
    "Ground truth evaluation works by measuring the similarity of an LLM response to its matching verified response.\n",
"\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/main/trulens_eval/examples/quickstart/groundtruth_evals.ipynb)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add API keys\n",
    "For this quickstart, you will need an OpenAI API key."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ! pip install trulens_eval==0.19.2 openai==1.3.7"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from trulens_eval import Tru\n",
"\n",
"tru = Tru()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Simple LLM Application"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from openai import OpenAI\n",
"oai_client = OpenAI()\n",
"\n",
"from trulens_eval.tru_custom_app import instrument\n",
"\n",
"class APP:\n",
" @instrument\n",
" def completion(self, prompt):\n",
" completion = oai_client.chat.completions.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" temperature=0,\n",
" messages=\n",
" [\n",
" {\"role\": \"user\",\n",
" \"content\": \n",
" f\"Please answer the question: {prompt}\"\n",
" }\n",
" ]\n",
" ).choices[0].message.content\n",
" return completion\n",
" \n",
"llm_app = APP()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Feedback Function(s)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ In Ground Truth, input prompt will be set to __record__.main_input or `Select.RecordInput` .\n",
"✅ In Ground Truth, input response will be set to __record__.main_output or `Select.RecordOutput` .\n"
]
}
],
"source": [
"from trulens_eval import Feedback\n",
"from trulens_eval.feedback import GroundTruthAgreement\n",
"\n",
"golden_set = [\n",
" {\"query\": \"who invented the lightbulb?\", \"response\": \"Thomas Edison\"},\n",
" {\"query\": \"¿quien invento la bombilla?\", \"response\": \"Thomas Edison\"}\n",
"]\n",
"\n",
"f_groundtruth = Feedback(GroundTruthAgreement(golden_set).agreement_measure, name = \"Ground Truth\").on_input_output()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Instrument chain for logging with TruLens"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# add trulens as a context manager for llm_app\n",
"from trulens_eval import TruCustomApp\n",
"tru_app = TruCustomApp(llm_app, app_id = 'LLM App v1', feedbacks = [f_groundtruth])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Instrumented query engine can operate as a context manager:\n",
"with tru_app as recording:\n",
" llm_app.completion(\"¿quien invento la bombilla?\")\n",
" llm_app.completion(\"who invented the lightbulb?\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## See results"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ground Truth</th>\n",
" <th>positive_sentiment</th>\n",
    "      <th>Human Feedback</th>\n",
" <th>latency</th>\n",
" <th>total_cost</th>\n",
" </tr>\n",
" <tr>\n",
" <th>app_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>LLM App v1</th>\n",
" <td>1.0</td>\n",
" <td>0.38994</td>\n",
" <td>1.0</td>\n",
" <td>1.75</td>\n",
" <td>0.000076</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Ground Truth positive_sentiment Human Feedack latency \\\n",
"app_id \n",
"LLM App v1 1.0 0.38994 1.0 1.75 \n",
"\n",
" total_cost \n",
"app_id \n",
"LLM App v1 0.000076 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tru.get_leaderboard(app_ids=[tru_app.app_id])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.11.4 ('agents')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
},
"vscode": {
"interpreter": {
"hash": "7d153714b979d5e6d08dd8ec90712dd93bff2c9b6c1f0c118169738af3430cd4"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
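Conceptually, the ground-truth agreement feedback above maps a (response, golden response) pair to a score in [0, 1]. A toy stand-in can make the mechanics concrete; note this uses plain string similarity from `difflib` purely as an illustration, not TruLens's actual `agreement_measure`:

```python
from difflib import SequenceMatcher

def toy_agreement_measure(response: str, expected: str) -> float:
    """Illustrative only: score in [0, 1] for how closely an LLM
    response matches the verified (golden) response."""
    return SequenceMatcher(None, response.lower(), expected.lower()).ratio()

golden_set = [
    {"query": "who invented the lightbulb?", "response": "Thomas Edison"},
    {"query": "¿quien invento la bombilla?", "response": "Thomas Edison"},
]

score = toy_agreement_measure("Thomas Edison", golden_set[0]["response"])
print(round(score, 2))  # → 1.0 (identical strings)
```

TruLens's real measure is more robust to paraphrases, but the interface is the same: higher scores mean closer agreement with the golden response.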
from trulens_eval import Tru

tru = Tru()

# Use the Google Gemini model via the Vertex AI SDK
from google.cloud import aiplatform
from vertexai.generative_models import GenerativeModel  # ships with google-cloud-aiplatform

aiplatform.init(project='your-google-cloud-project-id', location='us-central1')

class GoogleGeminiModel:
    def completion(self, prompt):
        # Sketch of the Gemini call; the model name and response handling
        # are illustrative and should be checked against the Vertex AI docs.
        model = GenerativeModel("gemini-pro")
        return model.generate_content(prompt).text

gemini_model = GoogleGeminiModel()

# Replace the LLM application to use Gemini for completions
from trulens_eval.tru_custom_app import instrument

class AppWithGemini:
    @instrument
    def completion(self, prompt):
        return gemini_model.completion(prompt)

llm_app = AppWithGemini()
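The delegation pattern in `AppWithGemini` generalizes: any backend exposing a `completion(prompt)` method can be swapped in without touching the instrumented app or the ground-truth feedback. A minimal sketch of that seam, using a hypothetical offline stand-in model (names are illustrative, not part of this PR) so it runs without credentials:

```python
from typing import Protocol

class CompletionModel(Protocol):
    """Anything with a completion(prompt) -> str method can back the app."""
    def completion(self, prompt: str) -> str: ...

class EchoModel:
    """Offline stand-in for a real backend (e.g. OpenAI or Gemini)."""
    def completion(self, prompt: str) -> str:
        return f"echo: {prompt}"

class SwappableApp:
    """Same shape as AppWithGemini, but the backend is injected."""
    def __init__(self, model: CompletionModel):
        self.model = model

    def completion(self, prompt: str) -> str:
        return self.model.completion(prompt)

app = SwappableApp(EchoModel())
print(app.completion("who invented the lightbulb?"))  # → echo: who invented the lightbulb?
```

With this shape, the same `TruCustomApp` wrapper and `f_groundtruth` feedback from the notebook can evaluate either backend, which is useful when comparing GPT and Gemini answers against the same golden set.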