diff --git a/src/client.ts b/src/client.ts
index bca5a4b..c7200b1 100644
--- a/src/client.ts
+++ b/src/client.ts
@@ -2,6 +2,7 @@ import {
   Completion,
   CompletionResponse,
   CreateExperimentRequest,
+  CreateTestCaseCollection,
   DataItem,
   ExperimentOptions,
   ExperimentSchema,
@@ -19,6 +20,7 @@ import { genTraceId } from './helpers';
 import { asyncLocalStorage } from './utils/trace_utils';
 import { pareaProject } from './project';
 import { Experiment } from './experiment/experiment';
+import { createTestCases, createTestCollection } from './experiment/datasets';
 
 const COMPLETION_ENDPOINT = '/completion';
 const DEPLOYED_PROMPT_ENDPOINT = '/deployed-prompt';
@@ -26,7 +28,9 @@ const RECORD_FEEDBACK_ENDPOINT = '/feedback';
 const EXPERIMENT_ENDPOINT = '/experiment';
 const EXPERIMENT_STATS_ENDPOINT = '/experiment/{experiment_uuid}/stats';
 const EXPERIMENT_FINISHED_ENDPOINT = '/experiment/{experiment_uuid}/finished';
-const GET_COLLECTION_ENDPOINT = '/collection/{test_collection_name}';
+const GET_COLLECTION_ENDPOINT = '/collection/{test_collection_identifier}';
+const CREATE_COLLECTION_ENDPOINT = '/collection';
+const ADD_TEST_CASES_ENDPOINT = '/testcases';
 
 export class Parea {
   private apiKey: string;
@@ -123,14 +127,53 @@ export class Parea {
     return response.data;
   }
 
-  public async getCollection(testCollectionName: string): Promise {
+  public async getCollection(testCollectionIdentifier: string | number): Promise {
     const response = await this.client.request({
       method: 'GET',
-      endpoint: GET_COLLECTION_ENDPOINT.replace('{test_collection_name}', testCollectionName),
+      endpoint: GET_COLLECTION_ENDPOINT.replace('{test_collection_identifier}', String(testCollectionIdentifier)),
     });
     return response.data;
   }
 
+  /**
+   * Create a new test case collection on Parea from rows of key-value pairs.
+   *
+   * @param data - One record per test case; `target` and `tags` are reserved keys, all other keys become columns.
+   * @param name - Name for the collection; a random name is generated when omitted.
+   */
+  public async createTestCollection(data: Record[], name?: string | undefined): Promise {
+    const request: CreateTestCaseCollection = await createTestCollection(data, name);
+    await this.client.request({
+      method: 'POST',
+      endpoint: CREATE_COLLECTION_ENDPOINT,
+      data: request,
+    });
+  }
+
+  /**
+   * Add test cases to a collection, selected by `name` or by `datasetId`.
+   *
+   * @param data - One record per test case; `target` and `tags` are reserved keys.
+   * @param name - Name of the collection to extend.
+   * @param datasetId - Numeric ID of the collection to extend.
+   */
+  public async addTestCases(
+    data: Record[],
+    name?: string | undefined,
+    datasetId?: number | undefined,
+  ): Promise {
+    const request = {
+      id: datasetId,
+      name,
+      test_cases: await createTestCases(data),
+    };
+    await this.client.request({
+      method: 'POST',
+      endpoint: ADD_TEST_CASES_ENDPOINT,
+      data: request,
+    });
+  }
+
   public experiment(
     data: string | Iterable,
     func: (...dataItem: any[]) => Promise,
diff --git a/src/cookbook/enpoints_for_datasets.ts b/src/cookbook/enpoints_for_datasets.ts
new file mode 100644
index 0000000..226c604
--- /dev/null
+++ b/src/cookbook/enpoints_for_datasets.ts
@@ -0,0 +1,33 @@
+import { Parea } from '../client';
+import * as dotenv from 'dotenv';
+
+dotenv.config();
+
+const p = new Parea(process.env.PAREA_API_KEY);
+
+export async function main() {
+  const data = [
+    {
+      problem: '1+2',
+      target: 3,
+      tags: ['easy'],
+    },
+    { problem: 'Solve the differential equation dy/dx = 3y.', target: 'y = c * e^(3x)', tags: ['hard'] },
+  ];
+
+  // this will create a new dataset on Parea named "Math problems 2".
+  // The dataset will have one column named "problem", and two columns using the reserved names "target" and "tags".
+  // when using this dataset the expected prompt template should have a placeholder for the variable problem.
+  await p.createTestCollection(data, 'Math problems 2');
+
+  const new_data = [{ problem: 'Evaluate the integral ∫x^2 dx from 0 to 3.', target: 9, tags: ['hard'] }];
+  // this will add the new test cases to the existing "Math problems" dataset (not the "Math problems 2" dataset created above).
+  // New test cases must have the same columns as the existing dataset.
+  await p.addTestCases(new_data, 'Math problems');
+  // Or you can use the dataset ID instead of the name
+  await p.addTestCases(new_data, undefined, 121);
+}
+
+main().then(() => {
+  console.log('Done!');
+});
diff --git a/src/experiment/datasets.ts b/src/experiment/datasets.ts
new file mode 100644
index 0000000..16f681d
--- /dev/null
+++ b/src/experiment/datasets.ts
@@ -0,0 +1,76 @@
+import { genRandomName } from './utils';
+import { CreateTestCase, CreateTestCaseCollection } from '../types';
+
+/**
+ * Create a test case collection from a dictionary of test cases.
+ *
+ * @param data - list of key-value pairs where keys represent input names.
+ *   Each item in the list represents a test case row.
+ *   Target and Tags are reserved keys. There can only be one target and tags key per dict item.
+ *   If target is present it will represent the target/expected response for the inputs.
+ *   If tags are present they must be a list of json_serializable values.
+ * @param name - A unique name for the test collection. If not provided a random name will be generated.
+ * @returns CreateTestCaseCollection
+ */
+export async function createTestCollection(
+  data: Record[],
+  name?: string,
+): Promise {
+  if (!name) {
+    name = genRandomName();
+  }
+
+  const columnNames = Array.from(
+    new Set(data.flatMap((row) => Object.keys(row).filter((key) => key !== 'target' && key !== 'tags'))),
+  );
+  const testCases = await createTestCases(data);
+
+  return {
+    name,
+    column_names: columnNames,
+    test_cases: testCases,
+  };
+}
+
+/**
+ * Create a list of test cases from a dictionary.
+ *
+ * @param data - list of key-value pairs where keys represent input names.
+ *   Each item in the list represents a test case row.
+ *   Target and Tags are reserved keys. There can only be one target and tags key per dict item.
+ *   If target is present it will represent the target/expected response for the inputs.
+ *   If tags are present they must be a list of json_serializable values.
+ * @returns CreateTestCase[]
+ */
+export async function createTestCases(data: Record[]): Promise {
+  const testCases: CreateTestCase[] = [];
+
+  data.forEach((row) => {
+    const inputs: Record = {};
+    let target: string | undefined;
+    let tags: string[] = [];
+
+    Object.entries(row).forEach(([k, v]) => {
+      if (k === 'target') {
+        if (target !== undefined) {
+          console.warn('There can only be one target key per test case. Only the first target will be used.');
+        }
+        target = JSON.stringify(v, null, 2);
+      } else if (k === 'tags') {
+        if (!Array.isArray(v)) {
+          throw new Error('Tags must be a list of json serializable values.');
+        }
+        if (tags.length > 0) {
+          console.warn('There can only be one tags key per test case. Only the first set of tags will be used.');
+        }
+        tags = v.map((tag) => (typeof tag === 'string' ? tag : JSON.stringify(tag, null, 2)));
+      } else {
+        inputs[k] = typeof v === 'string' ? v : JSON.stringify(v, null, 2);
+      }
+    });
+
+    testCases.push({ inputs, target, tags });
+  });
+
+  return testCases;
+}
diff --git a/src/index.ts b/src/index.ts
index 62ce144..29ccb2c 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -48,6 +48,9 @@ export {
   LangChainTracerFields,
   TestCase,
   TestCaseCollection,
+  CreateTestCases,
+  CreateTestCase,
+  CreateTestCaseCollection,
 } from './types';
 
 export { levenshtein } from './evals/general/levenshtein';
diff --git a/src/types.ts b/src/types.ts
index 400dccc..a7faba9 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -358,3 +358,19 @@ export type ExperimentOptions = {
   datasetLevelEvalFuncs?: any[];
   nWorkers?: number;
 };
+
+export type CreateTestCase = {
+  inputs: Record;
+  target?: string;
+  tags: string[];
+};
+
+export type CreateTestCases = {
+  id?: number;
+  name?: string;
+  test_cases: CreateTestCase[];
+};
+
+export type CreateTestCaseCollection = CreateTestCases & {
+  column_names: string[];
+};