Skip to content

Commit

Permalink
refactor(firestore-bigquery-export): update gen-schema gemini approach
Browse files Browse the repository at this point in the history
  • Loading branch information
cabljac committed Dec 23, 2024
1 parent 0519ebc commit cf64a4b
Show file tree
Hide file tree
Showing 4 changed files with 318 additions and 107 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import * as fs from "fs/promises";
import { SchemaSchema } from "../../schema/genkitSchema";

// Replace fs/promises with stubs so no test ever touches the real disk.
// Only `access` and `writeFile` are provided; each test programs them as needed.
jest.mock("fs/promises", () => {
  const access = jest.fn();
  const writeFile = jest.fn();
  return { access, writeFile };
});

// Keep every real genkit export, but swap the `genkit` factory for a stub whose
// instances expose inert `defineTool` / `definePrompt` mocks.
jest.mock("genkit", () => {
  const actualGenkit = jest.requireActual("genkit");
  return {
    ...actualGenkit,
    genkit: jest.fn(() => ({
      defineTool: jest.fn(),
      definePrompt: jest.fn(),
    })),
  };
});

/**
 * Tests for the schema-writing flow.
 *
 * NOTE(review): these tests exercise local stand-in handlers defined below,
 * not a handler exported by the production module. Presumably they mirror the
 * `writeSchema` tool registered by `runAgent` — confirm they stay in sync.
 */
describe("runAgent - additional test", () => {
  type WriteSchemaInput = { fileName: string; content: string };

  const schemaDirectory = "./schemas";
  const tablePrefix = "testPrefix";
  const schemaFileName = `${tablePrefix}.json`;
  const expectedPath = `${schemaDirectory}/${schemaFileName}`;
  const schemaContent = JSON.stringify({
    fields: [
      {
        name: "id",
        type: "number",
        description: "Unique identifier for the document",
      },
    ],
  });

  const accessMock = fs.access as jest.Mock;
  const writeFileMock = fs.writeFile as jest.Mock;

  /**
   * Writes `content` to `<schemaDirectory>/<fileName>` unless the file exists.
   *
   * @returns a human-readable status string; never overwrites an existing file.
   */
  const writeSchemaHandler = async ({
    fileName,
    content,
  }: WriteSchemaInput): Promise<string> => {
    const target = `${schemaDirectory}/${fileName}`;
    try {
      // `access` resolving means the file is already there.
      await fs.access(target);
    } catch {
      // `access` rejected: treat as "does not exist" and create the file.
      await fs.writeFile(target, content);
      return "Schema created successfully";
    }
    return "Error: Schema file already exists";
  };

  /**
   * Same as `writeSchemaHandler`, but first parses `content` as JSON and
   * validates it against `SchemaSchema`. Any failure (bad JSON, schema
   * violation, or a write error) is reported as an "Error creating schema"
   * string instead of being thrown.
   */
  const validatingWriteSchemaHandler = async (
    input: WriteSchemaInput
  ): Promise<string> => {
    try {
      SchemaSchema.parse(JSON.parse(input.content));
      return await writeSchemaHandler(input);
    } catch (error) {
      // The caught value is not guaranteed to be an Error; narrow before use.
      const reason = error instanceof Error ? error.message : String(error);
      return `Error creating schema: ${reason}`;
    }
  };

  beforeEach(() => {
    jest.clearAllMocks();
    // clearAllMocks keeps queued `mock...Once` values; reset the fs stubs so
    // nothing programmed in one test can leak into the next.
    accessMock.mockReset();
    writeFileMock.mockReset();
  });

  it("should write a new schema file if it does not already exist", async () => {
    accessMock.mockRejectedValueOnce(new Error("File does not exist"));
    writeFileMock.mockResolvedValueOnce(undefined);

    const result = await writeSchemaHandler({
      fileName: schemaFileName,
      content: schemaContent,
    });

    expect(fs.access).toHaveBeenCalledWith(expectedPath);
    expect(fs.writeFile).toHaveBeenCalledWith(expectedPath, schemaContent);
    expect(result).toBe("Schema created successfully");
  });

  it("should return an error if the schema file already exists", async () => {
    // Simulate an existing file: `access` resolves.
    accessMock.mockResolvedValueOnce(undefined);

    const result = await writeSchemaHandler({
      fileName: schemaFileName,
      content: schemaContent,
    });

    expect(fs.access).toHaveBeenCalledWith(expectedPath);
    expect(fs.writeFile).not.toHaveBeenCalled();
    expect(result).toBe("Error: Schema file already exists");
  });

  it("should return an error if the schema content is invalid", async () => {
    const invalidSchemaContent = JSON.stringify({
      fields: [
        { name: "id", type: "invalid_type" }, // Invalid type
      ],
    });

    const result = await validatingWriteSchemaHandler({
      fileName: schemaFileName,
      content: invalidSchemaContent,
    });

    // Validation must fail before any file-system call is made. Asserting the
    // "already exists" message here would only pass if the invalid content
    // had been accepted, which is the opposite of what this test is about.
    expect(result).toMatch(/^Error creating schema:/);
    expect(fs.access).not.toHaveBeenCalled();
    expect(fs.writeFile).not.toHaveBeenCalled();
  });
});
58 changes: 5 additions & 53 deletions firestore-bigquery-export/scripts/gen-schema-view/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ import { runAgent } from "./schema/genkit";
const BIGQUERY_VALID_CHARACTERS = /^[a-zA-Z0-9_]+$/;
const FIRESTORE_VALID_CHARACTERS = /^[^\/]+$/;
const GCP_PROJECT_VALID_CHARACTERS = /^[a-z][a-z0-9-]{0,29}$/;
const MIN_SAMPLE_SIZE = 1;
const MAX_SAMPLE_SIZE = 100;
const DEFAULT_SAMPLE_SIZE = 10;
const DEFAULT_SAMPLE_SIZE = 100;

const validateInput = (value: any, name: string, regex: RegExp) => {
if (!value || value === "" || value.trim() === "") {
Expand Down Expand Up @@ -145,14 +143,9 @@ program
)
.option(
"--use-gemini-agent",
"Use Gemini AI Agent to automatically analyze your data and generate the schema",
"Use Gemini to automatically analyze your data and generate a draft schema. You will have a chance to manually view and approve this schema before it is used.",
false
)
.option(
"--agent-sample-size <size>",
`Number of documents for the Gemini Agent to analyze (${MIN_SAMPLE_SIZE}-${MAX_SAMPLE_SIZE})`,
DEFAULT_SAMPLE_SIZE.toString()
)
.option(
"--schema-dir <directory>",
"Directory to store generated schemas",
Expand Down Expand Up @@ -196,7 +189,7 @@ const questions = [
},
{
message:
"Would you like to use a Gemini AI Agent to automatically analyze your data and generate the schema?",
"Would you like to use a Gemini to automatically analyze your data and generate a draft schema?",
name: "useGeminiAgent",
type: "confirm",
default: false,
Expand All @@ -221,22 +214,6 @@ const questions = [
return true;
},
},
{
message: `How many documents should the Gemini Agent analyze? (${MIN_SAMPLE_SIZE}-${MAX_SAMPLE_SIZE})`,
name: "agentSampleSize",
type: "number",
default: DEFAULT_SAMPLE_SIZE,
when: (answers) => answers.useGeminiAgent,
validate: (value) => {
if (isNaN(value) || value < MIN_SAMPLE_SIZE) {
return `Please provide a number greater than or equal to ${MIN_SAMPLE_SIZE}`;
}
if (value > MAX_SAMPLE_SIZE) {
return `Sample size must not exceed ${MAX_SAMPLE_SIZE} documents`;
}
return true;
},
},
{
message:
"Where should this script look for schema definitions? (Enter a comma-separated list of, optionally globbed, paths to files or directories).",
Expand Down Expand Up @@ -285,30 +262,22 @@ async function run(): Promise<number> {

if (config.useGeminiAgent) {
try {
console.log("\nStarting Gemini Agent schema generation process...");

const sampleData = await sampleFirestoreDocuments(
config.collectionPath!,
config.agentSampleSize!
);

console.log("Initializing Gemini Agent...");
const chat = runAgent(
config.googleAiKey!,
config.schemaDirectory || "./schemas",
config.tableNamePrefix,
config.collectionPath!,
sampleData
);

console.log("Generating schema from sample data...");
await chat.send(
`Please analyze these documents and generate an appropriate BigQuery schema. ` +
`**Then use the writeSchema tool to save it as "${config.tableNamePrefix}.json**". ` +
`Let me know once you've created the schema file.`
);

console.log("Schema generation complete. Reading generated schema...");
const schemaName = `${config.tableNamePrefix}`;
const schemas = readSchemas([`./schemas/${schemaName}.json`]);

Expand All @@ -322,10 +291,7 @@ async function run(): Promise<number> {

const schemaPath = `./schemas/${config.tableNamePrefix}.json`;
console.log(
`\nSchema generation complete. The schema file has been created at: ${schemaPath}`
);
console.log(
"Please review the schema file and confirm if you want to proceed."
`\nSchema generation complete. The schema file has been created at: ${schemaPath}. Please review the schema file and confirm if you want to proceed.`
);

const confirmation = await inquirer.prompt([
Expand Down Expand Up @@ -405,18 +371,6 @@ async function parseConfig(): Promise<CliConfig> {
);
process.exit(1);
}

const sampleSize = parseInt(program.agentSampleSize);
if (
isNaN(sampleSize) ||
sampleSize < MIN_SAMPLE_SIZE ||
sampleSize > MAX_SAMPLE_SIZE
) {
console.error(
`Agent sample size must be between ${MIN_SAMPLE_SIZE} and ${MAX_SAMPLE_SIZE}.`
);
process.exit(1);
}
}

return {
Expand All @@ -427,9 +381,7 @@ async function parseConfig(): Promise<CliConfig> {
collectionPath: program.collectionPath,
schemas: program.useGeminiAgent ? {} : readSchemas(program.schemaFiles),
useGeminiAgent: program.useGeminiAgent,
agentSampleSize: program.useGeminiAgent
? parseInt(program.agentSampleSize)
: undefined,
agentSampleSize: DEFAULT_SAMPLE_SIZE,
googleAiKey: program.googleAiKey,
};
}
Expand Down
Loading

0 comments on commit cf64a4b

Please sign in to comment.