diff --git a/.gitignore b/.gitignore index d81e264f1..0012e4cb4 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,5 @@ thumbnails/ # OpenAPI generated code openapi thumbnails* + +vitrivr-json/ diff --git a/example-configs/ingestion/migration/video-ct.json b/example-configs/ingestion/migration/video-ct.json new file mode 100644 index 000000000..cc6e98dfa --- /dev/null +++ b/example-configs/ingestion/migration/video-ct.json @@ -0,0 +1,107 @@ +{ + "schema": "vitrivr-ct", + "context": { + "contentFactory": "InMemoryContentFactory", + "resolverName":"disk", + + "local": { + "clip": { + "contentSources": "selector" + }, + "averagecolor": { + "contentSources": "selector" + }, + "content": { + "path": "../cache" + }, + "thumbnail": { + "contentSources": "selector", + "maxSideResolution": "400", + "mimeType": "JPG" + }, + "selector":{ + "contentSources": "decoder" + }, + "enumerator": { + "path": "./media", + "depth": "3" + }, + "decoder": { + "timeWindowMs": "6000" + }, + "filter": { + "type": "SOURCE:VIDEO" + }, + "path": { + "field": "file" + } + } + }, + "operators": { + "enumerator": { + "type": "ENUMERATOR", + "factory": "FileSystemEnumerator", + "mediaTypes": ["VIDEO"] + }, + "decoder": { + "type": "DECODER", + "factory": "VideoDecoder" + }, + "metadata":{ + "type": "EXTRACTOR", + "fieldName": "video" + }, + "source":{ + "type": "EXTRACTOR", + "fieldName": "file" + }, + "path": { + "type": "TRANSFORMER", + "factory":"DescriptorAsContentTransformer" + }, + "selector": { + "type": "TRANSFORMER", + "factory": "LastContentAggregator" + }, + "time": { + "type": "EXTRACTOR", + "fieldName": "time" + }, + "averagecolor": { + "type": "EXTRACTOR", + "fieldName": "averagecolor" + }, + "clip": { + "type": "EXTRACTOR", + "fieldName": "clip" + }, + "thumbnail": { + "type": "EXPORTER", + "exporterName": "thumbnail" + }, + "filter": { + "type": "TRANSFORMER", + "factory": "TypeFilterTransformer" + } + }, + "operations": { + "enumerator": {"operator": "enumerator"}, + "decoder": 
{"operator": "decoder", "inputs": ["enumerator"]}, + "path": {"operator": "path", "inputs": ["decoder"]}, + "metadata": {"operator": "metadata", "inputs": ["path"], "merge": "COMBINE"}, + + "source": {"operator": "source", "inputs": ["metadata"]}, + + "time": {"operator": "time", "inputs": ["source"]}, + "selector": {"operator": "selector", "inputs": ["time"]}, + + "thumbnail": {"operator": "thumbnail", "inputs": ["selector"]}, + + "clip": {"operator": "clip", "inputs": ["thumbnail"]}, + "filter": {"operator": "filter","inputs": ["clip"],"merge": "MERGE"} + }, + "output": [ + "filter" + ], + "mergeType": "MERGE" +} \ No newline at end of file diff --git a/example-configs/ingestion/migration/video-json.json b/example-configs/ingestion/migration/video-json.json new file mode 100644 index 000000000..e17cc5252 --- /dev/null +++ b/example-configs/ingestion/migration/video-json.json @@ -0,0 +1,107 @@ +{ + "schema": "vitrivr-json", + "context": { + "contentFactory": "InMemoryContentFactory", + "resolverName":"disk", + + "local": { + "clip": { + "contentSources": "selector" + }, + "averagecolor": { + "contentSources": "selector" + }, + "content": { + "path": "../cache" + }, + "thumbnail": { + "contentSources": "selector", + "maxSideResolution": "400", + "mimeType": "JPG" + }, + "selector":{ + "contentSources": "decoder" + }, + "enumerator": { + "path": "./media", + "depth": "3" + }, + "decoder": { + "timeWindowMs": "6000" + }, + "filter": { + "type": "SOURCE:VIDEO" + }, + "path": { + "field": "file" + } + } + }, + "operators": { + "enumerator": { + "type": "ENUMERATOR", + "factory": "FileSystemEnumerator", + "mediaTypes": ["VIDEO"] + }, + "decoder": { + "type": "DECODER", + "factory": "VideoDecoder" + }, + "metadata":{ + "type": "EXTRACTOR", + "fieldName": "video" + }, + "source":{ + "type": "EXTRACTOR", + "fieldName": "file" + }, + "path": { + "type": "TRANSFORMER", + "factory":"DescriptorAsContentTransformer" + }, + "selector": { + "type": "TRANSFORMER", + "factory": 
"LastContentAggregator" + }, + "time": { + "type": "EXTRACTOR", + "fieldName": "time" + }, + "averagecolor": { + "type": "EXTRACTOR", + "fieldName": "averagecolor" + }, + "clip": { + "type": "EXTRACTOR", + "fieldName": "clip" + }, + "thumbnail": { + "type": "EXPORTER", + "exporterName": "thumbnail" + }, + "filter": { + "type": "TRANSFORMER", + "factory": "TypeFilterTransformer" + } + }, + "operations": { + "enumerator": {"operator": "enumerator"}, + "decoder": {"operator": "decoder", "inputs": ["enumerator"]}, + "path": {"operator": "path", "inputs": ["decoder"]}, + "metadata": {"operator": "metadata", "inputs": ["path"], "merge": "COMBINE"}, + + "source": {"operator": "source", "inputs": ["metadata"]}, + + "time": {"operator": "time", "inputs": ["source"]}, + "selector": {"operator": "selector", "inputs": ["time"]}, + + "thumbnail": {"operator": "thumbnail", "inputs": ["selector"]}, + + "clip": {"operator": "clip", "inputs": ["thumbnail"]}, + "filter": {"operator": "filter","inputs": ["clip"],"merge": "MERGE"} + }, + "output": [ + "filter" + ], + "mergeType": "MERGE" +} \ No newline at end of file diff --git a/example-configs/ingestion/migration/video-pg.json b/example-configs/ingestion/migration/video-pg.json new file mode 100644 index 000000000..ad76bb5e7 --- /dev/null +++ b/example-configs/ingestion/migration/video-pg.json @@ -0,0 +1,107 @@ +{ + "schema": "vitrivr-pg", + "context": { + "contentFactory": "InMemoryContentFactory", + "resolverName":"disk", + + "local": { + "clip": { + "contentSources": "selector" + }, + "averagecolor": { + "contentSources": "selector" + }, + "content": { + "path": "../cache" + }, + "thumbnail": { + "contentSources": "selector", + "maxSideResolution": "400", + "mimeType": "JPG" + }, + "selector":{ + "contentSources": "decoder" + }, + "enumerator": { + "path": "./media", + "depth": "3" + }, + "decoder": { + "timeWindowMs": "6000" + }, + "filter": { + "type": "SOURCE:VIDEO" + }, + "path": { + "field": "file" + } + } + }, + 
"operators": { + "enumerator": { + "type": "ENUMERATOR", + "factory": "FileSystemEnumerator", + "mediaTypes": ["VIDEO"] + }, + "decoder": { + "type": "DECODER", + "factory": "VideoDecoder" + }, + "metadata":{ + "type": "EXTRACTOR", + "fieldName": "video" + }, + "source":{ + "type": "EXTRACTOR", + "fieldName": "file" + }, + "path": { + "type": "TRANSFORMER", + "factory":"DescriptorAsContentTransformer" + }, + "selector": { + "type": "TRANSFORMER", + "factory": "LastContentAggregator" + }, + "time": { + "type": "EXTRACTOR", + "fieldName": "time" + }, + "averagecolor": { + "type": "EXTRACTOR", + "fieldName": "averagecolor" + }, + "clip": { + "type": "EXTRACTOR", + "fieldName": "clip" + }, + "thumbnail": { + "type": "EXPORTER", + "exporterName": "thumbnail" + }, + "filter": { + "type": "TRANSFORMER", + "factory": "TypeFilterTransformer" + } + }, + "operations": { + "enumerator": {"operator": "enumerator"}, + "decoder": {"operator": "decoder", "inputs": ["enumerator"]}, + "path": {"operator": "path", "inputs": ["decoder"]}, + "metadata": {"operator": "metadata", "inputs": ["path"], "merge": "COMBINE"}, + + "source": {"operator": "source", "inputs": ["metadata"]}, + + "time": {"operator": "time", "inputs": ["source"]}, + "selector": {"operator": "selector", "inputs": ["time"]}, + + "thumbnail": {"operator": "thumbnail", "inputs": ["selector"]}, + + "clip": {"operator": "clip", "inputs": ["thumbnail"]}, + "filter": {"operator": "filter","inputs": ["clip"],"merge": "MERGE"} + }, + "output": [ + "filter" + ], + "mergeType": "MERGE" +} \ No newline at end of file diff --git a/example-configs/schema/migration-schema.json b/example-configs/schema/migration-schema.json new file mode 100644 index 000000000..94b2676b7 --- /dev/null +++ b/example-configs/schema/migration-schema.json @@ -0,0 +1,176 @@ +{ + "schemas": { + "vitrivr-ct": { + "connection": { + "database": "CottontailConnectionProvider", + "parameters": { + "Host": "127.0.0.1", + "port": "1865" + } + }, + "fields": { + 
"averagecolor": { + "factory": "AverageColor" + }, + "file": { + "factory": "FileSourceMetadata" + }, + "clip": { + "factory": "DenseEmbedding", + "parameters": { + "host": "http://10.34.64.84:8888/", + "model": "open-clip-vit-b32", + "length": "512", + "timeoutSeconds": "100", + "retries": "1000" + } + }, + "time": { + "factory": "TemporalMetadata" + }, + "video": { + "factory": "VideoSourceMetadata" + } + }, + "resolvers": { + "disk": { + "factory": "DiskResolver", + "parameters": { + "location": "./example/thumbs" + } + } + }, + "exporters": { + "thumbnail": { + "factory": "ThumbnailExporter", + "resolverName": "disk", + "parameters": { + "maxSideResolution": "300", + "mimeType": "JPG" + } + } + }, + "extractionPipelines": { + "video": { + "path": "./example-configs/ingestion/migration/video-ct.json" + } + } + }, + + "vitrivr-pg": { + "connection": { + "database": "PgVectorConnectionProvider", + "parameters": { + "Host": "127.0.0.1", + "port": "5432", + "username": "postgres", + "password": "vitrivr" + } + }, + "fields": { + "averagecolor": { + "factory": "AverageColor" + }, + "file": { + "factory": "FileSourceMetadata" + }, + "clip": { + "factory": "DenseEmbedding", + "parameters": { + "host": "http://10.34.64.84:8888/", + "model": "open-clip-vit-b32", + "length": "512", + "timeoutSeconds": "100", + "retries": "1000" + } + }, + "time": { + "factory": "TemporalMetadata" + }, + "video": { + "factory": "VideoSourceMetadata" + } + }, + "resolvers": { + "disk": { + "factory": "DiskResolver", + "parameters": { + "location": "./example/thumbs" + } + } + }, + "exporters": { + "thumbnail": { + "factory": "ThumbnailExporter", + "resolverName": "disk", + "parameters": { + "maxSideResolution": "300", + "mimeType": "JPG" + } + } + }, + "extractionPipelines": { + "video": { + "path": "./example-configs/ingestion/migration/video-pg.json" + } + } + }, + + + "vitrivr-json": { + "connection": { + "database": "JsonlConnectionProvider", + "parameters": { + "root": "." 
+ } + }, + "fields": { + "averagecolor": { + "factory": "AverageColor" + }, + "file": { + "factory": "FileSourceMetadata" + }, + "clip": { + "factory": "DenseEmbedding", + "parameters": { + "host": "http://10.34.64.84:8888/", + "model": "open-clip-vit-b32", + "length": "512", + "timeoutSeconds": "100", + "retries": "1000" + } + }, + "time": { + "factory": "TemporalMetadata" + }, + "video": { + "factory": "VideoSourceMetadata" + } + }, + "resolvers": { + "disk": { + "factory": "DiskResolver", + "parameters": { + "location": "./example/thumbs" + } + } + }, + "exporters": { + "thumbnail": { + "factory": "ThumbnailExporter", + "resolverName": "disk", + "parameters": { + "maxSideResolution": "300", + "mimeType": "JPG" + } + } + }, + "extractionPipelines": { + "video": { + "path": "./example-configs/ingestion/migration/video-json.json" + } + } + } + } +} \ No newline at end of file diff --git a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/AbstractJsonlReader.kt b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/AbstractJsonlReader.kt index 3eff781d5..2868d5d8d 100644 --- a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/AbstractJsonlReader.kt +++ b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/AbstractJsonlReader.kt @@ -41,7 +41,6 @@ abstract class AbstractJsonlReader>( } override fun getAll(): Sequence { - return BufferedReader(InputStreamReader(path.inputStream())).lineSequence().mapNotNull { try { val list = Json.decodeFromString(it) @@ -54,7 +53,6 @@ abstract class AbstractJsonlReader>( null } } - } override fun queryAndJoin(query: Query): Sequence { @@ -71,7 +69,7 @@ abstract class AbstractJsonlReader>( } override fun getForRetrievable(retrievableId: RetrievableId): Sequence { - return getAll().filter { it.retrievableId == retrievableId} + return getAll().filter { it.retrievableId == retrievableId } } override fun 
getAllForRetrievable(retrievableIds: Iterable): Sequence { diff --git a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/retrievable/JsonlRetrievableReader.kt b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/retrievable/JsonlRetrievableReader.kt index c95003656..b5f638fcd 100644 --- a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/retrievable/JsonlRetrievableReader.kt +++ b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/retrievable/JsonlRetrievableReader.kt @@ -67,6 +67,6 @@ class JsonlRetrievableReader(override val connection: JsonlConnection) : Retriev } } - override fun count(): Long = BufferedReader(InputStreamReader(retrievablePath.inputStream())).lineSequence().count().toLong() - + override fun count(): Long = + BufferedReader(InputStreamReader(retrievablePath.inputStream())).lineSequence().count().toLong() } \ No newline at end of file diff --git a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/scalar/ScalarJsonlReader.kt b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/scalar/ScalarJsonlReader.kt index 6403a518b..e56f75ae5 100644 --- a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/scalar/ScalarJsonlReader.kt +++ b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/scalar/ScalarJsonlReader.kt @@ -20,8 +20,8 @@ class ScalarJsonlReader( override fun toDescriptor(list: AttributeContainerList): ScalarDescriptor<*, *> { val map = list.list.associateBy { it.attribute.name } - val retrievableId = (map[DESCRIPTOR_ID_COLUMN_NAME]?.value!!.toValue() as Value.UUIDValue).value - val descriptorId = (map[RETRIEVABLE_ID_COLUMN_NAME]?.value!!.toValue() as Value.UUIDValue).value + val retrievableId = (map[RETRIEVABLE_ID_COLUMN_NAME]?.value!!.toValue() as Value.UUIDValue).value + val descriptorId = 
(map[DESCRIPTOR_ID_COLUMN_NAME]?.value!!.toValue() as Value.UUIDValue).value val value = map["value"]?.value!!.toValue() return when (prototype) { diff --git a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/struct/StructJsonlReader.kt b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/struct/StructJsonlReader.kt index a730ae619..fa2134d3c 100644 --- a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/struct/StructJsonlReader.kt +++ b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/struct/StructJsonlReader.kt @@ -26,8 +26,8 @@ class StructJsonlReader( ?: throw IllegalStateException("Provided type ${this.field.analyser.descriptorClass} does not have a primary constructor.") val valueMap = mutableMapOf>() - val retrievableId = (map[DESCRIPTOR_ID_COLUMN_NAME]?.value!!.toValue() as Value.UUIDValue).value - val descriptorId = (map[RETRIEVABLE_ID_COLUMN_NAME]?.value!!.toValue() as Value.UUIDValue).value + val retrievableId = (map[RETRIEVABLE_ID_COLUMN_NAME]?.value!!.toValue() as Value.UUIDValue).value + val descriptorId = (map[DESCRIPTOR_ID_COLUMN_NAME]?.value!!.toValue() as Value.UUIDValue).value val parameters: MutableList = mutableListOf( descriptorId, retrievableId, diff --git a/vitrivr-engine-module-pgvector/src/main/kotlin/org/vitrivr/engine/database/pgvector/retrievable/RetrievableReader.kt b/vitrivr-engine-module-pgvector/src/main/kotlin/org/vitrivr/engine/database/pgvector/retrievable/RetrievableReader.kt index 030a4d99a..37d44cc7d 100644 --- a/vitrivr-engine-module-pgvector/src/main/kotlin/org/vitrivr/engine/database/pgvector/retrievable/RetrievableReader.kt +++ b/vitrivr-engine-module-pgvector/src/main/kotlin/org/vitrivr/engine/database/pgvector/retrievable/RetrievableReader.kt @@ -120,6 +120,9 @@ class RetrievableReader(override val connection: PgVectorConnection): Retrievabl } query.append("$OBJECT_ID_COLUMN_NAME = ANY (?)") } + if 
(query.endsWith("WHERE ")) { + query.delete(query.length - 7, query.length) + } return sequence { try { diff --git a/vitrivr-engine-server/build.gradle b/vitrivr-engine-server/build.gradle index 654a8ca13..80a7b4c4d 100644 --- a/vitrivr-engine-server/build.gradle +++ b/vitrivr-engine-server/build.gradle @@ -8,6 +8,7 @@ dependencies { /** vitrivr engine dependencies. */ api project(':vitrivr-engine-index') api project(':vitrivr-engine-query') + api project(':vitrivr-engine-module-jsonl') api project(':vitrivr-engine-module-features') /* TODO: This dependency is not necessary and only here to facilitate easy testing. */ api project(':vitrivr-engine-module-cottontaildb') /* TODO: This dependency is not necessary and only here to facilitate easy testing. */ api project(':vitrivr-engine-module-pgvector') /* TODO: This dependency is not necessary and only here to facilitate easy testing. */ diff --git a/vitrivr-engine-server/src/main/kotlin/org/vitrivr/engine/server/Main.kt b/vitrivr-engine-server/src/main/kotlin/org/vitrivr/engine/server/Main.kt index 6af233582..925f505a6 100644 --- a/vitrivr-engine-server/src/main/kotlin/org/vitrivr/engine/server/Main.kt +++ b/vitrivr-engine-server/src/main/kotlin/org/vitrivr/engine/server/Main.kt @@ -86,7 +86,7 @@ fun main(args: Array) { /* Prepare CLI endpoint. */ val cli = Cli(manager) for (schema in manager.listSchemas()) { - cli.register(SchemaCommand(schema, executor)) + cli.register(SchemaCommand(schema, executor, manager)) } /* Start the Javalin server. 
*/
diff --git a/vitrivr-engine-server/src/main/kotlin/org/vitrivr/engine/server/api/cli/commands/SchemaCommand.kt b/vitrivr-engine-server/src/main/kotlin/org/vitrivr/engine/server/api/cli/commands/SchemaCommand.kt
index 1f01268eb..7b89324c2 100644
--- a/vitrivr-engine-server/src/main/kotlin/org/vitrivr/engine/server/api/cli/commands/SchemaCommand.kt
+++ b/vitrivr-engine-server/src/main/kotlin/org/vitrivr/engine/server/api/cli/commands/SchemaCommand.kt
@@ -14,6 +14,8 @@ import org.vitrivr.engine.core.config.ingest.IngestionPipelineBuilder
 import org.vitrivr.engine.core.config.pipeline.execution.ExecutionServer
 import org.vitrivr.engine.core.database.Initializer
 import org.vitrivr.engine.core.model.metamodel.Schema
+import org.vitrivr.engine.core.model.metamodel.SchemaManager
+import org.vitrivr.engine.core.model.relationship.Relationship
 import java.nio.file.Path
 import java.nio.file.Paths
 import java.util.*
@@ -23,7 +25,7 @@ import java.util.*
  * @author Ralph Gasser
  * @version 1.0
  */
-class SchemaCommand(private val schema: Schema, private val server: ExecutionServer) : NoOpCliktCommand(
+class SchemaCommand(private val schema: Schema, private val server: ExecutionServer, private val manager: SchemaManager) : NoOpCliktCommand(
     name = schema.name,
     help = "Groups commands related to a specific schema, in this case the schema '${schema.name}'.",
     epilog = "Schema related commands usually have the form: , e.g., `vitrivr about` Check help for command specific parameters.",
@@ -37,7 +39,8 @@ class SchemaCommand(private val schema: Schema, private val server: ExecutionSer
             About(),
             Initialize(),
             Extract(this.schema, this.server),
-            Status(this.schema, this.server)
+            Status(this.schema, this.server),
+            MigrateTo(this.schema, this.manager)
         )
     }
 
@@ -166,4 +169,82 @@ class SchemaCommand(private val schema: Schema, private val server: ExecutionSer
             logger.info { "Status: ${executor.status(jobId)} at ${System.currentTimeMillis()}" }
         }
     }
+
+    /**
+     * [CliktCommand] that copies all data of this [Schema] into another (target) [Schema].
+     *
+     * Retrievables are copied first, followed by the relationships between them and the descriptors of every field.
+     * The target [Schema] is looked up by name using the provided [SchemaManager].
+     */
+    inner class MigrateTo(private val schema: Schema, private val manager: SchemaManager) :
+        CliktCommand(name = "migrate-to", help = "Migrates all data from this schema to another (target) schema.") {
+
+        private val logger = KotlinLogging.logger {}
+
+        /** Name of the target schema the data is migrated to. */
+        private val targetSchemaName: String? by option(
+            "-n",
+            "--name",
+            help = "name of the target schema."
+        )
+
+        /**
+         * Runs the migration. Every problem is logged and ends the command early; all checks that need no data
+         * access are performed before the first write, so that a misconfiguration does not leave the target half-filled.
+         */
+        override fun run() {
+            /* The option is optional on the command line, hence its absence must be handled explicitly. */
+            val targetName = this.targetSchemaName ?: run {
+                logger.error { "Error trying to migrate from ${schema.name}. No target schema name provided (-n / --name)." }
+                return
+            }
+
+            /* Migrating a schema onto itself would write every entry back into the schema it was just read from. */
+            if (targetName == this.schema.name) {
+                logger.error { "Error trying to migrate from ${schema.name} to $targetName. Source and target are identical." }
+                return
+            }
+
+            /* Check if target [Schema] exists. */
+            val targetSchema = this.manager.getSchema(targetName) ?: run {
+                logger.error { "Error trying to migrate from ${schema.name} to $targetName. $targetName does not exist." }
+                return
+            }
+
+            /* Check field compatibility up front, i.e., before anything is written to the target. */
+            val sourceFields = this.schema.fields()
+            val targetFields = targetSchema.fields()
+            if (sourceFields.size != targetFields.size) {
+                logger.error { "Error trying to migrate from ${schema.name} to $targetName. Number of fields do not match." }
+                return
+            }
+
+            logger.info { "Migrating from ${schema.name} to $targetName..." }
+
+            /* Migrate retrievables; the materialised list is also used for reporting (avoids a second scan for count()). */
+            val sourceReader = this.schema.connection.getRetrievableReader()
+            val targetWriter = targetSchema.connection.getRetrievableWriter()
+            val retrievables = sourceReader.getAll().toList()
+            targetWriter.addAll(retrievables)
+            logger.info { "Migrated ${retrievables.size} retrievables." }
+
+            /* Migrate relationships. */
+            val relationships = sourceReader.getConnections(emptyList(), emptyList(), emptyList())
+                .map { (subjectId, predicate, objectId) -> Relationship.ById(subjectId, predicate, objectId, false) }
+                .toList()
+            targetWriter.connectAll(relationships)
+            logger.info { "Migrated ${relationships.size} relationships." }
+
+            /*
+             * Migrate fields.
+             * NOTE(review): fields are paired by their position in the two schemas; this presumably requires both schemas
+             * to declare the same fields in the same order - confirm, or pair them by name instead.
+             */
+            sourceFields.zip(targetFields).forEach { (sourceField, targetField) ->
+                targetField.getWriter().addAll(sourceField.getReader().getAll().toList())
+            }
+            logger.info { "Migrated ${sourceFields.size} fields." }
+            logger.info { "Migration complete." }
+        }
+    }
 }