diff --git a/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/metadata/source/exif/ExifMetadataExtractor.kt b/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/metadata/source/exif/ExifMetadataExtractor.kt index d56233df..44354b5b 100644 --- a/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/metadata/source/exif/ExifMetadataExtractor.kt +++ b/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/metadata/source/exif/ExifMetadataExtractor.kt @@ -26,7 +26,6 @@ val logger: KLogger = KotlinLogging.logger {} private val NON_ALPHANUMERIC_REGEX = Regex("[^a-zA-Z0-9]") - private val DATE_FORMAT_PATTERNS = listOf( "yyyy:MM:dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss", @@ -45,50 +44,57 @@ private fun convertDate(date: String): Date? { return null } - -private fun convertType(directory: Directory, tagType: Int, type: Type): Value<*>? = when (type) { - Type.Boolean -> Value.Boolean(directory.getBoolean(tagType)) - Type.Byte -> Value.Byte(directory.getObject(tagType) as Byte) - Type.Datetime -> convertDate(directory.getString(tagType))?.let { Value.DateTime(it) } - Type.Double -> Value.Double(directory.getDouble(tagType)) - Type.Float -> Value.Float(directory.getFloat(tagType)) - Type.Int -> Value.Int(directory.getInt(tagType)) - Type.Long -> Value.Long(directory.getLong(tagType)) - Type.Short -> Value.Short(directory.getObject(tagType) as Short) - Type.String -> Value.String(directory.getString(tagType)) - Type.Text -> Value.String(directory.getString(tagType)) - Type.UUID -> Value.UUIDValue(UUID.fromString(directory.getString(tagType))) - is Type.BooleanVector -> throw IllegalArgumentException("Unsupported type: $type") - is Type.DoubleVector -> throw IllegalArgumentException("Unsupported type: $type") - is Type.FloatVector -> throw IllegalArgumentException("Unsupported type: $type") - is Type.IntVector -> throw IllegalArgumentException("Unsupported type: $type") - is Type.LongVector -> throw IllegalArgumentException("Unsupported type: $type") -} - -private fun JsonObject.convertType(type: Type): Value<*>? { - val jsonPrimitive = this.jsonPrimitive - if (jsonPrimitive.isString) { - return when (type) { - Type.String -> Value.String(jsonPrimitive.content) - Type.Datetime -> convertDate(jsonPrimitive.content)?.let { Value.DateTime(it) } - else -> null - } - } else { - return when (type) { - Type.Boolean -> Value.Boolean(jsonPrimitive.boolean) - Type.Byte -> Value.Byte(jsonPrimitive.int.toByte()) - Type.Short -> Value.Short(jsonPrimitive.int.toShort()) - Type.Int -> Value.Int(jsonPrimitive.int) - Type.Long -> Value.Long(jsonPrimitive.int.toLong()) - Type.Float -> Value.Float(jsonPrimitive.float) - Type.Double -> Value.Double(jsonPrimitive.double) - else -> null +fun JsonElement.toValue(): Value<*>? { + return when (this) { + is JsonPrimitive -> { + when { + this.booleanOrNull != null -> Value.Boolean(this.boolean) + this.intOrNull != null -> Value.Int(this.int) + this.longOrNull != null -> Value.Long(this.long) + this.floatOrNull != null -> Value.Float(this.float) + this.doubleOrNull != null -> Value.Double(this.double) + this.isString -> Value.String(this.content) // Only isString exists + else -> null + } } + is JsonArray, is JsonObject -> Value.String(this.toString()) + else -> null } } -class ExifMetadataExtractor(input: Operator, analyser: ExifMetadata, field: Schema.Field, AnyMapStructDescriptor>?, parameters: Map) : AbstractExtractor, AnyMapStructDescriptor>(input, analyser, field, parameters) { +fun Value<*>.convertToType(type: Type): Value<*>? { + return when (type) { + Type.Boolean -> if (this is Value.Boolean) this else null + Type.Byte -> if (this is Value.Int) Value.Byte(this.value.toByte()) else null + Type.Short -> if (this is Value.Int) Value.Short(this.value.toShort()) else null + Type.Int -> if (this is Value.Int) this else null + Type.Long -> if (this is Value.Long) this else null + Type.Float -> if (this is Value.Float) this else null + Type.Double -> if (this is Value.Double) this else null + Type.String -> if (this is Value.String) this else null + + Type.Text -> if (this is Value.String) { + Value.Text(this.value) + } else null + + Type.Datetime -> if (this is Value.String) { + convertDate(this.value)?.let { Value.DateTime(it) } + } else null + + Type.UUID -> if (this is Value.String) { + Value.UUIDValue(UUID.fromString(this.value)) + } else null + + else -> null + } +} +class ExifMetadataExtractor( + input: Operator, + analyser: ExifMetadata, + field: Schema.Field, AnyMapStructDescriptor>?, + parameters: Map +) : AbstractExtractor, AnyMapStructDescriptor>(input, analyser, field, parameters) { override fun matches(retrievable: Retrievable): Boolean = retrievable.filteredAttribute(SourceAttribute::class.java)?.source is FileSource @@ -98,21 +104,26 @@ class ExifMetadataExtractor(input: Operator, analyser: ExifMetadata val columnValues = mutableMapOf>() val attributes = this.field?.parameters?.map { (k, v) -> k to Attribute(k, Type.valueOf(v)) }?.toMap() ?: emptyMap() + for (directory in metadata.directories) { for (tag in directory.tags) { val tagname = tag.tagName.replace(NON_ALPHANUMERIC_REGEX, "") val fullname = "${directory.name.replace(NON_ALPHANUMERIC_REGEX, "")}_$tagname" if (fullname == "ExifSubIFD_UserComment" || fullname == "JpegComment_JPEGComment") { - if (fullname in attributes){ + if (fullname in attributes) { columnValues[fullname] = Value.String(tag.description) } try { - val json = Json.parseToJsonElement(tag.description).jsonObject - json.forEach { (key, value) -> - attributes[key]?.let { attribute -> - value.jsonObject.convertType(attribute.type)?.let { converted -> - columnValues[key] = converted + val jsonElement = Json.parseToJsonElement(tag.description) + if (jsonElement is JsonObject) { + attributes.forEach { (attributeKey, attributeValue) -> + if (jsonElement.containsKey(attributeKey)) { + val jsonValue = jsonElement[attributeKey]?.toValue() + + jsonValue?.convertToType(attributeValue.type)?.let { converted -> + columnValues[attributeKey] = converted + } } } } @@ -125,7 +136,6 @@ class ExifMetadataExtractor(input: Operator, analyser: ExifMetadata columnValues[fullname] = converted } } - } } } @@ -134,3 +144,18 @@ class ExifMetadataExtractor(input: Operator, analyser: ExifMetadata return listOf(AnyMapStructDescriptor(UUID.randomUUID(), retrievable.id, attributes.values.toList(), columnValues.mapValues { it.value }, field = this.field)) } } + +private fun convertType(directory: Directory, tagType: Int, type: Type): Value<*>? = when (type) { + Type.Boolean -> Value.Boolean(directory.getBoolean(tagType)) + Type.Byte -> Value.Byte(directory.getObject(tagType) as Byte) + Type.Datetime -> convertDate(directory.getString(tagType))?.let { Value.DateTime(it) } + Type.Double -> Value.Double(directory.getDouble(tagType)) + Type.Float -> Value.Float(directory.getFloat(tagType)) + Type.Int -> Value.Int(directory.getInt(tagType)) + Type.Long -> Value.Long(directory.getLong(tagType)) + Type.Short -> Value.Short(directory.getObject(tagType) as Short) + Type.String -> Value.String(directory.getString(tagType)) + Type.Text -> Value.Text(directory.getString(tagType)) // Ensure Type.Text returns Value.Text + Type.UUID -> Value.UUIDValue(UUID.fromString(directory.getString(tagType))) + is Type.BooleanVector, is Type.DoubleVector, is Type.FloatVector, is Type.IntVector, is Type.LongVector -> throw IllegalArgumentException("Unsupported type: $type") +} diff --git a/vitrivr-engine-index/src/main/kotlin/org/vitrivr/engine/index/transform/DescriptorAsContentTransformer.kt b/vitrivr-engine-index/src/main/kotlin/org/vitrivr/engine/index/transform/DescriptorAsContentTransformer.kt index c8a97962..140a800c 100644 --- a/vitrivr-engine-index/src/main/kotlin/org/vitrivr/engine/index/transform/DescriptorAsContentTransformer.kt +++ b/vitrivr-engine-index/src/main/kotlin/org/vitrivr/engine/index/transform/DescriptorAsContentTransformer.kt @@ -11,6 +11,7 @@ import org.vitrivr.engine.core.model.content.factory.ContentFactory import org.vitrivr.engine.core.model.descriptor.Descriptor import org.vitrivr.engine.core.model.descriptor.scalar.StringDescriptor import org.vitrivr.engine.core.model.descriptor.scalar.TextDescriptor +import org.vitrivr.engine.core.model.descriptor.struct.AnyMapStructDescriptor import org.vitrivr.engine.core.model.descriptor.struct.metadata.source.FileSourceMetadataDescriptor import org.vitrivr.engine.core.model.retrievable.Ingested import org.vitrivr.engine.core.model.retrievable.Retrievable @@ -46,23 +47,34 @@ class DescriptorAsContentTransformer : TransformerFactory { descriptor -> descriptor.field?.fieldName == fieldName }.forEach{ descriptor -> - val content = convertDescriptorToContent(descriptor) - retrievable.addContent(content) - retrievable.addAttribute(ContentAuthorAttribute(content.id, name)) - logger.debug { "Descriptor ${descriptor.id} of retrievable ${retrievable.id} has been converted to content element." } + val pairs = processDescriptor(descriptor) + for (pair in pairs) { + val content = pair.second + retrievable.addContent(content) + for (key in pair.first) { + val attribute = ContentAuthorAttribute(content.id, key) + retrievable.addAttribute(attribute) + } + logger.debug { "Descriptor ${descriptor.id} of retrievable ${retrievable.id} has been converted to content element." } + } } emit(retrievable) } } - private fun convertDescriptorToContent(descriptor: Descriptor<*>): ContentElement<*> { + private fun processDescriptor(descriptor: Descriptor<*>): List, ContentElement<*>>> { return when (descriptor) { - is StringDescriptor -> contentFactory.newTextContent(descriptor.value.value) - is TextDescriptor -> contentFactory.newTextContent(descriptor.value.value) - is FileSourceMetadataDescriptor -> contentFactory.newTextContent(descriptor.path.value) + is StringDescriptor -> listOf(Pair(setOf(name), contentFactory.newTextContent(descriptor.value.value))) + is TextDescriptor -> listOf(Pair(setOf(name), contentFactory.newTextContent(descriptor.value.value))) + is FileSourceMetadataDescriptor -> listOf(Pair(setOf(name), contentFactory.newTextContent(descriptor.path.value))) + is AnyMapStructDescriptor -> { + descriptor.values().map{ + entry -> + Pair(setOf(name, "$name.${entry.key}"), contentFactory.newTextContent(entry.value.toString())) + } + } else -> throw IllegalArgumentException("Descriptor type not supported.") } - } } } \ No newline at end of file