Skip to content

Commit

Permalink
Merge pull request #114 from vitrivr/feature/exif_to_prompt
Browse files Browse the repository at this point in the history
Extending Content Author to support more fine-grained tagging
  • Loading branch information
faberf authored Oct 18, 2024
2 parents fd4dd8d + 44df37c commit 2d86a8e
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ val logger: KLogger = KotlinLogging.logger {}

private val NON_ALPHANUMERIC_REGEX = Regex("[^a-zA-Z0-9]")


private val DATE_FORMAT_PATTERNS = listOf(
"yyyy:MM:dd HH:mm:ss",
"yyyy-MM-dd HH:mm:ss",
Expand All @@ -45,50 +44,57 @@ private fun convertDate(date: String): Date? {
return null
}


/**
 * Reads the tag identified by [tagType] from [directory] and wraps it in the [Value] that corresponds to [type].
 *
 * @param directory The metadata [Directory] to read the tag from.
 * @param tagType The numeric identifier of the tag inside [directory].
 * @param type The target [Type] the tag should be converted to.
 * @return The wrapped value, or null if [type] is [Type.Datetime] and the tag could not be parsed as a date.
 * @throws IllegalArgumentException If [type] is one of the vector types, or if [type] is [Type.UUID] and the
 * tag is not a valid UUID string.
 */
private fun convertType(directory: Directory, tagType: Int, type: Type): Value<*>? = when (type) {
    Type.Boolean -> Value.Boolean(directory.getBoolean(tagType))
    // NOTE(review): unchecked cast, fails if the stored object is not a boxed Byte.
    Type.Byte -> Value.Byte(directory.getObject(tagType) as Byte)
    Type.Datetime -> convertDate(directory.getString(tagType))?.let { Value.DateTime(it) }
    Type.Double -> Value.Double(directory.getDouble(tagType))
    Type.Float -> Value.Float(directory.getFloat(tagType))
    Type.Int -> Value.Int(directory.getInt(tagType))
    Type.Long -> Value.Long(directory.getLong(tagType))
    // NOTE(review): unchecked cast, fails if the stored object is not a boxed Short.
    Type.Short -> Value.Short(directory.getObject(tagType) as Short)
    Type.String -> Value.String(directory.getString(tagType))
    // A Text attribute must be wrapped as Value.Text (not Value.String) so the value matches the declared type.
    Type.Text -> Value.Text(directory.getString(tagType))
    Type.UUID -> Value.UUIDValue(UUID.fromString(directory.getString(tagType)))
    // Vector types cannot be represented by a single metadata tag.
    is Type.BooleanVector,
    is Type.DoubleVector,
    is Type.FloatVector,
    is Type.IntVector,
    is Type.LongVector -> throw IllegalArgumentException("Unsupported type: $type")
}

private fun JsonObject.convertType(type: Type): Value<*>? {
val jsonPrimitive = this.jsonPrimitive
if (jsonPrimitive.isString) {
return when (type) {
Type.String -> Value.String(jsonPrimitive.content)
Type.Datetime -> convertDate(jsonPrimitive.content)?.let { Value.DateTime(it) }
else -> null
}
} else {
return when (type) {
Type.Boolean -> Value.Boolean(jsonPrimitive.boolean)
Type.Byte -> Value.Byte(jsonPrimitive.int.toByte())
Type.Short -> Value.Short(jsonPrimitive.int.toShort())
Type.Int -> Value.Int(jsonPrimitive.int)
Type.Long -> Value.Long(jsonPrimitive.int.toLong())
Type.Float -> Value.Float(jsonPrimitive.float)
Type.Double -> Value.Double(jsonPrimitive.double)
else -> null
/**
 * Converts this [JsonElement] into a [Value], choosing the value type from the JSON literal itself.
 *
 * Quoted literals always become [Value.String]. Unquoted literals are probed in the order boolean, int, long,
 * float, double, and the first representation that parses wins. Arrays and objects are kept as their JSON text.
 *
 * @return The converted [Value], or null for a JSON null or any literal that matches none of the cases.
 */
fun JsonElement.toValue(): Value<*>? {
    return when (this) {
        is JsonPrimitive -> {
            when {
                // Check the quoting first: the *OrNull accessors look only at the literal's content, so a quoted
                // "1984" or "true" would otherwise be classified as a number/boolean and lose its string type.
                this.isString -> Value.String(this.content)
                this.booleanOrNull != null -> Value.Boolean(this.boolean)
                this.intOrNull != null -> Value.Int(this.int)
                this.longOrNull != null -> Value.Long(this.long)
                this.floatOrNull != null -> Value.Float(this.float)
                // NOTE(review): presumably only reached when the float probe fails but the double probe succeeds - confirm.
                this.doubleOrNull != null -> Value.Double(this.double)
                else -> null
            }
        }
        // Structured values are preserved verbatim as their JSON string representation.
        is JsonArray, is JsonObject -> Value.String(this.toString())
        else -> null
    }
}

class ExifMetadataExtractor(input: Operator<Retrievable>, analyser: ExifMetadata, field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>?, parameters: Map<String,String>) : AbstractExtractor<ContentElement<*>, AnyMapStructDescriptor>(input, analyser, field, parameters) {
/**
 * Converts this [Value] to the [Value] variant that corresponds to the requested [type].
 *
 * Numeric values are widened where the target type can hold them (Int to Long, Int/Long to Float,
 * Int/Long/Float to Double). Narrowing to Byte or Short is only performed when the Int value is within the
 * target range. String values are additionally parsed for [Type.Datetime] and [Type.UUID].
 *
 * @param type The target [Type].
 * @return The converted [Value], or null if this value cannot be represented as [type].
 * @throws IllegalArgumentException If [type] is [Type.UUID] and the string is not a valid UUID.
 */
fun Value<*>.convertToType(type: Type): Value<*>? {
    return when (type) {
        Type.Boolean -> this as? Value.Boolean

        // Narrowing: refuse out-of-range values rather than letting toByte()/toShort() wrap around silently.
        Type.Byte -> (this as? Value.Int)?.value
            ?.takeIf { it in Byte.MIN_VALUE..Byte.MAX_VALUE }
            ?.let { Value.Byte(it.toByte()) }
        Type.Short -> (this as? Value.Int)?.value
            ?.takeIf { it in Short.MIN_VALUE..Short.MAX_VALUE }
            ?.let { Value.Short(it.toShort()) }

        Type.Int -> this as? Value.Int

        // Widening: a number small enough to be an Int is still a valid Long.
        Type.Long -> when (this) {
            is Value.Long -> this
            is Value.Int -> Value.Long(this.value.toLong())
            else -> null
        }

        // Widening: integral numbers are valid Float values.
        Type.Float -> when (this) {
            is Value.Float -> this
            is Value.Int -> Value.Float(this.value.toFloat())
            is Value.Long -> Value.Float(this.value.toFloat())
            else -> null
        }

        // Widening: any other numeric value is a valid Double. A Float source keeps only Float precision.
        Type.Double -> when (this) {
            is Value.Double -> this
            is Value.Float -> Value.Double(this.value.toDouble())
            is Value.Int -> Value.Double(this.value.toDouble())
            is Value.Long -> Value.Double(this.value.toDouble())
            else -> null
        }

        Type.String -> this as? Value.String

        Type.Text -> (this as? Value.String)?.let { Value.Text(it.value) }

        // Unparseable dates yield null via convertDate.
        Type.Datetime -> (this as? Value.String)?.let { str ->
            convertDate(str.value)?.let { Value.DateTime(it) }
        }

        // UUID.fromString throws on malformed input; this is propagated to the caller unchanged.
        Type.UUID -> (this as? Value.String)?.let { Value.UUIDValue(UUID.fromString(it.value)) }

        // Vector types cannot be produced from a single scalar value.
        else -> null
    }
}

class ExifMetadataExtractor(
input: Operator<Retrievable>,
analyser: ExifMetadata,
field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>?,
parameters: Map<String, String>
) : AbstractExtractor<ContentElement<*>, AnyMapStructDescriptor>(input, analyser, field, parameters) {

override fun matches(retrievable: Retrievable): Boolean =
retrievable.filteredAttribute(SourceAttribute::class.java)?.source is FileSource
Expand All @@ -98,21 +104,26 @@ class ExifMetadataExtractor(input: Operator<Retrievable>, analyser: ExifMetadata
val columnValues = mutableMapOf<AttributeName, Value<*>>()

val attributes = this.field?.parameters?.map { (k, v) -> k to Attribute(k, Type.valueOf(v)) }?.toMap() ?: emptyMap()

for (directory in metadata.directories) {
for (tag in directory.tags) {
val tagname = tag.tagName.replace(NON_ALPHANUMERIC_REGEX, "")
val fullname = "${directory.name.replace(NON_ALPHANUMERIC_REGEX, "")}_$tagname"

if (fullname == "ExifSubIFD_UserComment" || fullname == "JpegComment_JPEGComment") {
if (fullname in attributes){
if (fullname in attributes) {
columnValues[fullname] = Value.String(tag.description)
}
try {
val json = Json.parseToJsonElement(tag.description).jsonObject
json.forEach { (key, value) ->
attributes[key]?.let { attribute ->
value.jsonObject.convertType(attribute.type)?.let { converted ->
columnValues[key] = converted
val jsonElement = Json.parseToJsonElement(tag.description)
if (jsonElement is JsonObject) {
attributes.forEach { (attributeKey, attributeValue) ->
if (jsonElement.containsKey(attributeKey)) {
val jsonValue = jsonElement[attributeKey]?.toValue()

jsonValue?.convertToType(attributeValue.type)?.let { converted ->
columnValues[attributeKey] = converted
}
}
}
}
Expand All @@ -125,7 +136,6 @@ class ExifMetadataExtractor(input: Operator<Retrievable>, analyser: ExifMetadata
columnValues[fullname] = converted
}
}

}
}
}
Expand All @@ -134,3 +144,18 @@ class ExifMetadataExtractor(input: Operator<Retrievable>, analyser: ExifMetadata
return listOf(AnyMapStructDescriptor(UUID.randomUUID(), retrievable.id, attributes.values.toList(), columnValues.mapValues { it.value }, field = this.field))
}
}

/**
 * Reads the tag identified by [tagType] from [directory] and wraps it in the [Value] that corresponds to [type].
 *
 * @param directory The metadata [Directory] to read the tag from.
 * @param tagType The numeric identifier of the tag inside [directory].
 * @param type The target [Type] the tag should be converted to.
 * @return The wrapped value, or null if the tag has no usable representation for [type] (no string value,
 * a non-numeric object for Byte/Short, or an unparseable date).
 * @throws IllegalArgumentException If [type] is one of the vector types, or if [type] is [Type.UUID] and the
 * tag is not a valid UUID string.
 */
private fun convertType(directory: Directory, tagType: Int, type: Type): Value<*>? = when (type) {
    // NOTE(review): the typed getters (getBoolean, getInt, ...) presumably throw when the tag cannot be
    // converted - confirm against the metadata library; that behaviour is left untouched here.
    Type.Boolean -> Value.Boolean(directory.getBoolean(tagType))
    // Go through Number instead of a hard cast: the stored object need not be a boxed Byte, and may be null.
    Type.Byte -> (directory.getObject(tagType) as? Number)?.let { Value.Byte(it.toByte()) }
    Type.Datetime -> directory.getString(tagType)?.let { raw -> convertDate(raw) }?.let { Value.DateTime(it) }
    Type.Double -> Value.Double(directory.getDouble(tagType))
    Type.Float -> Value.Float(directory.getFloat(tagType))
    Type.Int -> Value.Int(directory.getInt(tagType))
    Type.Long -> Value.Long(directory.getLong(tagType))
    // Same reasoning as for Byte: avoid a ClassCastException / NullPointerException on the hard cast.
    Type.Short -> (directory.getObject(tagType) as? Number)?.let { Value.Short(it.toShort()) }
    // getString is a Java call and may yield null; map that to "no value" instead of failing on the wrapper.
    Type.String -> directory.getString(tagType)?.let { Value.String(it) }
    Type.Text -> directory.getString(tagType)?.let { Value.Text(it) }
    Type.UUID -> directory.getString(tagType)?.let { Value.UUIDValue(UUID.fromString(it)) }
    // Vector types cannot be represented by a single metadata tag.
    is Type.BooleanVector,
    is Type.DoubleVector,
    is Type.FloatVector,
    is Type.IntVector,
    is Type.LongVector -> throw IllegalArgumentException("Unsupported type: $type")
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import org.vitrivr.engine.core.model.content.factory.ContentFactory
import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.descriptor.scalar.StringDescriptor
import org.vitrivr.engine.core.model.descriptor.scalar.TextDescriptor
import org.vitrivr.engine.core.model.descriptor.struct.AnyMapStructDescriptor
import org.vitrivr.engine.core.model.descriptor.struct.metadata.source.FileSourceMetadataDescriptor
import org.vitrivr.engine.core.model.retrievable.Ingested
import org.vitrivr.engine.core.model.retrievable.Retrievable
Expand Down Expand Up @@ -46,23 +47,34 @@ class DescriptorAsContentTransformer : TransformerFactory {
descriptor ->
descriptor.field?.fieldName == fieldName
}.forEach{ descriptor ->
val content = convertDescriptorToContent(descriptor)
retrievable.addContent(content)
retrievable.addAttribute(ContentAuthorAttribute(content.id, name))
logger.debug { "Descriptor ${descriptor.id} of retrievable ${retrievable.id} has been converted to content element." }
val pairs = processDescriptor(descriptor)
for (pair in pairs) {
val content = pair.second
retrievable.addContent(content)
for (key in pair.first) {
val attribute = ContentAuthorAttribute(content.id, key)
retrievable.addAttribute(attribute)
}
logger.debug { "Descriptor ${descriptor.id} of retrievable ${retrievable.id} has been converted to content element." }
}
}
emit(retrievable)
}
}

private fun convertDescriptorToContent(descriptor: Descriptor<*>): ContentElement<*> {
private fun processDescriptor(descriptor: Descriptor<*>): List<Pair<Set<String>, ContentElement<*>>> {
return when (descriptor) {
is StringDescriptor -> contentFactory.newTextContent(descriptor.value.value)
is TextDescriptor -> contentFactory.newTextContent(descriptor.value.value)
is FileSourceMetadataDescriptor -> contentFactory.newTextContent(descriptor.path.value)
is StringDescriptor -> listOf(Pair(setOf(name), contentFactory.newTextContent(descriptor.value.value)))
is TextDescriptor -> listOf(Pair(setOf(name), contentFactory.newTextContent(descriptor.value.value)))
is FileSourceMetadataDescriptor -> listOf(Pair(setOf(name), contentFactory.newTextContent(descriptor.path.value)))
is AnyMapStructDescriptor -> {
descriptor.values().map{
entry ->
Pair(setOf(name, "$name.${entry.key}"), contentFactory.newTextContent(entry.value.toString()))
}
}
else -> throw IllegalArgumentException("Descriptor type not supported.")
}

}
}
}

0 comments on commit 2d86a8e

Please sign in to comment.