Skip to content

Commit

Permalink
Fix mimeType and title extraction with tika (#79)
Browse files (browse the repository at this point in the history)
  • Loading branch information
DominikWolek authored May 15, 2020
1 parent cc63298 commit a4ee123
Showing 1 changed file with 7 additions and 4 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@ import java.io.File
import java.io.IOException
import org.apache.tika.Tika
import org.apache.tika.metadata.Metadata
import org.apache.tika.metadata.TikaCoreProperties
import org.slf4j.LoggerFactory
import org.springframework.stereotype.Service
import pl.edu.uj.ii.ksi.mordor.persistence.entities.FileMetadata
Expand All @@ -18,14 +19,16 @@ class TikaMetadataExtractor(private val tika: Tika, private val hashProvider: Fi

override fun extract(file: File): FileMetadata? {
return try {
val mimeType = tika.detect(file)
val metadata = getTikaMetadata(file)
val title = metadata.get(TikaCoreProperties.TITLE) ?: file.nameWithoutExtension

FileMetadata(
author = metadata.get("Author"),
description = metadata.get("Subject"),
author = metadata.get(TikaCoreProperties.CREATOR),
description = metadata.get(TikaCoreProperties.DESCRIPTION),
fileHash = hashProvider.calculate(file),
title = metadata.get("Author"),
mimeType = metadata.get("Author"),
title = title,
mimeType = mimeType,
thumbnail = null,
crawledContent = null
)
Expand Down

0 comments on commit a4ee123

Please sign in to comment.