Skip to content

Commit

Permalink
Implemented ThumbcacheParser to process directories and extract metad…
Browse files Browse the repository at this point in the history
…ata and images
  • Loading branch information
marcus6n committed Oct 18, 2024
1 parent f8c575e commit cc3fdf5
Showing 1 changed file with 16 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,30 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
tmp.close();
}

/**
 * Walks the entries of {@code dir} and writes each document entry to {@code xhtml}.
 *
 * <p>Entries that are themselves {@code DirectoryNode}s are visited recursively. Each document
 * entry is emitted as a {@code div} of class "thumbcache-entry" containing an {@code h1} with the
 * entry name and a nested "thumbcache-entry-content" {@code div}; the entry's bytes are opened
 * as a {@code DocumentInputStream} and passed to {@code extractor.parseEmbedded}.
 *
 * @param dir directory node whose entries are iterated
 * @param extractor receives each document entry's stream via {@code parseEmbedded}
 * @param xhtml handler that the wrapping elements and any error text are written to
 * @throws IOException declared by the signature
 * @throws SAXException declared by the signature
 * @throws TikaException declared by the signature
 */
private void recurseDir(DirectoryNode dir, EmbeddedDocumentExtractor extractor, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
private void recurseDir(DirectoryNode dir, EmbeddedDocumentExtractor extractor, XHTMLContentHandler xhtml)
throws IOException, SAXException, TikaException {
for (Entry entry : dir) {
if (entry instanceof DirectoryNode) {
recurseDir((DirectoryNode) entry, extractor, xhtml);
} else {
Metadata entrydata = new Metadata();
entrydata.set(Metadata.CONTENT_TYPE, "thumbcache-entry");
} else if (entry instanceof DocumentNode) {
Metadata entryData = new Metadata();
entryData.set(Metadata.CONTENT_TYPE, "thumbcache-entry");

xhtml.startElement("div", "class", "thumbcache-entry");
xhtml.element("h1", entry.getName());
xhtml.element("h1", entry.getName()); // File name

xhtml.startElement("div", "class", "thumbcache-entry-content");
try (InputStream stream = new DocumentInputStream((DocumentNode) entry)) {
extractor.parseEmbedded(stream, xhtml, entrydata, true);
// Extract the embedded content of the file
extractor.parseEmbedded(stream, xhtml, entryData, true);
} catch (Exception e) {
// Any failure while handling this entry is reported inline in the output as a paragraph.
xhtml.startElement("p");
xhtml.characters("Error processing document: " + e.getMessage());
xhtml.endElement("p");
}
xhtml.endElement("div");
xhtml.endElement("div");

xhtml.endElement("div"); // Closes "thumbcache-entry-content"
xhtml.endElement("div"); // Closes "thumbcache-entry"
}
}
}
Expand Down

0 comments on commit cc3fdf5

Please sign in to comment.