diff --git a/iped-app/resources/config/profiles/forensic/conf/ParserConfig.xml b/iped-app/resources/config/profiles/forensic/conf/ParserConfig.xml new file mode 100644 index 0000000000..584a4cbc23 --- /dev/null +++ b/iped-app/resources/config/profiles/forensic/conf/ParserConfig.xml @@ -0,0 +1,14 @@ + + + + + + + + + true + + + + + \ No newline at end of file diff --git a/iped-app/resources/config/profiles/pedo/conf/ParserConfig.xml b/iped-app/resources/config/profiles/pedo/conf/ParserConfig.xml new file mode 100644 index 0000000000..584a4cbc23 --- /dev/null +++ b/iped-app/resources/config/profiles/pedo/conf/ParserConfig.xml @@ -0,0 +1,14 @@ + + + + + + + + + true + + + + + \ No newline at end of file diff --git a/iped-engine/src/main/java/iped/engine/config/ParsersConfig.java b/iped-engine/src/main/java/iped/engine/config/ParsersConfig.java index b3f14e8ebf..7c5b34b333 100644 --- a/iped-engine/src/main/java/iped/engine/config/ParsersConfig.java +++ b/iped-engine/src/main/java/iped/engine/config/ParsersConfig.java @@ -2,11 +2,34 @@ import java.io.File; import java.io.IOException; +import java.io.StringReader; +import java.io.StringWriter; import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream.Filter; import java.nio.file.Files; import java.nio.file.Path; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; 
+import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + import iped.configuration.Configurable; public class ParsersConfig implements Configurable { @@ -18,6 +41,8 @@ public class ParsersConfig implements Configurable { private static final String PARSER_CONFIG = "ParserConfig.xml"; //$NON-NLS-1$ + public static final String PARSER_DISABLED_ATTR = "iped:disabled"; + private String parserConfigXml; private transient Path tmp; @@ -33,7 +58,53 @@ public boolean accept(Path entry) throws IOException { @Override public void processConfig(Path resource) throws IOException { - parserConfigXml = new String(Files.readAllBytes(resource), StandardCharsets.UTF_8); + if (parserConfigXml == null) { + parserConfigXml = new String(Files.readAllBytes(resource), StandardCharsets.UTF_8); + parserConfigXml = parserConfigXml.trim().replaceFirst("^([\\W]+)<", "<"); + } else { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance(); + dbf.setNamespaceAware(false); + DocumentBuilder db; + try { + db = dbf.newDocumentBuilder(); + Document doc = db.parse(new InputSource(new StringReader(parserConfigXml))); + Document changedDoc = db.parse(resource.toFile()); + + Element root = changedDoc.getDocumentElement(); + NodeList rootNl = root.getElementsByTagName("parsers").item(0).getChildNodes(); + for (int i = 0; i < rootNl.getLength(); i++) { + Node child = rootNl.item(i); + if (child instanceof Element) { + Element element = (Element) child; + if (element.getTagName().equals("parser")) { + String className = element.getAttribute("class"); + XPath xPath = XPathFactory.newInstance().newXPath(); + String expression = "/properties/parsers/parser[@class='" + className + "']"; + NodeList nlParser = (NodeList) xPath.compile(expression).evaluate(doc, XPathConstants.NODESET); + + expression = "/properties/parsers"; + NodeList nlParsers = (NodeList) xPath.compile(expression).evaluate(doc, XPathConstants.NODESET); + Node newnode = doc.importNode(element, true); + for (int j 
= 0; j < nlParsers.getLength(); j++) { + for (int k = 0; k < nlParser.getLength(); k++) { + nlParsers.item(j).removeChild(nlParser.item(k)); + } + nlParsers.item(j).appendChild(newnode); + } + } + } + } + + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer transformer = tf.newTransformer(); + transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + StringWriter writer = new StringWriter(); + transformer.transform(new DOMSource(doc), new StreamResult(writer)); + parserConfigXml = writer.getBuffer().toString(); + } catch (ParserConfigurationException | SAXException | XPathExpressionException | TransformerException e) { + throw new RuntimeException(e); + } + } } @Override @@ -46,11 +117,34 @@ public void setConfiguration(String config) { parserConfigXml = config; } + public String removeDisabledParsers(String parserConfigXml) { + String[] slices = parserConfigXml.split(PARSER_DISABLED_ATTR + "=\"true\""); + StringBuffer result = new StringBuffer(); + for (int i = 0; i < slices.length; i++) { + String part = slices[i]; + if (i > 0) { + int disabledParserEndIndex = part.indexOf(">"); + if (disabledParserEndIndex == 0 || part.charAt(disabledParserEndIndex - 1) != '/') { + disabledParserEndIndex = part.indexOf(""); + } + part = part.substring(disabledParserEndIndex + 1); + } + if (i < slices.length - 1) { + int disabledParserIndex = part.lastIndexOf(""); //$NON-NLS-1$ } - public InputStream createHTMLReport(List entries) { + public InputStream createHTMLReport(List entries, Exception entriesReadError) { ByteArrayOutputStream bout = new ByteArrayOutputStream(); PrintWriter out = new PrintWriter(new OutputStreamWriter(bout, StandardCharsets.UTF_8)); // $NON-NLS-1$ @@ -90,6 +90,11 @@ public InputStream createHTMLReport(List entries) { out.print(""); + // entriesReadError is null when every journal entry was read successfully; only report real failures + if (entriesReadError != null) { + out.print("Error during additional entries read:" + entriesReadError.getMessage()); + } + endHTMLDocument(out); out.close(); @@ -97,7 +102,8 @@ public InputStream createHTMLReport(List entries) { 
} - public InputStream createCSVReport(List entries, TemporaryResources tmp) throws IOException { + public InputStream createCSVReport(List entries, TemporaryResources tmp, Exception entriesReadError) + throws IOException { Path path = tmp.createTempFile(); try (OutputStream os = Files.newOutputStream(path); Writer writer = new OutputStreamWriter(os, StandardCharsets.UTF_8); diff --git a/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/usnjrnl/UsnJrnlParser.java b/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/usnjrnl/UsnJrnlParser.java index 16484523c5..86018594e3 100644 --- a/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/usnjrnl/UsnJrnlParser.java +++ b/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/usnjrnl/UsnJrnlParser.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -25,6 +26,7 @@ import iped.data.IItemReader; import iped.io.SeekableInputStream; import iped.parsers.standard.StandardParser; +import iped.parsers.util.MetadataUtil; import iped.properties.BasicProps; import iped.properties.ExtraProperties; import iped.search.IItemSearcher; @@ -55,6 +57,8 @@ public enum ReportType { public static final MediaType USNJRNL_REPORT_CSV = MediaType.parse("application/x-usnjournal-report-csv"); public static final MediaType USNJRNL_REGISTRY = MediaType.parse("application/x-usnjournal-registry"); + static final String USN_REASON_PREFIX = "usnJrnl:"; + private static Set SUPPORTED_TYPES = MediaType.set(USNJRNL_$J); @Override @@ -136,7 +140,8 @@ public UsnJrnlEntry readEntry(SeekableInputStream in) throws IOException { return null; } - private void createReport(ArrayList entries, int n, ParseContext context, ContentHandler handler) + private void createReport(ArrayList entries, int n, ParseContext context, ContentHandler handler, + Exception entriesReadError) throws 
SAXException, IOException { ReportGenerator rg = new ReportGenerator(); EmbeddedDocumentExtractor extractor = context.get(EmbeddedDocumentExtractor.class, @@ -149,11 +154,11 @@ private void createReport(ArrayList entries, int n, ParseContext c try (TemporaryResources tmp = new TemporaryResources()) { if (reportType == ReportType.CSV) { cMetadata.set(StandardParser.INDEXER_CONTENT_TYPE, USNJRNL_REPORT_CSV.toString()); - is = rg.createCSVReport(entries, tmp); + is = rg.createCSVReport(entries, tmp, entriesReadError); } else if (reportType == ReportType.HTML) { cMetadata.set(StandardParser.INDEXER_CONTENT_TYPE, USNJRNL_REPORT_HTML.toString()); - is = rg.createHTMLReport(entries); + is = rg.createHTMLReport(entries, entriesReadError); name += " " + n; } @@ -179,12 +184,15 @@ private void createReport(ArrayList entries, int n, ParseContext c String[] props = ReportGenerator.cols; - metadataItem.set(TikaCoreProperties.CREATED, rg.timeFormat.format(entry.getFileTime())); metadataItem.set(ReportGenerator.cols[0], String.format("0x%016X", entry.getOffset())); metadataItem.set(props[1], entry.getFileName()); metadataItem.set(props[2], entry.getFullPath()); metadataItem.set(props[3], Long.toString(entry.getUSN())); + String formatedDate = rg.timeFormat.format(entry.getFileTime()); for (String value : entry.getReasons()) { + value = value.toLowerCase(); + MetadataUtil.setMetadataType(USN_REASON_PREFIX + value, Date.class); + metadataItem.set(USN_REASON_PREFIX + value, formatedDate); metadataItem.add(props[5], value); } metadataItem.set(props[6], "0x" + Util.byteArrayToHex(entry.getMftRef())); @@ -231,28 +239,33 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, int n = 1; IItemSearcher searcher = context.get(IItemSearcher.class); IItemReader item = context.get(IItemReader.class); + Exception entriesReadError = null; try (SeekableInputStream sis = item.getSeekableInputStream()) { jumpZeros(sis, 0, sis.size()); - while (findNextEntry(sis)) { - 
UsnJrnlEntry u = readEntry(sis); - // do not insert empty registries in the list - if (u == null) { - continue; - } - - entries.add(u); - - if (entries.size() % MAX_ENTRIES == 0) { - int baseIndex = ((entries.size() / MAX_ENTRIES) - 1) * MAX_ENTRIES; - rebuildFullPaths(entries.subList(baseIndex, baseIndex + MAX_ENTRIES), searcher, item); - } - - // limits the html table size - if (entries.size() == MAX_ENTRIES && reportType == ReportType.HTML) { - createReport(entries, n, context, handler); - entries.clear(); - n++; + try { + while (findNextEntry(sis)) { + UsnJrnlEntry u = readEntry(sis); + // do not insert empty registries in the list + if (u == null) { + continue; + } + + entries.add(u); + + if (entries.size() % MAX_ENTRIES == 0) { + int baseIndex = ((entries.size() / MAX_ENTRIES) - 1) * MAX_ENTRIES; + rebuildFullPaths(entries.subList(baseIndex, baseIndex + MAX_ENTRIES), searcher, item); + } + + // limits the html table size + if (entries.size() == MAX_ENTRIES && reportType == ReportType.HTML) { + createReport(entries, n, context, handler, entriesReadError); + entries.clear(); + n++; + } } + } catch (Exception e) { + entriesReadError = e; } } @@ -261,7 +274,18 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, int baseIndex = (entries.size() / MAX_ENTRIES) * MAX_ENTRIES; rebuildFullPaths(entries.subList(baseIndex, entries.size()), searcher, item); } - createReport(entries, n, context, handler); + createReport(entries, n, context, handler, entriesReadError); + } + if (entriesReadError instanceof TikaException) { + throw (TikaException) entriesReadError; + } else if (entriesReadError instanceof IOException) { + throw (IOException) entriesReadError; + } else if (entriesReadError instanceof SAXException) { + throw (SAXException) entriesReadError; + } else if (entriesReadError instanceof RuntimeException) { + throw (RuntimeException) entriesReadError; + } else if (entriesReadError != null) { + throw new 
RuntimeException(entriesReadError); } } diff --git a/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/usnjrnl/AbstractPkgTest.java b/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/usnjrnl/AbstractPkgTest.java index d8b29dca33..7919b1827d 100644 --- a/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/usnjrnl/AbstractPkgTest.java +++ b/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/usnjrnl/AbstractPkgTest.java @@ -38,6 +38,7 @@ protected static class EmbeddedUsnParser extends AbstractParser { protected List contenttype = new ArrayList(); protected List title = new ArrayList(); protected List created = new ArrayList(); + protected List metadata = new ArrayList(); public Set getSupportedTypes(ParseContext context) { return (new AutoDetectParser()).getSupportedTypes(context); @@ -55,6 +56,8 @@ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, if (metadata.get(TikaCoreProperties.CREATED) != null) created.add(metadata.get(TikaCoreProperties.CREATED)); + + this.metadata.add(metadata); } } diff --git a/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/usnjrnl/UsnJrnlParserTest.java b/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/usnjrnl/UsnJrnlParserTest.java index 919a17d147..5bfa86d603 100644 --- a/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/usnjrnl/UsnJrnlParserTest.java +++ b/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/usnjrnl/UsnJrnlParserTest.java @@ -50,56 +50,44 @@ public void testUsnJrnlParsingHTML() throws IOException, SAXException, TikaExcep assertEquals(3, usntracker.contenttype.size()); assertEquals(3085, usntracker.title.size()); - assertEquals(3084, usntracker.created.size()); + // assertEquals(3084, usntracker.created.size()); assertEquals("application/x-usnjournal-report-csv", usntracker.contenttype.get(0)); assertEquals("application/x-usnjournal-registry", usntracker.contenttype.get(2)); - Date date = new SimpleDateFormat("yyyy-MM-dd 
HH:mm:ss").parse(usntracker.created.get(0)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:52:07", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:52:07", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. de 2021 14:52:07", df.format(date)); - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(1)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:52:07", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:52:07", df.format(date)); - if (getVersion() >= 12) - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(2)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:52:07", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:52:07", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. de 2021 14:52:07", df.format(date)); - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(3)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:52:07", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:52:07", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. de 2021 14:52:07", df.format(date)); - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(3082)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:55:03", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:55:03", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. 
de 2021 14:55:03", df.format(date)); - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(3083)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:55:03", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:55:03", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. de 2021 14:55:03", df.format(date)); + Date date; + Metadata metadata; + String[] reasons; + + for (int i = 1; i <= 4; i++) { + metadata = usntracker.metadata.get(i); + reasons = metadata.getValues("Reasons"); + for (String reason : reasons) { + date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .parse(metadata.get(UsnJrnlParser.USN_REASON_PREFIX + reason)); + if (getVersion() < 9) + assertEquals("20/05/2021 14:52:07", df.format(date)); + if (getVersion() >= 9 && getVersion() < 12) + assertEquals("20 de mai de 2021 14:52:07", df.format(date)); + if (getVersion() >= 12) + assertEquals("20 de mai. de 2021 14:52:07", df.format(date)); + } + } + + for (int i = 3082; i <= 3083; i++) { + metadata = usntracker.metadata.get(i); + reasons = metadata.getValues("Reasons"); + for (String reason : reasons) { + date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .parse(metadata.get(UsnJrnlParser.USN_REASON_PREFIX + reason)); + if (getVersion() < 9) + assertEquals("20/05/2021 14:55:03", df.format(date)); + if (getVersion() >= 9 && getVersion() < 12) + assertEquals("20 de mai de 2021 14:55:03", df.format(date)); + if (getVersion() >= 12) + assertEquals("20 de mai. 
de 2021 14:55:03", df.format(date)); + } + } assertEquals("USN Journal Report", usntracker.title.get(0)); assertEquals("USN journal Entry 6098518016", usntracker.title.get(1)); @@ -131,58 +119,40 @@ public void testUsnJrnlParsingCSV() throws IOException, SAXException, TikaExcept assertEquals(3, usntracker.contenttype.size()); assertEquals(3085, usntracker.title.size()); - assertEquals(3084, usntracker.created.size()); - assertEquals("application/x-usnjournal-report-csv", usntracker.contenttype.get(0)); - assertEquals("application/x-usnjournal-registry", usntracker.contenttype.get(2)); + Date date; + Metadata metadata; + String[] reasons; + + for (int i = 1; i <= 4; i++) { + metadata = usntracker.metadata.get(i); + reasons = metadata.getValues("Reasons"); + for (String reason : reasons) { + date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .parse(metadata.get(UsnJrnlParser.USN_REASON_PREFIX + reason)); + if (getVersion() < 9) + assertEquals("20/05/2021 14:52:07", df.format(date)); + if (getVersion() >= 9 && getVersion() < 12) + assertEquals("20 de mai de 2021 14:52:07", df.format(date)); + if (getVersion() >= 12) + assertEquals("20 de mai. de 2021 14:52:07", df.format(date)); + } + } - Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(0)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:52:07", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:52:07", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. de 2021 14:52:07", df.format(date)); - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(1)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:52:07", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:52:07", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. 
de 2021 14:52:07", df.format(date)); - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(2)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:52:07", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:52:07", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. de 2021 14:52:07", df.format(date)); - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(3)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:52:07", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:52:07", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. de 2021 14:52:07", df.format(date)); - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(3082)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:55:03", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:55:03", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. de 2021 14:55:03", df.format(date)); - - date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(usntracker.created.get(3083)); - if (getVersion() < 9) - assertEquals("20/05/2021 14:55:03", df.format(date)); - if (getVersion() >= 9 && getVersion() < 12) - assertEquals("20 de mai de 2021 14:55:03", df.format(date)); - if (getVersion() >= 12) - assertEquals("20 de mai. 
de 2021 14:55:03", df.format(date)); + for (int i = 3082; i <= 3083; i++) { + metadata = usntracker.metadata.get(i); + reasons = metadata.getValues("Reasons"); + for (String reason : reasons) { + date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + .parse(metadata.get(UsnJrnlParser.USN_REASON_PREFIX + reason)); + if (getVersion() < 9) + assertEquals("20/05/2021 14:55:03", df.format(date)); + if (getVersion() >= 9 && getVersion() < 12) + assertEquals("20 de mai de 2021 14:55:03", df.format(date)); + if (getVersion() >= 12) + assertEquals("20 de mai. de 2021 14:55:03", df.format(date)); + } + } assertEquals("USN Journal Report", usntracker.title.get(0)); assertEquals("USN journal Entry 6098518016", usntracker.title.get(1));