Skip to content

Commit

Permalink
Merge branch 'MultiValuedItemLocMap' of https://github.com/sepinf-inc…
Browse files Browse the repository at this point in the history
…/IPED.git into MultiValuedItemLocMap
  • Loading branch information
patrickdalla committed Jun 4, 2024
2 parents 37448a9 + c541b4d commit da52c15
Show file tree
Hide file tree
Showing 108 changed files with 3,831 additions and 759 deletions.
4 changes: 2 additions & 2 deletions iped-app/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@
<artifactItem>
<groupId>org.python</groupId>
<artifactId>python-jep-dlib</artifactId>
<version>3.9.12-4.0.3-19.23.1</version>
<version>3.9.12-4.0.3-19.23.1-2</version>
<type>zip</type>
<overWrite>false</overWrite>
<outputDirectory>${release.dir}</outputDirectory>
Expand Down Expand Up @@ -315,7 +315,7 @@
<artifactItem>
<groupId>tesseract</groupId>
<artifactId>tesseract-zip</artifactId>
<version>5.3.2-24-g3922</version>
<version>5.3.2-24-g3922_1</version>
<type>zip</type>
<overWrite>false</overWrite>
<outputDirectory>${tools.dir}</outputDirectory>
Expand Down
10 changes: 6 additions & 4 deletions iped-app/resources/config/IPEDConfig.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,12 @@ enableMinIO = false
enableOCR = false

# Enable audio transcription.
# Default implementation uses VOSK transcription on local CPU (slow and not good accuracy).
# You can change it to a local Facebook Wav2Vec2 implementation (slower on CPU, faster on GPU and good accuracy)
# or remote Microsoft Azure or Google Cloud services (faster and good accuracy).
# Configure it in conf/AudioTranscriptConfig.txt
# Default implementation uses VOSK transcription on local CPU (faster but bad accuracy).
# You can change the algorithm in conf/AudioTranscriptConfig.txt:
# - Wav2Vec2 algorithm (slower and good accuracy)
# - Whisper algorithm (much slower but better accuracy)
# - Google Cloud (about $1.00 per hour cost)
# - Microsoft Azure (about $1.00 per hour cost)
enableAudioTranscription = false

# Enables carving. "addUnallocated" must be enabled to scan unallocated space.
Expand Down
2 changes: 1 addition & 1 deletion iped-app/resources/config/LocalConfig.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Local environment configuration
########################################################################

# Defines program localization/language. Currently there are localizations for 'en', 'pt-BR', 'it-IT', 'de-DE' & 'es-AR'
# Defines program localization/language. Currently there are localizations for 'en', 'pt-BR', 'it-IT', 'de-DE', 'es-AR' and 'fr-FR'.
locale = en

# Temporary directory for processing: "default" uses the system temporary folder.
Expand Down
48 changes: 39 additions & 9 deletions iped-app/resources/config/conf/AudioTranscriptConfig.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,23 @@
# you should download it from https://alphacephei.com/vosk/models and put in 'models/vosk/[lang]' folder.
implementationClass = iped.engine.task.transcript.VoskTranscriptTask

# Uses a local/remote wav2vec2 implementation for transcription. Accuracy is much better than most Vosk models.
# The local impl is AT LEAST 1 order of magnitude slower than Vosk on high end CPUs. Using a good GPU is highly recommended!
# The remote impl is useful if you have a central server/cluster with many GPUs to be shared among processing nodes.
# For both the local or remote options, please check the installation steps: https://github.com/sepinf-inc/IPED/wiki/User-Manual#wav2vec2
# If you use the local implementation, you must set 'huggingFaceModel' param below.
# If you use the remote implementation, you must set 'wav2vec2Service' param below.
# Uses a local wav2vec2 implementation for transcription. Accuracy is much better than most Vosk models.
# This is up to 10x slower than Vosk on high end CPUs. Using a good GPU is highly recommended!
# Please check the installation steps: https://github.com/sepinf-inc/IPED/wiki/User-Manual#wav2vec2
# If you enable this, you must set 'huggingFaceModel' param below.
#implementationClass = iped.engine.task.transcript.Wav2Vec2TranscriptTask
#implementationClass = iped.engine.task.transcript.RemoteWav2Vec2TranscriptTask

# Uses a local Whisper implementation for transcription. Accuracy is better than wav2vec2 depending on the model.
# This is up to 4x slower than wav2vec2 depending on compared models. Using a high end GPU is strongly recommended!
# Please check the installation steps: https://github.com/sepinf-inc/IPED/wiki/User-Manual#whisper
# If you enable this, you must set 'whisperModel' param below.
#implementationClass = iped.engine.task.transcript.WhisperTranscriptTask

# Uses a remote service for transcription.
# The remote service is useful if you have a central server/cluster with many GPUs to be shared among processing nodes.
# Please check steps on https://github.com/sepinf-inc/IPED/wiki/User-Manual#remote-transcription
# If you enable this, you must set 'remoteServiceAddress' param below.
#implementationClass = iped.engine.task.transcript.RemoteTranscriptionTask

# If you want to use the Microsoft Azure service implementation, comment above and uncomment below.
# You MUST include the Microsoft client-sdk.jar in the plugins folder.
Expand Down Expand Up @@ -91,11 +100,32 @@ minWordScore = 0.5
# huggingFaceModel = jonatasgrosman/wav2vec2-xls-r-1b-french

#########################################
# RemoteWav2Vec2TranscriptTask options
# Local WhisperTranscriptTask options
#########################################

# Possible values: tiny, base, small, medium, large-v3, dwhoelz/whisper-large-pt-cv11-ct2
# large-v3 is much better than medium, but 2x slower and uses 2x more memory.
# If you know the language you want to transcribe, please set the 'language' option above.
# 'language = auto' uses the 'locale' set on LocalConfig.txt
# 'language = detect' uses auto detection, but it can cause mistakes
whisperModel = medium

# Compute type precision. This affects accuracy, speed and memory usage.
# Possible values: float32 (better), float16 (recommended for GPU), int8 (faster)
precision = int8

# Batch size (number of parallel transcriptions). If you have a GPU with enough memory,
# increasing this value to e.g. 16 can speed up transcribing long audios up to 10x.
# Test which value works best for your GPU before hitting OOM.
# This only works if you are using the whisperx library instead of faster_whisper.
batchSize = 1

#########################################
# RemoteTranscriptionTask options
#########################################

# IP:PORT of the service/central node used by the RemoteTranscriptionTask implementation.
# wav2vec2Service = 127.0.0.1:11111
# remoteServiceAddress = 127.0.0.1:11111

#########################################
# MicrosoftTranscriptTask options
Expand Down
13 changes: 11 additions & 2 deletions iped-app/resources/config/conf/CategoriesConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
{"name": "Ares Galaxy", "mimes": ["application/x-ares-galaxy","application/x-ares-galaxy-entry"]},
{"name": "E-Mule", "mimes": ["application/x-emule", "application/x-emule-part-met", "application/x-emule-searches", "application/x-emule-preferences-ini", "application/x-emule-preferences-dat", "application/x-emule-known-met-entry", "application/x-emule-part-met-entry"]},
{"name": "Shareaza", "mimes": ["application/x-shareaza-searches-dat", "application/x-shareaza-library-dat", "application/x-shareaza-library-dat-entry", "application/x-shareaza-download"]},
{"name": "Torrent", "mimes": ["application/x-bittorrent-resume-dat", "application/x-bittorrent"]},
{"name": "Torrent", "mimes": ["application/x-bittorrent-resume-dat","application/x-bittorrent-settings-dat", "application/x-bittorrent"]},
{"name": "Other Peer-to-peer", "mimes": ["application/x-p2p"]}
]},
{"name": "Browser Artifacts", "categories":[
Expand Down Expand Up @@ -151,7 +151,16 @@
]}
]},
{"name": "Extraction Summary", "mimes": ["application/x-ufed-html-summary"]},
{"name": "Calls", "mimes": ["application/x-ufed-html-calls", "application/x-ufed-call", "call/x-threema-call", "call/x-whatsapp-call", "call/x-telegram-call", "call/x-discord-call", "application/x-ios-calllog-db", "application/x-ios8-calllog-db"]},
{"name": "Calls", "categories":[
{"name": "Discord Calls", "mimes":["call/x-discord-call"]},
{"name": "Facebook Calls", "mimes":[]},
{"name": "Phone Calls", "mimes":[]},
{"name": "Signal Calls", "mimes":[]},
{"name": "Telegram Calls", "mimes":["call/x-telegram-call"]},
{"name": "Threema Calls", "mimes":["call/x-threema-call"]},
{"name": "WhatsApp Calls", "mimes":["call/x-whatsapp-call"]},
{"name": "Other Calls", "mimes":["application/x-ufed-html-calls", "application/x-ufed-call", "application/x-ios-calllog-db", "application/x-ios8-calllog-db"]}
]},
{"name": "SMS Messages", "mimes": ["application/x-ufed-html-sms", "application/x-ufed-sms", "application/x-ios-sms-db"]},
{"name": "MMS Messages", "mimes": ["application/x-ufed-html-mms", "application/x-ufed-mms"]},
{"name": "Instant Messages", "mimes": ["message/x-chat-message", "message/x-threema-message", "application/x-ufed-instantmessage", "message/x-whatsapp-message", "message/x-skype-message", "message/x-skype-filetransfer", "message/x-telegram-message", "message/x-discord-message"]},
Expand Down
14 changes: 13 additions & 1 deletion iped-app/resources/config/conf/CustomSignatures.xml
Original file line number Diff line number Diff line change
Expand Up @@ -441,13 +441,25 @@
<_comment>BitTorrent Client Resume.dat file</_comment>
<sub-class-of type="application/x-p2p"/>
<magic priority="50">
<match value="d10:.fileguard" type="string" offset="0"/>
<match value="d10:.fileguard" type="string" offset="0">
<match value="e4:info" type="string" offset="16:1024"/>
</match>
</magic>
<glob pattern="resume.dat" />
<glob pattern="resume.dat.old" />
<glob pattern="resume.dat.new" />
</mime-type>

<mime-type type="application/x-bittorrent-settings-dat">
<_comment>BitTorrent Client Settings.dat file</_comment>
<sub-class-of type="application/x-p2p"/>
<magic priority="50">
<match value="d10:.fileguard" type="string" offset="0">
<match value="e9:autostart" type="string" offset="16:1024"/>
</match>
</magic>
</mime-type>

<mime-type type="application/x-bittorrent">
<sub-class-of type="application/x-p2p"/>
<magic priority="50">
Expand Down
4 changes: 2 additions & 2 deletions iped-app/resources/config/conf/MakePreviewConfig.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
supportedMimes = application/x-msaccess; application/x-lnk; application/x-firefox-savedsession
supportedMimes = application/x-sqlite3; application/sqlite-skype; application/x-win10-timeline; application/x-gdrive-cloud-graph; application/x-gdrive-snapshot
supportedMimes = application/x-whatsapp-db; application/x-whatsapp-db-f; application/x-whatsapp-chatstorage; application/x-whatsapp-chatstorage-f; application/x-threema-chatstorage; application/x-shareaza-searches-dat; application/x-msie-cache
supportedMimes = application/x-prefetch; text/x-vcard; application/x-bittorrent-resume-dat; application/x-bittorrent; application/x-emule-preferences-dat; application/vnd.android.package-archive
supportedMimes = application/x-prefetch; text/x-vcard; application/x-emule-preferences-dat; application/vnd.android.package-archive; application/x-bittorrent-settings-dat

# List of mimetypes which parsers insert links to other case items into preview
supportedMimesWithLinks = application/x-emule; application/x-emule-part-met; application/x-ares-galaxy; application/x-shareaza-library-dat; application/x-shareaza-download
supportedMimesWithLinks = application/x-emule; application/x-emule-part-met; application/x-ares-galaxy; application/x-shareaza-library-dat; application/x-shareaza-download; application/x-bittorrent-resume-dat; application/x-bittorrent
2 changes: 1 addition & 1 deletion iped-app/resources/config/conf/OCRConfig.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# You can use the parameter -ocr "bookmark_name" to restrict the OCR to a specific bookmark (only works when creating reports from the command line).

# Dictionary language to be used for OCR. You can specify multiple languages, e.g. por+eng, but you will pay a significant processing cost.
# Languages supported: eng, por, ita, deu, spa
# Languages supported: eng, por, ita, deu, spa, fra
OCRLanguage = por

# Skip known files found in the hash lookup database.
Expand Down
1 change: 1 addition & 0 deletions iped-app/resources/config/conf/ParserConfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@
</params>
</parser>
<parser class="iped.parsers.bittorrent.BitTorrentResumeDatParser"></parser>
<parser class="iped.parsers.bittorrent.BitTorrentGenericDatParser"></parser>
<parser class="iped.parsers.bittorrent.TorrentFileParser"></parser>
<parser class="iped.parsers.lnk.LNKShortcutParser"></parser>
<parser class="iped.parsers.misc.GenericOLEParser"></parser>
Expand Down
2 changes: 2 additions & 0 deletions iped-app/resources/config/conf/metadataTypes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3392,6 +3392,8 @@ og:type = java.lang.String
og:url = java.lang.String
ontent-Type" = java.lang.String
p2pHistoryEntries = java.lang.Integer
childPornHashHits = java.lang.Integer
p2p:torrentFilesFoundInCase = java.lang.Integer
containerTrackId = java.lang.String
parentTrackId = java.lang.String
trackId = java.lang.String
Expand Down
14 changes: 14 additions & 0 deletions iped-app/resources/config/profiles/forensic/conf/ParserConfig.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>

<!-- this config is merged with default config XML -->
<properties>

<parsers>
<parser class="iped.parsers.usnjrnl.UsnJrnlParser">
<params>
<param name="extractEntries" type="bool">true</param>
</params>
</parser>
</parsers>

</properties>
14 changes: 14 additions & 0 deletions iped-app/resources/config/profiles/pedo/conf/ParserConfig.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>

<!-- this config is merged with default config XML -->
<properties>

<parsers>
<parser class="iped.parsers.usnjrnl.UsnJrnlParser">
<params>
<param name="extractEntries" type="bool">true</param>
</params>
</parser>
</parsers>

</properties>
8 changes: 8 additions & 0 deletions iped-app/resources/localization/iped-categories.properties
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ Folders=Folders
Scanned\ Documents=Scanned\ Documents
Extraction\ Summary=Extraction\ Summary
Calls=Calls
Discord\ Calls=Discord\ Calls
Facebook\ Calls=Facebook\ Calls
Other\ Calls=Other\ Calls
Phone\ Calls=Phone\ Calls
Signal\ Calls=Signal\ Calls
Telegram\ Calls=Telegram\ Calls
Threema\ Calls=Threema\ Calls
WhatsApp\ Calls=WhatsApp\ Calls
SMS\ Messages=SMS\ Messages
MMS\ Messages=MMS\ Messages
Instant\ Messages=Instant\ Messages
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Other\ Documents=sonstige\ Dokumente
Emails\ and\ Mailboxes=E-Mails\ und\ Postfächer
Emails=E-Mails
Mailboxes=Postfächer
Appointments=Appointments[TBT]
Appointments=Termine
GDrive\ Synced\ Files=GDrive\ synchronisierte\ Dateien
GDrive\ File\ Entries=GDrive\ Datei\ Einträge
Databases=Datenbanken
Expand Down Expand Up @@ -92,6 +92,14 @@ Folders=Verzeichnisse
Scanned\ Documents=gescannte\ Dokumente
Extraction\ Summary=Extraktions-Zusammenfassung
Calls=Anrufe
Discord\ Calls=Discord\ Calls[TBT]
Facebook\ Calls=Facebook\ Calls[TBT]
Other\ Calls=Other\ Calls[TBT]
Phone\ Calls=Phone\ Calls[TBT]
Signal\ Calls=Signal\ Calls[TBT]
Telegram\ Calls=Telegram\ Calls[TBT]
Threema\ Calls=Threema\ Calls[TBT]
WhatsApp\ Calls=WhatsApp\ Calls[TBT]
SMS\ Messages=SMS\ Nachrichten
MMS\ Messages=MMS\ Nachrichten
Instant\ Messages=Sofortnachrichten
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ Folders=Carpetas
Scanned\ Documents=Documentos\ Escaneados
Extraction\ Summary=Resumen\ Extracción
Calls=Reg.\ Llamadas
Discord\ Calls=Discord\ Calls[TBT]
Facebook\ Calls=Facebook\ Calls[TBT]
Other\ Calls=Other\ Calls[TBT]
Phone\ Calls=Phone\ Calls[TBT]
Signal\ Calls=Signal\ Calls[TBT]
Telegram\ Calls=Telegram\ Calls[TBT]
Threema\ Calls=Threema\ Calls[TBT]
WhatsApp\ Calls=WhatsApp\ Calls[TBT]
SMS\ Messages=Mensajes\ SMS
MMS\ Messages=Mensajes\ MMS
Instant\ Messages=Mensajes\ Instantáneos
Expand Down
Loading

0 comments on commit da52c15

Please sign in to comment.