Skip to content

Commit

Permalink
Test dnstwist source
Browse files Browse the repository at this point in the history
  • Loading branch information
jarelllama authored Apr 5, 2024
1 parent 8f106ed commit 5fa8e67
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 1,342,043 deletions.
1 change: 1 addition & 0 deletions .github/workflows/manually_add.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ jobs:
build:
needs: get-domains
uses: ./.github/workflows/retrieve_domains.yml
secrets: inherit
1 change: 1 addition & 0 deletions .github/workflows/opensquat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ jobs:
build:
needs: run-opensquat
uses: ./.github/workflows/retrieve_domains.yml
secrets: inherit
2 changes: 1 addition & 1 deletion SOURCES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Source | Type | Inactive | Excluded from light
:--- |:--- |:--- |:---
[ANFRAS](https://anfras.com/fakeshops/) | Fake | Yes | -
[Artists Against 419](https://db.aa419.org/fakebankslist.php) | Advance-fee | |
[DFPI's Crypto Scam Tracker](https://dfpi.ca.gov/crypto-scams/) | Crypto | | Yes
[DFPI's Crypto Scam Tracker](https://dfpi.ca.gov/crypto-scams/) | Crypto | Yes | -
[Google's Custom Search JSON API](https://developers.google.com/custom-search/v1/introduction) | Google Search | |
[GunTab](https://www.guntab.com/scam-websites) | Firearm | | Yes
[Hagezi's NRD List](https://github.com/hagezi/dns-blocklists?tab=readme-ov-file#nrd) | NRD | - | -
Expand Down
27 changes: 27 additions & 0 deletions config/dnstwist_targets.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
aliexpress.com
shopee.com
lazada.com
amazon.com
google.com
apple.com
walmart.com
facebook.com
fedex.com
homedepot.com
tiktok.com
pornhub.com
telegram.org
whatsapp.com
walmart.com
coinbase.com
nicehash.com
spotify.com
onlyfans.com
paypal.com
google.com
crypto.com
gemini.com
bitcoin.org
microsoft.com
metamask.io
bitwarden.com
26 changes: 26 additions & 0 deletions data/tlds.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
com
de
bond
shop
net
org
online
za
store
xyz
top
info
site
marketing
at
us
uk
today
ru
co
pl
fr
life
club
live
pro
12 changes: 6 additions & 6 deletions functions/opensquat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

readonly RAW='data/raw.txt'
readonly KEYWORDS='config/opensquat_keywords.txt'
readonly NRD='lists/wildcard_domains/nrd.txt'
readonly DEAD_DOMAINS='data/dead_domains.txt'
readonly PARKED_DOMAINS='data/parked_domains.txt'

Expand All @@ -20,21 +19,22 @@ opensquat() {
git clone -q https://github.com/atenreiro/opensquat
pip install -qr opensquat/requirements.txt

# Collate fresh NRD list and exit if any link is broken
# Collate NRD list and exit if any link is broken
# NRD feeds are limited to domains registered in the last 10 days
{
wget -qO - 'https://raw.githubusercontent.com/shreshta-labs/newly-registered-domains/main/nrd-1w.csv' \
|| exit 1
wget -qO - 'https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/wildcard/nrds.10-onlydomains.txt' \
| grep -vF '#' || exit 1
curl -sH 'User-Agent: openSquat-2.1.0' 'https://feeds.opensquat.com/domain-names.txt' \
|| exit 1
} >> "$NRD"
} > nrd.tmp

bash functions/tools.sh format "$NRD"
bash functions/tools.sh format nrd.tmp

# Filter out previously processed domains and known dead/parked domains
comm -23 "$NRD" <(sort "$RAW" "$DEAD_DOMAINS" "$PARKED_DOMAINS") \
> nrd.tmp
comm -23 nrd.tmp <(sort "$RAW" "$DEAD_DOMAINS" "$PARKED_DOMAINS") \
> temp && mv temp nrd.tmp

# Exit if no domains to process
if [[ ! -s nrd.tmp ]]; then
Expand Down
45 changes: 41 additions & 4 deletions functions/retrieve_domains.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ readonly SUBDOMAINS_TO_REMOVE='config/subdomains.txt'
readonly WILDCARDS='data/wildcards.txt'
readonly DEAD_DOMAINS='data/dead_domains.txt'
readonly PARKED_DOMAINS='data/parked_domains.txt'
readonly DNSTWIST_TARGETS='config/dnstwist_targets.txt'
readonly TLDS='data/tlds.txt'
readonly SOURCE_LOG='config/source_log.csv'
readonly DOMAIN_LOG='config/domain_log.csv'
TIME_FORMAT="$(date -u +"%H:%M:%S %d-%m-%y")"
Expand All @@ -34,14 +36,15 @@ source() {

source_manual
source_aa419
source_dfpi
#source_dfpi # Deactivated
source_dnstwist
source_google_search
source_guntab
source_opensquat
source_petscams
source_scamdirectory
source_scamadviser
source_stopgunscams
source_google_search
source_opensquat
}

# Function 'process_source' filters results retrieved from a source.
Expand Down Expand Up @@ -418,6 +421,40 @@ source_opensquat() {
process_source
}

# Function 'source_dnstwist' generates lookalike permutations of the domains
# listed in $DNSTWIST_TARGETS with dnstwist and keeps only the permutations
# that also appear in the collated newly registered domains (NRD) feeds.
# Input:
#   $DNSTWIST_TARGETS: file of target domains, one per line
#   $TLDS: TLD dictionary file handed to dnstwist's --tld option
# Output:
#   $results_file: domains present in both the dnstwist results and the NRD
#   list. process_source is then called; it presumably reads the 'source',
#   'ignore_from_light' and 'results_file' locals set here (verify against
#   process_source).
# Exits the whole script with status 1 if any NRD feed fails to download.
source_dnstwist() {
    local source='dnstwist'
    local ignore_from_light=true
    local results_file="data/pending/domains_${source}.tmp"
    local domain

    # Install dnstwist only when it is not already available
    command -v dnstwist &> /dev/null || apt install -yqq dnstwist

    # Collate NRD list and exit if any link is broken
    # NRD feeds are limited to recently registered domains (up to roughly the
    # last 30 days, depending on the feed)
    {
        wget -qO - 'https://raw.githubusercontent.com/shreshta-labs/newly-registered-domains/main/nrd-1m.csv' \
            || exit 1
        # Lines containing '#' are dropped from this feed
        wget -qO - 'https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/wildcard/nrds.10-onlydomains.txt' \
            | grep -vF '#' || exit 1
        curl -sH 'User-Agent: openSquat-2.1.0' 'https://feeds.opensquat.com/domain-names-month.txt' \
            || exit 1
    } > nrd.tmp

    format_list nrd.tmp

    # Collate results
    # The loop's output is redirected once with '>' so that a stale
    # results.tmp left by an earlier run cannot leak into the matches.
    # '|| [[ -n ... ]]' keeps a final target that lacks a trailing newline.
    while IFS= read -r domain || [[ -n "$domain" ]]; do
        # Skip blank lines instead of invoking dnstwist with an empty domain
        if [[ -z "$domain" ]]; then
            continue
        fi
        dnstwist -f list -r "$domain" --tld "$TLDS"
    done < "$DNSTWIST_TARGETS" > results.tmp

    format_list results.tmp

    # Find matching NRD
    # comm needs sorted input; assumes format_list leaves both files sorted
    # (TODO confirm against format_list)
    comm -12 results.tmp nrd.tmp > "$results_file"

    process_source
}

source_manual() {
local source='Manual'
local results_file='data/pending/domains_manual.tmp'
Expand Down Expand Up @@ -555,7 +592,7 @@ source_stopgunscams() {
trap cleanup EXIT

# Install jq
command -v jq &> /dev/null || apt-get install -yqq jq
command -v jq &> /dev/null || apt install -yqq jq

for file in config/* data/*; do
format_file "$file"
Expand Down
1 change: 0 additions & 1 deletion functions/update_readme.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ Statistics for each source:
Today | Yesterday | Excluded | Source
$(print_stats 'Google Search')
$(print_stats 'aa419.org')
$(print_stats 'dfpi.ca.gov')
$(print_stats 'guntab.com')
$(print_stats 'openSquat')
$(print_stats 'petscams.com')
Expand Down
Loading

0 comments on commit 5fa8e67

Please sign in to comment.