diff --git a/config/blacklist.txt b/config/blacklist.txt index d1cd4ab00..980c0aeed 100644 --- a/config/blacklist.txt +++ b/config/blacklist.txt @@ -1,3 +1,8 @@ +usdtzer.com +1xbet-officials22.top +po-fxd1.com +redditrace.com +shopeeoficialbr.com 1xbet-officialsite.com 4uvize.com 5adsday.com @@ -191,7 +196,6 @@ rakuten.co.jp.rakutle.xyz rarebreedtriggerusa.com rateglo.com raybansales.us -redditrace.com] registereddocumentseu.com reifenversand-arndt.com relayreporty.com diff --git a/config/search_terms.csv b/config/search_terms.csv index c7ca30722..f18ef1ed3 100644 --- a/config/search_terms.csv +++ b/config/search_terms.csv @@ -79,4 +79,4 @@ your number one source for all things cosmetic and personal care product.,,Low c "share of the global digital asset trading market within 1 year. Very recently in 2021 at the Crypto Expo Dubai",y,Low count (9). Low count for monthly (2) "Our Goal is to provide the best online automated trading solutions to our customers globally using our team of experienced experts.",y,No results for monthly "Loading... Password. Confirm password. Invitation Code. I agree with the 《Registration Agreement》. Sign Up.",y,No results -"Our pharmacy helps thousands of people to get high-quality generic and brand medications from world-renowned and tested suppliers. Your strong health is not a wonder, it's a work of the best specialists in health-care area, and we are here to share with you the results of their comprehensive scientific study.",, +"Our pharmacy helps thousands of people to get high-quality generic and brand medications from world-renowned and tested suppliers. Your strong health is not a wonder, it's a work of the best specialists in health-care area, and we are here to share with you the results of their comprehensive scientific study.",y,No results for monthly diff --git a/scripts/retrieve_domains.sh b/scripts/retrieve_domains.sh index 198d93138..a59d1a08b 100644 --- a/scripts/retrieve_domains.sh +++ b/scripts/retrieve_domains.sh @@ -195,7 +195,7 @@ process_source() { whitelisted_tld="$(grep -E '\.(gov|edu|mil)(\.[a-z]{2})?$' "$results_file")" whitelisted_tld_count="$(filter "$whitelisted_tld" tld)" - # Remove non-domain entries including IP addresses excluding punycode + # Remove non-domain entries including IP addresses excluding Punycode invalid="$(grep -vE "^${STRICT_DOMAIN_REGEX}$" "$results_file")" # Note invalid entries are not counted filter "$invalid" invalid --preserve > /dev/null