-
Notifications
You must be signed in to change notification settings - Fork 2
/
spidey.py
40 lines (31 loc) · 1.06 KB
/
spidey.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import random
import urllib
import urllib.request
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def download_web(url):
    """Download the resource at *url* into the current directory.

    The file is saved under a random numeric name (1-999, no extension).
    NOTE(review): names can collide across calls and an existing file with
    the same name is silently overwritten — acceptable for this toy spider.

    :param url: absolute URL of the image to fetch
    """
    name = random.randrange(1, 1000)
    full = str(name)
    # Python 3 moved urlretrieve into urllib.request;
    # the old `urllib.urlretrieve` raises AttributeError on Python 3.
    urllib.request.urlretrieve(url, full)
# function to crawl the website and send the links of all the images
# to download_web()
def spidy():
    """Prompt for a URL, fetch the page, and download every image on it.

    Reads the URL from stdin, parses the HTML with BeautifulSoup, and
    passes each resolved image URL to download_web().
    """
    # raw_input() is Python 2 only; input() is the Python 3 equivalent.
    url = input("Enter the url ")
    src = requests.get(url)
    text = src.text
    soup = BeautifulSoup(text, "html.parser")
    for img in soup.findAll('img'):
        image = img.get('src')
        # Some <img> tags have no src attribute at all — skip them
        # (the original would crash calling .startswith on None).
        if not image:
            continue
        # Image links are often incomplete: protocol-relative ("//host/x"),
        # root-relative ("/x"), or page-relative ("imgs/x").  urljoin
        # resolves all of these against the page URL; absolute links
        # pass through unchanged.  The old '"https:" + image' hack only
        # handled the protocol-relative case.
        complete = urljoin(url, image)
        print(complete)
        download_web(complete)
# Script entry point: run the crawler only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    spidy()