-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
65 lines (51 loc) · 2.14 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
import numpy as np
import re
# Scrape allrecipes.com search results for "meatballs".
#
# For each of the first five result pages, collect the link of every recipe
# card, open each recipe page, and record its title, author and ingredients.
# The collected records are printed once all pages have been processed.

# Location of the local chromedriver binary handed to Selenium.
PATH = "/Users/favourkelvin/Downloads/chromedriver"

# Search-results URL template; "{}" is replaced by the 1-based page number.
SEARCH_URL = "https://www.allrecipes.com/search/results/?wt=meatballs&page={}"

driver = webdriver.Chrome(PATH)

# One (title, author, ingredient) tuple per recipe, accumulated over ALL
# pages. Each member of the tuple is a list of the texts of the matched
# elements (empty when the XPath matches nothing on that page).
all_details = []

try:
    for c in range(1, 6):
        page_url = SEARCH_URL.format(c)
        try:
            # get the page
            driver.get(page_url)
            print(page_url)

            # Gather every recipe href up front: the loop below navigates
            # the same browser to other pages, so the card elements must
            # not be needed any more by then.
            links = [
                card.find_element_by_tag_name("h3")
                .find_element_by_tag_name("a")
                .get_property("href")
                for card in driver.find_elements_by_class_name(
                    "fixed-recipe-card")
            ]

            # Visit each recipe page and pull out the fields of interest.
            for link in links:
                # get one recipe url
                driver.get(url=link)
                # title of the recipe
                elements = driver.find_elements_by_xpath(
                    "/html/body/div[2]/div/main/div[1]/div[2]/div[1]/div[1]/div[1]/div/h1")
                title = [el.text for el in elements]
                # author
                elements = driver.find_elements_by_xpath(
                    "/html/body/div[2]/div/main/div[1]/div[2]/div[1]/div[1]/div[4]/div/span/span/a")
                author = [el.text for el in elements]
                # ingredient
                elements = driver.find_elements_by_xpath(
                    "//*[@id='ar-calvera-app']/section[1]/fieldset/ul/li/label/span/span")
                ingredient = [el.text for el in elements]
                all_details.append((title, author, ingredient))
        except Exception as exc:
            # Report the failure and move on to the next result page instead
            # of silently swallowing it; recipes already collected are kept.
            print("Skipping page {} after error: {!r}".format(c, exc))
finally:
    # Always shut the browser down exactly once, on success or failure.
    driver.quit()

print(all_details)