diff --git a/misc-resources/web-scraping/workshop_2024/session1_intro-to-web-scraping.html b/misc-resources/web-scraping/workshop_2024/session1_intro-to-web-scraping.html
index 1848a0f..4acf543 100644
--- a/misc-resources/web-scraping/workshop_2024/session1_intro-to-web-scraping.html
+++ b/misc-resources/web-scraping/workshop_2024/session1_intro-to-web-scraping.html
@@ -546,9 +546,10 @@

A note on ~AI~

Homework: Installations for Next Time

diff --git a/misc-resources/web-scraping/workshop_2024/session1_intro-to-web-scraping.qmd b/misc-resources/web-scraping/workshop_2024/session1_intro-to-web-scraping.qmd
index 8ca3f68..993a3a4 100644
--- a/misc-resources/web-scraping/workshop_2024/session1_intro-to-web-scraping.qmd
+++ b/misc-resources/web-scraping/workshop_2024/session1_intro-to-web-scraping.qmd
@@ -110,9 +110,10 @@ sm.report('display')
 ## Homework: Installations for Next Time
 
 - Install Python via Anaconda - see guidance from PUG's Python Installation training [here](https://ui-research.github.io/python-at-urban/content/installation.html)
-- Install the following Python packages: `requests`, `beautifulsoup4`, `lxml`, and `selenium`
+- Install the following Python packages: `requests`, `beautifulsoup4`, `lxml`, `selenium`, and `webdriver-manager`.
 - Launch a new Jupyter Notebook if you've never done so before - see guidance from PUG's Intro to Python training [here](https://ui-research.github.io/python-at-urban/content/intro-to-python.html)
 - If you have any issues, please use the #python-users channel and we'd love to help. Someone else probably has the same question!
+- Sign up for GitHub using [this guide](https://ui-research.github.io/reproducibility-at-urban/git-installation.html) if you haven't so that you can access these workshop materials!
 
 ## Next Session
 - How to scrape text from static webpages using BeautifulSoup
diff --git a/site/content/web-scraping-dynamic.ipynb b/site/content/web-scraping-dynamic.ipynb
index ead7048..9ab1e1c 100644
--- a/site/content/web-scraping-dynamic.ipynb
+++ b/site/content/web-scraping-dynamic.ipynb
@@ -48,6 +48,8 @@
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.chrome.service import Service \n",
    "from webdriver_manager.chrome import ChromeDriverManager\n",
+   "from selenium.webdriver.chrome.options import Options\n",
+   "\n",
    "## NOTE: Some users may want to try a Firefox Driver instead;\n",
    "## Can comment above two lines and uncomment the below two lines\n",
    "# from selenium.webdriver.firefox.service import Service\n",
@@ -55,7 +57,13 @@
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "from selenium.webdriver.support.ui import Select, WebDriverWait\n",
    "import pandas as pd\n",
-   "import time"
+   "import time\n",
+   "\n",
+   "# Set Chrome options - NOTE: you can remove these options and still have the code work when running things locally\n",
+   "options = Options()\n",
+   "options.add_argument(\"--headless\") # Run Chrome in headless mode\n",
+   "options.add_argument(\"--no-sandbox\")\n",
+   "options.add_argument(\"--disable-dev-shm-usage\")"
   ]
  },
  {
@@ -71,7 +79,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 2,
+  "execution_count": 3,
   "id": "3b5b3848",
   "metadata": {},
   "outputs": [],