{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Smoke test: open python.org in a (non-headless) Chrome window, pause, then shut down.\n",
    "import time\n",
    "\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.chrome.service import Service\n",
    "from webdriver_manager.chrome import ChromeDriverManager\n",
    "\n",
    "# The value returned by install() is handed to Service as the chromedriver location.\n",
    "CHROMEDRIVER_PATH = ChromeDriverManager().install()\n",
    "driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))\n",
    "try:\n",
    "    driver.get(\"http://www.python.org\")\n",
    "    time.sleep(2)\n",
    "finally:\n",
    "    # quit() ends the whole WebDriver session, even when the page load raised.\n",
    "    driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from webdriver_manager.chrome import ChromeDriverManager\n",
    "\n",
    "# Driver location reused by the Chrome examples in the cells below.\n",
    "CHROMEDRIVER_PATH = ChromeDriverManager().install()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here is some important text you want to retrieve!\n",
      "A button to click!\n"
     ]
    }
   ],
   "source": [
    "# Read #content from the Ajax demo page after a fixed pause.\n",
    "import time\n",
    "\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.chrome.options import Options\n",
    "from selenium.webdriver.chrome.service import Service\n",
    "from selenium.webdriver.common.by import By\n",
    "\n",
    "chrome_options = Options()\n",
    "chrome_options.add_argument(\"--headless\")\n",
    "driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH), options=chrome_options)\n",
    "try:\n",
    "    driver.get('http://pythonscraping.com/pages/javascript/ajaxDemo.html')\n",
    "    # Fixed 3-second pause before reading the element; the next cell uses an explicit wait instead.\n",
    "    time.sleep(3)\n",
    "    print(driver.find_element(By.CSS_SELECTOR, '#content').text)\n",
    "finally:\n",
    "    driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here is some important text you want to retrieve!\n",
      "A button to click!\n"
     ]
    }
   ],
   "source": [
    "# Same page as above, but wait (up to 10 s) for #loadedButton instead of sleeping.\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.chrome.options import Options\n",
    "from selenium.webdriver.chrome.service import Service\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "\n",
    "chrome_options = Options()\n",
    "chrome_options.add_argument(\"--headless\")\n",
    "driver = webdriver.Chrome(\n",
    "    service=Service(CHROMEDRIVER_PATH),\n",
    "    options=chrome_options)\n",
    "\n",
    "try:\n",
    "    driver.get('http://pythonscraping.com/pages/javascript/ajaxDemo.html')\n",
    "    try:\n",
    "        WebDriverWait(driver, 10).until(\n",
    "            EC.presence_of_element_located((By.ID, 'loadedButton')))\n",
    "    finally:\n",
    "        # Printed whether or not the wait succeeded; a timeout still propagates afterwards.\n",
    "        print(driver.find_element(By.ID, 'content').text)\n",
    "finally:\n",
    "    driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Follow a client-side redirect by polling until the original document is gone.\n",
    "import time\n",
    "\n",
    "from selenium import webdriver\n",
    "from selenium.common.exceptions import StaleElementReferenceException\n",
    "from selenium.webdriver.chrome.options import Options\n",
    "from selenium.webdriver.chrome.service import Service\n",
    "from selenium.webdriver.common.by import By\n",
    "\n",
    "def waitForLoad(driver, timeout=10, poll_interval=0.5):\n",
    "    \"\"\"Poll until the page loaded in `driver` has been replaced, or give up.\n",
    "\n",
    "    Remembers the current <html> element, then checks every `poll_interval`\n",
    "    seconds whether the document root is still that same element.\n",
    "\n",
    "    Args:\n",
    "        driver: WebDriver instance whose current page is being watched.\n",
    "        timeout: Total number of seconds to keep polling (default 10).\n",
    "        poll_interval: Seconds to sleep between checks (default 0.5).\n",
    "\n",
    "    Returns:\n",
    "        None. Returns as soon as a page change is detected; otherwise prints a\n",
    "        timeout message once `timeout` seconds have been spent polling.\n",
    "    \"\"\"\n",
    "    original_html = driver.find_element(By.TAG_NAME, \"html\")\n",
    "    for _ in range(int(timeout / poll_interval)):\n",
    "        try:\n",
    "            # WebElement equality only compares element ids and never raises, so the\n",
    "            # result has to be checked explicitly: a different root means a new page.\n",
    "            if driver.find_element(By.TAG_NAME, \"html\") != original_html:\n",
    "                return\n",
    "            # Touching the old element raises once its document has been discarded.\n",
    "            original_html.is_enabled()\n",
    "        except StaleElementReferenceException:\n",
    "            return\n",
    "        time.sleep(poll_interval)\n",
    "    print(f\"Timing out after {timeout} seconds and returning\")\n",
    "\n",
    "chrome_options = Options()\n",
    "chrome_options.add_argument(\"--headless\")\n",
    "driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH), options=chrome_options)\n",
    "try:\n",
    "    driver.get(\"http://pythonscraping.com/pages/javascript/redirectDemo1.html\")\n",
    "    waitForLoad(driver)\n",
    "    print(driver.page_source)\n",
    "finally:\n",
    "    driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This is the page you are looking for!\n"
     ]
    }
   ],
   "source": [
    "# Same redirect, detected by waiting (up to 15 s) for text on the destination page.\n",
    "from selenium import webdriver\n",
    "from selenium.common.exceptions import TimeoutException\n",
    "from selenium.webdriver.chrome.options import Options\n",
    "from selenium.webdriver.chrome.service import Service\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "\n",
    "chrome_options = Options()\n",
    "chrome_options.add_argument(\"--headless\")\n",
    "# Built through Service like the other cells; the executable_path keyword is not\n",
    "# accepted by current Selenium 4 releases.\n",
    "driver = webdriver.Chrome(\n",
    "    service=Service(CHROMEDRIVER_PATH),\n",
    "    options=chrome_options)\n",
    "try:\n",
    "    driver.get('http://pythonscraping.com/pages/javascript/redirectDemo1.html')\n",
    "    try:\n",
    "        bodyElement = WebDriverWait(driver, 15).until(EC.presence_of_element_located(\n",
    "            (By.XPATH, '//body[contains(text(), \"This is the page you are looking for!\")]')))\n",
    "        print(bodyElement.text)\n",
    "    except TimeoutException:\n",
    "        print('Did not find the element')\n",
    "finally:\n",
    "    # The original cell never released the browser.\n",
    "    driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the value returned by the Firefox (geckodriver) manager's install().\n",
    "from webdriver_manager.firefox import GeckoDriverManager\n",
    "print(GeckoDriverManager().install())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the value returned by the Edge (Chromium) driver manager's install().\n",
    "from webdriver_manager.microsoft import EdgeChromiumDriverManager\n",
    "print(EdgeChromiumDriverManager().install())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}