Hi,
I am quite new at Python. I made a code to download public data from a website (a lot of data) and it works quite well until I ask it to download each link and extract it in my computer. Can anyone help me? I am pasting my code below… I think it only fails in the last module, because i see that it solves all other previous sentences correctly.
Thanks in advance for any help,
AAAA
#------------------------------------------------------------------------------------------------------------------------------------
import requests
import lxml
import dload
import time
import selenium
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
#URL to dig into and order
url=‘https://www.spensiones.cl/apps/preciosIF.php’
#initiating the webdriver. Parameter includes the path of the webdriver.
driver = webdriver.Chrome(’./chromedriver’)
chrome_options=Options()
options = webdriver.ChromeOptions()
options.add_argument(’–log-level=3’)
options.add_experimental_option(‘excludeSwitches’, [‘enable-logging’])
driver.get(url)
wait = WebDriverWait(driver, 10)
Acá elijo un mes y un año específicos
select = Select(driver.find_element_by_name(‘mm’))
select.select_by_index(2)
select = Select(driver.find_element_by_name(‘aaaa’))
select.select_by_index(4)
time.sleep(10)
Acá apreta el botón
botonazo=driver.find_element_by_css_selector("[type=‘submit’]")
botonazo.click()
#adentro!
res=driver.page_source
soup = BeautifulSoup(res,‘lxml’)
#soup = BeautifulSoup(res.text, “html.parser”)
time.sleep(10)
for link in soup.find_all(‘a’, href=True):
href = link[‘href’]
print(link)
#*********************************************************esto hacia abajo
#***********************************************es lo que no me funciona
if any(href.endswith(x) for x in [’.zip’]):
time.sleep(10)
print(“Downloading ‘{}’”.format(href))
#remote_file = requests.get(res + href)
dload.save_unzip(format(href))
driver.close()
##############################################################################