I’m working on a web scraper using BeautifulSoup that exports the scraped data to Excel as an .xlsx file. The export itself works, but what I get in the spreadsheet is: [
Thursday, June 2
, when all I need is the date text “Thursday, June 2”. The website I’m getting the data from is shown in the code below. Thanks for the help!
#Required libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
#function to extract data from website
def get_data(url):
    """Fetch *url* and collect the text of every news date on the page.

    Each ``div.news-wrapper`` element is searched for ``div.news-date``
    children, and the visible text of every match becomes one row.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of ``{"Dates": <text>}`` dicts, one per date found. The list
        is empty when the page contains no matching elements.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, "lxml")

    data = []
    for wrapper in soup.find_all("div", class_="news-wrapper"):
        # find_all() returns a list of Tag objects; storing that list directly
        # is what put "[<div ...>Thursday, June 2</div>, ...]" in the sheet.
        # Pull the text out of each tag instead, trimming the surrounding
        # newlines and spaces.
        for date_tag in wrapper.find_all("div", class_="news-date"):
            data.append({"Dates": date_tag.get_text(strip=True)})
    return data
#function to create pandas dataframe and export the dataframe to excel
def export_data(data):
    """Write *data* to ``dates.xlsx`` via a pandas DataFrame.

    Args:
        data: Records accepted by ``pd.DataFrame`` (here, a list of dicts).
    """
    pd.DataFrame(data).to_excel("dates.xlsx")
if __name__ == "__main__":
    # Scrape the page and write the results straight to the spreadsheet.
    export_data(get_data("https://grayson.edu/"))

    # Status message padded by a blank-looking line above and below.
    for line in (" ", "Extract complete....", " "):
        print(line)