i want to web scrap the “search:pc” part of the website called jumia. i wanted to iterate over all the pages ,but unfortunetly it didn’t work , i don’t why it overwrites the file while it is outside theloop. and by using:
with pd.ExcelWriter("output.xlsx", engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
pop.to_excel(writer, sheet_name="sheet1"
instead of:
with open(f"output.xlsx" ,"a") :
with pd.ExcelWriter("output.xlsx") as writer:
pop.to_excel(writer,sheet_name="sheet2")
but it results in an error:
File "c:\Users\hp\Desktop\python_projects\test3.py", line 40, in <module>
find_computers()
File "c:\Users\hp\Desktop\python_projects\test3.py", line 33, in find_computers
with pd.ExcelWriter("output.xlsx", engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\hp\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\io\excel\_openpyxl.py", line 61, in __init__
super().__init__(
File "C:\Users\hp\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\io\excel\_base.py", line 1263, in __init__
self._handles = get_handle(
^^^^^^^^^^^
File "C:\Users\hp\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\io\common.py", line 872, in get_handle
handle = open(handle, ioargs.mode)
^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 'output.xlsx'
this is my actual code:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
import openpyxl
import os
from bs4 import Tag
def find_computers():
n=1
while n<=50:
html_text=requests.get(f"https://www.jumia.ma/catalog/?q=pc&page={n}#catalog-listing").text
soup=BeautifulSoup(html_text,"lxml")
computers=soup.find_all("a",class_="core")
df={"price": [],"original price": [],"promo":[]}
computer_name_list=[]
for computer in computers:
computer_name=computer.find("h3",class_="name").text.strip()
price=computer.find("div",class_="prc").text.strip()
original_price_element=computer.find("div",class_="old")
original_price=original_price_element.text.strip() if isinstance(original_price_element, Tag) else "N/A"
promo_element = computer.find("div", class_="bdg _dsct _sm")
promo = promo_element.text.strip() if isinstance(promo_element, Tag) else "N/A"
df["price"].append(price)
df["original price"].append(original_price)
df["promo"].append(promo)
computer_name_list.append(computer_name)
n+=1
pop=pd.DataFrame(df,index=computer_name_list)
pd.set_option('colheader_justify', 'center')
with pd.ExcelWriter("output.xlsx") as writer:
pop.to_excel(writer,sheet_name="sheet2")
if __name__=="__main__":
while True:
find_computers()
time_s = 10
time.sleep(6 * time_s)
I will be thankful if someone can guide me.