I’m creating a newspaper scraper with Newspaper3k. I was looking into implementing multi-threading to speed up the download process. I have tried using ThreadPoolExecutor from concurrent but I don’t see any time difference in execution time. Can anyone take a look at my code and give me some guidance. Thank you.
from newspaper import Article
import timeit
from concurrent.futures import ThreadPoolExecutor
import csv, random, sys
article_titles = []
article_text = []
article_author = []
article_date = []
article_urls = []
URLS = [
"https://www.newsday.com/classifieds/cars/bmw-general-motors-among-manufacturers-using-robots-for-efficiency-1.6680097",
"https://www.courant.com/business/hc-robots-take-factory-jobs-20161102-story.html",
"https://www.chicagotribune.com/opinion/commentary/ct-ford-motors-mexico-jobs-robots-trump-20170105-story.html",
"https://www.automationworld.com/factory/robotics/blog/13318590/the-newest-robotics-in-car-manufacturing",
"https://www.stltoday.com/business/local/automation-and-robots-on-the-rise-reshaping-the-workplace/article_f99e6ba7-17b8-586d-901e-1ac28e87501c.html",
"https://www.stltoday.com/business/local/automation-and-robots-on-the-rise-reshaping-the-workplace/article_f99e6ba7-17b8-586d-901e-1ac28e87501c.html",
"https://www.stltoday.com/business/local/automation-and-robots-on-the-rise-reshaping-the-workplace/article_f99e6ba7-17b8-586d-901e-1ac28e87501c.html",
"https://www.arkansasonline.com/news/2016/nov/07/blame-put-on-robots-for-job-losses-2016/",
"https://www.chicagotribune.com/business/ct-biz-cobots-manufacturing-replace-human-workers-20181010-story.html",
"https://www.assemblymag.com/articles/94569-gm-to-cut-workers-idle-us-manufacturing-plants",
"https://www.robotics.org/content-detail.cfm/Industrial-Robotics-Industry-Insights/Readying-Your-Robots-and-Workforce-for-Industry-4-0/content_id/6553",
"https://www.engineering.com/AdvancedManufacturing/ArticleID/13540/A-History-of-Collaborative-Robots-From-Intelligent-Lift-Assists-to-Cobots.aspx",
"https://www.industryweek.com/operations/article/22024809/reliability-approach-to-asset-uptime",
"https://www.roboticstomorrow.com/article/2016/06/the-abc-of-rpa-what-is-robotics-and-automation-in-the-office/8278",
"https://www.engineering.com/AdvancedManufacturing/ArticleID/13540/A-History-of-Collaborative-Robots-From-Intelligent-Lift-Assists-to-Cobots.aspx",
"https://www.industryweek.com/operations/article/22024809/reliability-approach-to-asset-uptime",
"https://www.roboticstomorrow.com/article/2016/06/the-abc-of-rpa-what-is-robotics-and-automation-in-the-office/8278",
"https://psmag.com/economics/the-future-of-work-manufacturing-is-now-for-the-robots-the-middle-class-needs-more",
"https://www.thefiscaltimes.com/Articles/2013/01/14/The-Rise-of-Robots-and-Decline-of-Jobs-Is-Here",
"https://www.roboticsbusinessreview.com/financial/robo-global-etf-nyse-bell/",
"https://www.discovermagazine.com/technology/beware-the-blue-collar-bots",
"https://www.digitaltrends.com/cool-tech/google-see-robots-anyway/",
"https://www.isa.org/intech/20160601/",
"https://robohub.org/the-evolution-of-assembly-lines-a-brief-history/",
"https://roboticsandautomationnews.com/2016/06/21/automatica-the-greatest-show-of-robotics-technology-on-earth-probably/5203/",
]
def parse():
for url in URLS:
try:
article = Article(url)
article.download()
article.parse()
article_titles.append(article.title)
except Exception as e:
print(e)
print("failed to retrive article")
def main():
executor = ThreadPoolExecutor(max_workers=4)
task1 = executor.submit(parse())
if __name__ == "__main__":
start = timeit.default_timer()
main()
print(article_titles)
stop = timeit.default_timer()
print(stop - start)