#!/usr/bin/env python
"""Download every emoji image from slackmojis.com's "Random" category.

Example of using BeautifulSoup4 to parse a site: walks the paginated
category listing, collects the <img> tags inside <li class="emoji">
elements, then streams each image into BASE_DIR, suffixing duplicate
basenames with _1, _2, ... instead of overwriting existing files.
"""
import os
from time import sleep
from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup

CATEGORY_URL = "https://slackmojis.com/categories/19-random-emojis"
# Pages 1..33 existed when this was written — NOTE(review): confirm the
# category still has 33 pages; the site may grow.
PAGE_COUNT = 33
BASE_DIR = "/mnt/tank/media/pictures/emojis/Random/"


def build_page_urls():
    """Return the unpaginated category URL plus its ?page=1..PAGE_COUNT variants."""
    urls = [CATEGORY_URL]
    urls.extend(f"{CATEGORY_URL}?page={page}" for page in range(1, PAGE_COUNT + 1))
    return urls


def collect_image_urls(session, page_urls):
    """Fetch each listing page and return the src URL of every emoji <img>.

    Raises requests.HTTPError if a listing page returns an error status.
    Sleeps 1s between pages to be polite to the server.
    """
    image_urls = []
    for page_url in page_urls:
        response = session.get(page_url)
        response.raise_for_status()
        # Explicit parser avoids bs4's GuessedAtParserWarning and keeps
        # parse results stable across environments.
        parsed = BeautifulSoup(response.text, "html.parser")
        for item in parsed.find_all("li", "emoji"):
            for img in item.find_all("img"):
                src = img.get("src")
                if src:  # skip malformed <img> tags with no src attribute
                    image_urls.append(src)
        sleep(1)
    return image_urls


def unique_path(base_dir, url):
    """Build a destination path from the URL's basename, avoiding collisions.

    If the name is already taken on disk, append _1, _2, ... before the
    extension until a free name is found.
    """
    filename = os.path.join(base_dir, os.path.basename(urlsplit(url).path))
    # splitext handles dotless names correctly, unlike manual str.split(".")
    stem, ext = os.path.splitext(filename)
    candidate = filename
    dup_count = 1
    while os.path.exists(candidate):
        candidate = f"{stem}_{dup_count}{ext}"
        dup_count += 1
    return candidate


def download_image(session, url, dest_path):
    """Stream one image URL to dest_path in 8 KiB chunks.

    Raises requests.HTTPError on a bad status, so an HTML error page is
    never saved to disk as an "image".
    """
    with session.get(url, stream=True) as response:
        response.raise_for_status()
        with open(dest_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)


def main():
    """Scrape all listing pages, then download every collected image."""
    os.makedirs(BASE_DIR, exist_ok=True)  # fail early if the target is unwritable
    session = requests.Session()
    for image_url in collect_image_urls(session, build_page_urls()):
        download_image(session, image_url, unique_path(BASE_DIR, image_url))


if __name__ == "__main__":
    main()