#!/usr/bin/env python

# Example of using BeautifulSoup4 to scrape the "Random" emoji images
# from slackmojis.com and save them to a local directory.

import os
from time import sleep
from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup

# Reuse one session so all requests share a connection pool.
session = requests.Session()

# The bare category URL already serves page 1, so start the numbered
# pages at 2 to avoid fetching (and downloading) the first page twice.
page_urls = ["https://slackmojis.com/categories/19-random-emojis"]
for page_num in range(2, 34):
    page_urls.append(f"https://slackmojis.com/categories/19-random-emojis?page={page_num}")

base_dir = "/mnt/tank/media/pictures/emojis/Random/"
os.makedirs(base_dir, exist_ok=True)  # make sure the download directory exists
emoji_images = []

# Collect every <img> tag inside an <li class="emoji"> on each listing
# page, pausing a second between requests to be polite to the server.
for page_url in page_urls:
    emoji_page = session.get(page_url)
    emoji_page.raise_for_status()
    emoji_parsed = BeautifulSoup(emoji_page.text, "html.parser")
    for list_item in emoji_parsed.find_all("li", class_="emoji"):
        emoji_images.extend(list_item.find_all("img"))
    sleep(1)

# Stream each image to disk. Many emojis share a filename, so append
# _1, _2, ... until the name is unique instead of overwriting.
for image_link in [img.get("src") for img in emoji_images]:
    with session.get(image_link, stream=True) as response:
        response.raise_for_status()
        filename = os.path.join(base_dir, os.path.basename(urlsplit(image_link).path))
        base_filename, file_ext = os.path.splitext(filename)
        dup_count = 1
        while os.path.exists(filename):
            filename = f"{base_filename}_{dup_count}{file_ext}"
            dup_count += 1
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)