|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env python |
|
|
|
|
|
|
|
|
|
|
|
# Example of using BeautifulSoup4 to parse a site |
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
|
|
|
from time import sleep |
|
|
|
|
|
from urllib.parse import urlsplit |
|
|
|
|
|
|
|
|
|
|
|
import requests |
|
|
|
|
|
from bs4 import BeautifulSoup |
|
|
|
|
|
|
|
|
|
|
|
# One shared Session so all requests reuse the same TCP connection pool
# (and any cookies the site sets) instead of reconnecting per request.
session = requests.Session()
|
|
|
|
|
|
|
|
|
|
|
# All listing pages to scrape: the bare category URL plus pages 1-33.
# NOTE(review): "?page=1" very likely serves the same content as the bare
# URL, so page 1 may be fetched (and its emojis collected) twice -- confirm
# against the site before narrowing the range to range(2, 34).
page_urls = [
    "https://slackmojis.com/categories/19-random-emojis",
]
page_urls += [
    f"https://slackmojis.com/categories/19-random-emojis?page={page_number}"
    for page_number in range(1, 34)
]
|
|
|
|
|
|
|
|
|
|
|
# Destination directory for downloads. The trailing slash matters: filenames
# are appended by plain string concatenation in the download loop below.
base_dir = "/mnt/tank/media/pictures/emojis/Random/"




# Accumulates every <img> tag found across all listing pages.
emoji_images = []
|
|
|
|
|
|
|
|
|
|
|
# Walk every listing page and collect the <img> tags inside each
# <li class="emoji"> entry into emoji_images.
for page_url in page_urls:
    emoji_page = session.get(page_url)
    # Abort on HTTP errors rather than parsing an error page.
    emoji_page.raise_for_status()
    # Name the parser explicitly: bare BeautifulSoup(text) guesses the best
    # installed parser, emitting GuessedAtParserWarning and potentially
    # parsing differently from machine to machine.
    emoji_parsed = BeautifulSoup(emoji_page.text, "html.parser")
    for emoji_item in emoji_parsed.find_all("li", "emoji"):
        emoji_images.extend(emoji_item.find_all("img"))
    # Throttle: be polite to the server between page fetches.
    sleep(1)
|
|
|
|
|
|
|
|
|
|
|
# Download every collected image, de-duplicating local filenames on disk.
# Ensure the destination directory exists before the first open().
os.makedirs(base_dir, exist_ok=True)

for image_link in (img.get("src") for img in emoji_images):
    if not image_link:
        # An <img> without a src attribute yields None -- skip it instead
        # of passing None to session.get().
        continue
    with session.get(image_link, stream=True) as response:
        # Without this, a 404/500 body would be written to disk as an image.
        response.raise_for_status()
        # Local filename comes from the last component of the URL path.
        filename = f"{base_dir}{os.path.basename(urlsplit(image_link).path)}"
        base_filename = ".".join(filename.split(".")[:-1])
        file_ext = filename.split(".")[-1]
        # Different emojis can share a basename; suffix _1, _2, ... until
        # we find a name that is free.
        dup_count = 1
        while os.path.exists(filename):
            filename = f"{base_filename}_{dup_count}.{file_ext}"
            dup_count += 1
        with open(filename, "wb") as f:
            # Stream in 8 KiB chunks so large images never sit fully in memory.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)