commit e7f8dd4d258a0e1dd9a7d4068c4fc4b0e567d03f
Author: Sangha Lee
Date:   Mon Aug 4 03:36:45 2025 +0900

    init

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6cc50d9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+.venv/
+downloads/
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..07b3464
--- /dev/null
+++ b/main.py
@@ -0,0 +1,79 @@
+import asyncio
+from dataclasses import dataclass
+from argparse import ArgumentParser
+from pathlib import Path
+from datetime import datetime
+
+from tracker import Tracker
+
+
+@dataclass
+class ArgumentInterface:
+    """Typed view of the parsed command-line arguments."""
+
+    drawing_id: str
+    download_path: Path
+    download_format: str
+    download_delay: int
+
+parser = ArgumentParser()
+parser.add_argument('drawing_id', type=str)
+parser.add_argument('--download-path', type=Path, default=Path('downloads'))
+parser.add_argument('--download-format', type=str, default='{timestamp}__{drawing_id}.{extension}')
+parser.add_argument('--download-delay', type=int, default=60)
+
+args = ArgumentInterface(**vars(parser.parse_args()))
+
+
+async def main():
+    """Open the requested drawing, then save every tracked drawing periodically.
+
+    One download round is started every `--download-delay` seconds; nothing
+    is downloaded when the delay is not positive.
+    """
+    try:
+        # Downloads are stored in this directory, so make sure it exists.
+        args.download_path.mkdir(parents=True, exist_ok=True)
+
+        async with Tracker(headless=False) as tracker:
+            await tracker.open_drawing(args.drawing_id)
+
+            while args.download_delay > 0:
+                # Snapshot the ids so results can be matched to drawings below.
+                drawing_ids = list(tracker.pages)
+                print(f'downloading {len(drawing_ids)} drawing(s); delay={args.download_delay}')
+
+                timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
+                # The sleep runs alongside the downloads, so a round takes at
+                # least `download_delay` seconds.
+                _, *results = await asyncio.gather(
+                    asyncio.sleep(args.download_delay),
+                    *[
+                        tracker.download(
+                            drawing_id,
+                            args.download_path.joinpath(
+                                args.download_format.format(**{
+                                    'timestamp': timestamp,
+                                    'drawing_id': drawing_id,
+                                    'extension': 'psd' # TODO: no hardcode >:(
+                                })
+                            )
+                        )
+                        for drawing_id in drawing_ids
+                    ],
+                    return_exceptions=True
+                )
+
+                # `return_exceptions=True` keeps one failure from aborting the
+                # round, but the failures must still be reported.
+                for drawing_id, result in zip(drawing_ids, results):
+                    if isinstance(result, BaseException):
+                        print(f'failed to download drawing#{drawing_id}: {result!r}')
+
+    except Exception as e:
+        print(e)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3ff43f5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,24 @@
+attrs==25.3.0
+certifi==2025.8.3
+cffi==1.17.1
+greenlet==3.2.3
+h11==0.16.0
+idna==3.10
+ImageHash==4.3.2
+numpy==2.3.2
+outcome==1.3.0.post0
+pillow==11.3.0
+playwright==1.54.0
+pycparser==2.22
+pyee==13.0.0
+PySocks==1.7.1
+PyWavelets==1.8.0
+scipy==1.16.1
+sniffio==1.3.1
+sortedcontainers==2.4.0
+trio==0.30.0
+trio-websocket==0.12.2
+typing_extensions==4.14.1
+urllib3==2.5.0
+websocket-client==1.8.0
+wsproto==1.2.0
diff --git a/tracker.py b/tracker.py
new file mode 100644
index 0000000..5b46a78
--- /dev/null
+++ b/tracker.py
@@ -0,0 +1,162 @@
+import asyncio
+
+from typing import Dict, Optional
+from contextlib import AbstractAsyncContextManager
+from pathlib import Path
+
+from playwright.async_api import async_playwright
+from playwright.async_api import Playwright, Browser, Page
+from playwright.async_api import Response
+
+from imagehash import ImageHash, phash
+
+
+class Tracker(AbstractAsyncContextManager):
+    """Keep one Firefox tab open per magma.com drawing and export them as PSD.
+
+    Use as an async context manager. Positional and keyword arguments are
+    forwarded to `playwright.firefox.launch()`.
+    """
+
+    playwright: Playwright
+    browser: Browser
+
+    # drawing id -> tab showing that drawing
+    pages: Dict[str, Page]
+    # guards creation and replacement of entries in `pages`
+    pages_lock: asyncio.Lock
+    # serialises the menu clicks in `download()` across tabs
+    pages_active_lock: asyncio.Lock
+
+    # drawing id -> image hash; nothing in this class fills it yet
+    drawing_hashes: Dict[str, ImageHash]
+
+    def __init__(self, *browser_args, **browser_kwargs):
+        self.browser_args = browser_args
+        self.browser_kwargs = browser_kwargs
+
+        # Created per instance: as class attributes the dicts and locks were
+        # shared by every Tracker and built at import time.
+        self.pages = {}
+        self.pages_lock = asyncio.Lock()
+        self.pages_active_lock = asyncio.Lock()
+        self.drawing_hashes = {}
+
+    async def __aenter__(self):
+        """Start Playwright and launch Firefox."""
+        self.playwright = await async_playwright().start()
+        try:
+            self.browser = await self.playwright.firefox.launch(
+                *self.browser_args,
+                **self.browser_kwargs
+            )
+        except BaseException:
+            # `__aexit__` does not run when `__aenter__` raises, so the
+            # driver has to be stopped here.
+            await self.playwright.stop()
+            raise
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Close the browser (and with it every tab), then stop Playwright."""
+        try:
+            await self.browser.close()
+        finally:
+            self.pages.clear()
+            await self.playwright.stop()
+
+    async def open_drawing(self, drawing_id: str) -> Page:
+        """Return the tab for `drawing_id`, opening and loading it if needed.
+
+        A newly opened tab also opens a tab for every drawing id announced by
+        the page's second `POST /open-socket` request, and this call waits
+        for that request before returning.
+        """
+        async with self.pages_lock:
+            if drawing_id in self.pages:
+                if not self.pages[drawing_id].is_closed():
+                    return self.pages[drawing_id]
+
+                await self.pages[drawing_id].close()
+                del self.pages[drawing_id]
+
+            self.pages[drawing_id] = await self.browser.new_page()
+            print(f'open new tab for drawing#{drawing_id}')
+
+        # The site is an Angular app, so scraping its data from outside is
+        # awkward; instead the ids of all drawings in the project are read
+        # from the body of the `POST /open-socket` request.
+        socket_response = asyncio.Queue()
+        socket_response_count = 0
+
+        async def on_response(response: Response):
+            nonlocal socket_response_count
+
+            if response.request.method != 'POST':
+                return
+            if not response.request.url.endswith('/open-socket'):
+                return
+
+            # The first request lists only this drawing, so wait for the
+            # second one.
+            socket_response_count += 1
+            if socket_response_count < 2:
+                return
+
+            # NOTE(review): assumes the request body is JSON with an `ids`
+            # list -- confirm against the site.
+            drawing_ids = response.request.post_data_json['ids']
+            await asyncio.gather(*[
+                self.open_drawing(sibling_id)
+                for sibling_id in drawing_ids
+            ])
+
+            await socket_response.put(drawing_ids)
+
+        page = self.pages[drawing_id]
+        page.on('response', on_response)
+
+        await page.goto(f'https://magma.com/d/{drawing_id}')
+
+        # Hide the login modal that keeps covering the page.
+        await page.add_style_tag(content='modals-box { display: none !important }')
+
+        # Wait until the ids of all drawings in the project are known.
+        await socket_response.get()
+
+        return page
+
+    async def download(
+        self,
+        drawing_id: str,
+        path_to_save: Optional[Path] = None
+    ):
+        """Export `drawing_id` as PSD through the page's File menu.
+
+        Returns `path_to_save` when it is given (the file is stored there),
+        otherwise the path of Playwright's temporary download file.
+        """
+        page = await self.open_drawing(drawing_id)
+
+        async with page.expect_download() as event:
+            # Firefox only allows Hover -> Click in the active window, so
+            # only one tab may work the menu at a time.
+            async with self.pages_active_lock:
+                await page.click('button[aria-label=File]')
+                await page.hover('.dropdown-submenu:has(button[command=save-psd])')
+                await page.click('button[command=save-psd]')
+
+            print(f'downloading drawing#{drawing_id}')
+
+        file = await event.value
+
+        if path_to_save is None:
+            return await file.path()
+
+        # `save_as` waits for the download, creates missing parent
+        # directories and also works when the target is on another
+        # filesystem, where `Path.rename` would fail.
+        await file.save_as(path_to_save)
+        # The file was copied, so remove Playwright's temporary copy.
+        await file.delete()
+        return path_to_save