init
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
__pycache__/
|
||||
.venv/
|
||||
downloads/
|
59
main.py
Normal file
59
main.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from tracker import Tracker
|
||||
|
||||
|
||||
@dataclass
class ArgumentInterface:
    """Typed container for the parsed command-line arguments of this script.

    Field names mirror the argparse destinations so the parsed namespace can
    be unpacked straight into the constructor.
    """

    # Identifier of the drawing that is opened first.
    drawing_id: str

    # Directory that each formatted file name is joined onto.
    download_path: Path

    # `str.format` template for file names; it is filled with the
    # `timestamp`, `drawing_id` and `extension` keys.
    download_format: str

    # Value handed to `asyncio.sleep` between download rounds (seconds);
    # the polling loop only runs while this is greater than zero.
    download_delay: int
|
||||
|
||||
# Command-line interface. NOTE: arguments are parsed at import time, so the
# module-level `args` object below exists as soon as this file is loaded.
parser = ArgumentParser()

# Required positional: the drawing to open first.
parser.add_argument('drawing_id', type=str)

# Optional settings; every default keeps the script runnable with only the
# positional argument supplied.
parser.add_argument(
    '--download-path',
    default=Path('downloads'),
    type=Path,
)
parser.add_argument(
    '--download-format',
    default='{timestamp}__{drawing_id}.{extension}',
    type=str,
)
parser.add_argument(
    '--download-delay',
    default=60,
    type=int,
)

# argparse maps `--download-path` to `download_path` (and so on), which lines
# up with the dataclass field names, so the namespace unpacks directly.
args = ArgumentInterface(**parser.parse_args().__dict__)
|
||||
|
||||
|
||||
async def main():
    """Open the requested drawing and periodically download every open drawing.

    Each round downloads all drawings currently tracked by the ``Tracker``
    concurrently with a sleep of ``args.download_delay`` seconds, so a round
    lasts at least that long. The loop only runs while
    ``args.download_delay > 0``; a non-positive delay downloads nothing.

    Any exception that escapes the loop is printed rather than propagated
    (this is the script's top-level boundary). Failures of individual
    downloads are reported per drawing and do not stop the loop.
    """
    try:
        async with Tracker(headless=False) as tracker:
            await tracker.open_drawing(args.drawing_id)

            while args.download_delay > 0:
                print(f'downloading {len(tracker.pages)} drawing(s); delay={args.download_delay}')

                timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

                # Snapshot the ids and resolve every target path up front;
                # the tracker may register more pages while downloads run.
                targets = {}
                for drawing_id in tracker.pages:
                    target = args.download_path / args.download_format.format(
                        timestamp=timestamp,
                        drawing_id=drawing_id,
                        extension='psd',  # TODO: no hardcode >:(
                    )
                    # The destination directory is not guaranteed to exist
                    # (and the format may contain sub-directories); without
                    # it every save would fail.
                    target.parent.mkdir(parents=True, exist_ok=True)
                    targets[drawing_id] = target

                # The sleep runs alongside the downloads so the delay is a
                # lower bound on the round duration, not an extra pause.
                results = await asyncio.gather(
                    asyncio.sleep(args.download_delay),
                    *[
                        tracker.download(drawing_id, target)
                        for drawing_id, target in targets.items()
                    ],
                    return_exceptions=True,
                )

                # `return_exceptions=True` turns failures into return values;
                # surface them instead of silently dropping them.
                # results[0] belongs to the sleep.
                for drawing_id, result in zip(targets, results[1:]):
                    if isinstance(result, BaseException):
                        print(f'failed to download drawing#{drawing_id}: {result!r}')

    except Exception as e:
        print(e)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Start the event loop only when executed as a script, not on import.
    asyncio.run(main())
|
24
requirements.txt
Normal file
24
requirements.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
attrs==25.3.0
|
||||
certifi==2025.8.3
|
||||
cffi==1.17.1
|
||||
greenlet==3.2.3
|
||||
h11==0.16.0
|
||||
idna==3.10
|
||||
ImageHash==4.3.2
|
||||
numpy==2.3.2
|
||||
outcome==1.3.0.post0
|
||||
pillow==11.3.0
|
||||
playwright==1.54.0
|
||||
pycparser==2.22
|
||||
pyee==13.0.0
|
||||
PySocks==1.7.1
|
||||
PyWavelets==1.8.0
|
||||
scipy==1.16.1
|
||||
sniffio==1.3.1
|
||||
sortedcontainers==2.4.0
|
||||
trio==0.30.0
|
||||
trio-websocket==0.12.2
|
||||
typing_extensions==4.14.1
|
||||
urllib3==2.5.0
|
||||
websocket-client==1.8.0
|
||||
wsproto==1.2.0
|
119
tracker.py
Normal file
119
tracker.py
Normal file
@@ -0,0 +1,119 @@
|
||||
import asyncio
|
||||
|
||||
from typing import Dict
|
||||
from contextlib import AbstractAsyncContextManager
|
||||
from pathlib import Path
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
from playwright.async_api import Playwright, Browser, Page
|
||||
from playwright.async_api import Response
|
||||
|
||||
from imagehash import ImageHash, phash
|
||||
|
||||
|
||||
class Tracker(AbstractAsyncContextManager):
    """Async context manager that drives Firefox (via Playwright) on magma.com.

    Entering the context starts Playwright and launches Firefox with the
    positional/keyword arguments given to the constructor; leaving it closes
    the browser and stops Playwright.

    One browser tab is kept per drawing id in ``pages``. Opening a drawing
    also opens a tab for every other drawing id announced by the page's
    ``POST /open-socket`` request.
    """

    playwright: Playwright
    browser: Browser

    # drawing id -> tab showing that drawing
    pages: Dict[str, Page]
    # serialises lookup/creation of entries in `pages`
    pages_lock: asyncio.Lock
    # serialises the menu interaction that needs the active window
    pages_active_lock: asyncio.Lock

    # drawing id -> perceptual hash; not read or written by any method here yet
    drawing_hashes: Dict[str, ImageHash]

    def __init__(self, *browser_args, **browser_kwargs):
        """Store the arguments later forwarded to ``firefox.launch``."""
        self.browser_args = browser_args
        self.browser_kwargs = browser_kwargs

        # Per-instance state: as class attributes these containers and locks
        # would be shared by every Tracker and created at import time.
        self.pages = {}
        self.pages_lock = asyncio.Lock()
        self.pages_active_lock = asyncio.Lock()
        self.drawing_hashes = {}

    async def __aenter__(self):
        """Start Playwright, launch Firefox and return ``self``."""
        self.playwright = await async_playwright().start()
        try:
            self.browser = await self.playwright.firefox.launch(
                *self.browser_args,
                **self.browser_kwargs
            )
        except BaseException:
            # __aexit__ is not called when __aenter__ raises, so stop the
            # already-started Playwright driver here to avoid leaking it.
            await self.playwright.stop()
            raise
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the browser, then stop Playwright; exceptions are not suppressed."""
        try:
            await self.browser.close()
        finally:
            await self.playwright.stop()

    async def open_drawing(self, drawing_id: str) -> Page:
        """Return the tab for ``drawing_id``, opening and loading it if needed.

        If a live tab is already registered it is returned immediately.
        Otherwise a new tab is created, navigated to the drawing, and this
        coroutine waits until the page's second ``POST /open-socket`` request
        has been seen and a tab has been opened for every id it lists.
        """
        # The lock covers only the lookup/creation step. `on_response` below
        # re-enters `open_drawing`, and asyncio.Lock is not re-entrant, so
        # holding it while waiting for the socket data would deadlock.
        async with self.pages_lock:
            if drawing_id in self.pages:
                if not self.pages[drawing_id].is_closed():
                    return self.pages[drawing_id]

                await self.pages[drawing_id].close()
                del self.pages[drawing_id]

            self.pages[drawing_id] = await self.browser.new_page()
            print(f'open new tab for drawing#{drawing_id}')

        # The site is an Angular app, so scraping its data from the outside
        # is not easy. Instead, all image ids of the current project are
        # taken from the `POST /open-socket` request.
        socket_response = asyncio.Queue()
        socket_response_count = 0

        async def on_response(response: Response):
            nonlocal socket_response_count

            if response.request.method != 'POST':
                return
            if not response.request.url.endswith('/open-socket'):
                return

            # Per the original author: the first payload only contains the
            # drawing itself, so the second one has to be used.
            socket_response_count += 1
            if socket_response_count < 2:
                return

            drawing_ids = response.request.post_data_json['ids']
            await asyncio.gather(*[
                self.open_drawing(sibling_id)
                for sibling_id in drawing_ids
            ])

            await socket_response.put(drawing_ids)

        page = self.pages[drawing_id]
        page.on('response', on_response)

        await page.goto(f'https://magma.com/d/{drawing_id}')

        # Hide the login modal, which otherwise gets in the way.
        await page.add_style_tag(content='modals-box { display: none !important }')

        # Wait until all drawing ids of the current project have been fetched.
        await socket_response.get()

        return page

    async def download(
        self,
        drawing_id: str,
        path_to_save: Path = None
    ):
        """Export ``drawing_id`` as PSD through the page's File menu.

        Args:
            drawing_id: Drawing to export; its tab is opened if necessary.
            path_to_save: Optional destination. When given, the download is
                saved there and this path is returned.

        Returns:
            ``path_to_save`` when provided, otherwise the path of the file as
            downloaded by Playwright.
        """
        page = await self.open_drawing(drawing_id)

        async with page.expect_download() as event:
            # Firefox only allows Hover -> Click in the active window, so the
            # menu interaction is serialised across tabs.
            async with self.pages_active_lock:
                await page.click('button[aria-label=File]')
                await page.hover('.dropdown-submenu:has(button[command=save-psd])')
                await page.click('button[command=save-psd]')

            print(f'downloading drawing#{drawing_id}')

        file = await event.value

        if path_to_save:
            # save_as copies the finished download to the destination; unlike
            # Path.rename it does not fail when the download directory and
            # the destination are on different filesystems.
            await file.save_as(path_to_save)
            return path_to_save

        return await file.path()
|
Reference in New Issue
Block a user