From 69a5fd2e97dcc0bedbf444f5ca21dff44135fb77 Mon Sep 17 00:00:00 2001 From: Sangha Lee Date: Mon, 4 Aug 2025 12:31:10 +0900 Subject: [PATCH] refactor: clean up variable names --- models/post.py | 8 +++---- utils/middlewares.py | 2 +- utils/scraper.py | 50 ++++++++++++++++++++++---------------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/models/post.py b/models/post.py index 70f1a7f..df2580b 100644 --- a/models/post.py +++ b/models/post.py @@ -20,10 +20,10 @@ class Attachment: @dataclass class Post: id: int - boardId: str - boardPath: BoardPath - authorId: Optional[str] = None - authorName: Optional[str] = None + board_id: str + board_path: BoardPath + author_id: Optional[str] = None + author_name: Optional[str] = None category: Optional[str] = None title: Optional[str] = None body: Optional[Tag] = None diff --git a/utils/middlewares.py b/utils/middlewares.py index 785d9c7..90a68e2 100644 --- a/utils/middlewares.py +++ b/utils/middlewares.py @@ -2,7 +2,7 @@ import asyncio from aiohttp import ClientRequest, ClientResponse, ClientHandlerType -class SemaphoreMiddleware(asyncio.Semaphore): +class Semaphore(asyncio.Semaphore): async def __call__(self, req: ClientRequest, handler: ClientHandlerType) -> ClientResponse: async with self: return await handler(req) diff --git a/utils/scraper.py b/utils/scraper.py index cc4ed0b..7a63a5a 100644 --- a/utils/scraper.py +++ b/utils/scraper.py @@ -37,10 +37,10 @@ class Scraper(ClientSession): async def list( self, - boardId: str, - boardPath: BoardPath = 'board', + board_id: str, + board_path: BoardPath = 'board', page: int = 1, - categoryId: int = 0, + category_id: int = 0, only_recommended: bool = False, only_notice: bool = False, search_type: Optional[SearchType] = None, @@ -50,20 +50,20 @@ class Scraper(ClientSession): """ 특정 게시판으로부터 특정 조건에 맞는 게시글 목록을 가져옵니다 - :param boardId: 게시판 아이디 - :param boardPath: 게시판 경로(종류) + :param board_id: 게시판 아이디 + :param board_path: 게시판 경로(종류) :param page: 페이지 번호 - :param categoryId: 말머리 아이디 + :param category_id: 말머리 아이디 :param only_recommended: 개념글 게시글만 조회할지? :param only_notice: 공지 게시글만 조회할지? :param search_type: 검색 종류 :param search_position: 검색 지점 :param search_value: 검색어 """ - url = f'https://m.dcinside.com/{boardPath}/{boardId}' + url = f'https://m.dcinside.com/{board_path}/{board_id}' params = { 'page': page, - 'headid': categoryId, + 'headid': category_id, 'recommend': only_recommended and '1' or '0', 'notice': only_notice and '1' or '0', 's_type': search_type or '', @@ -78,8 +78,8 @@ class Scraper(ClientSession): return [ Post( id=int(re.findall(r'/\d+', tag.select_one('a[href]:first-child')['href'])[0][1:]), - boardId=boardId, - boardPath=boardPath + board_id=board_id, + board_path=board_path ) for tag in document.select('.gall-detail-lnktb') ] @@ -90,35 +90,35 @@ class Scraper(ClientSession): :param post: 조회할 게시글 인스턴스 """ - async with self.get(f'https://m.dcinside.com/{post.boardPath}/{post.boardId}/{post.id}') as response: + async with self.get(f'https://m.dcinside.com/{post.board_path}/{post.board_id}/{post.id}') as response: html = await response.text() document = BeautifulSoup(html, 'lxml') # 상단 제목 요소는 `li`로 나누어져있고 무슨 지랄을 해도 정확히 2개임 # 만약 아니라면 어처피 파싱 무결성 전체가 깨질테니 예외 처리는 나도 몰?루 - authorTag, timestampTag, *_ = document.select('.gallview-tit-box .ginfo2 > li') - authorAnchorTag = authorTag.select_one('a') + author_tag, timestamp_tag, *_ = document.select('.gallview-tit-box .ginfo2 > li') + author_anchor_tag = author_tag.select_one('a') # 작성일 파싱 post.created_at = ( datetime - .strptime(timestampTag.get_text(strip=True), '%Y.%m.%d %H:%M') + .strptime(timestamp_tag.get_text(strip=True), '%Y.%m.%d %H:%M') .replace(tzinfo=ZoneInfo('Asia/Seoul')) ) # 작성자 정보 파싱 - if authorAnchorTag: + if author_anchor_tag: # 작성자 요소에 앵커 태그가 있다면 갤로그가 존재하는 상태임 - post.authorId = re.findall(r'\/\w+$', authorAnchorTag['href'])[0][1:] - post.authorName = authorAnchorTag.get_text(strip=True) + post.author_id = re.findall(r'\/\w+$', author_anchor_tag['href'])[0][1:] + post.author_name = author_anchor_tag.get_text(strip=True) else: - authorParts = authorTag.get_text(strip=True).split('(') - post.authorId = authorParts.pop()[:-1].strip() # 123.123) -> 123.123 - post.authorName = authorParts.pop().strip() + author_parts = author_tag.get_text(strip=True).split('(') + post.author_id = author_parts.pop()[:-1].strip() # 123.123) -> 123.123 + post.author_name = author_parts.pop().strip() # 모바일 웹에서 말머리와 제목은 `\n`으로 분리되어있음 - titleTexts = ( + title_texts = ( document .select_one('.gallview-tit-box .tit') .get_text(strip=True) @@ -126,10 +126,10 @@ class Scraper(ClientSession): ) # 제목과 말머리 파싱 - post.title = titleTexts.pop().strip() + post.title = title_texts.pop().strip() - if titleTexts: - post.category = titleTexts.pop()[1:~1].strip() # [XX] -> XX + if title_texts: + post.category = title_texts.pop()[1:~1].strip() # [XX] -> XX # 본문 파싱 post.body = document.select_one('.thum-txtin') @@ -138,7 +138,7 @@ class Scraper(ClientSession): for tag in post.body.select('script, style'): tag.extract() - print(f'{post.boardId}/{post.id}: {post.title}') + print(f'{post.board_id}/{post.id}: {post.title}') async def fetch_voice(self, id: str): """