
Compare commits

...

2 Commits

SHA1        Message                             Date
69a5fd2e97  refactor: clean up variable names   2025-08-04 12:31:10 +09:00
6987c26b11  fix: forgot to store file hash      2025-08-04 11:54:48 +09:00
3 changed files with 33 additions and 31 deletions

@@ -20,10 +20,10 @@ class Attachment:
 @dataclass
 class Post:
     id: int
-    boardId: str
-    boardPath: BoardPath
-    authorId: Optional[str] = None
-    authorName: Optional[str] = None
+    board_id: str
+    board_path: BoardPath
+    author_id: Optional[str] = None
+    author_name: Optional[str] = None
     category: Optional[str] = None
     title: Optional[str] = None
     body: Optional[Tag] = None
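
After this rename, call sites construct the dataclass with snake_case keyword arguments. A minimal sketch (the id and board values here are illustrative, not taken from the diff):

    post = Post(id=1234567, board_id='programming', board_path='board')
    print(post.board_id, post.author_name)  # author_name defaults to None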

@@ -2,7 +2,7 @@ import asyncio
 
 from aiohttp import ClientRequest, ClientResponse, ClientHandlerType
 
-class SemaphoreMiddleware(asyncio.Semaphore):
+class Semaphore(asyncio.Semaphore):
     async def __call__(self, req: ClientRequest, handler: ClientHandlerType) -> ClientResponse:
         async with self:
             return await handler(req)
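
The renamed Semaphore doubles as an aiohttp client middleware: it acquires itself before delegating to the handler, so at most N requests are in flight at once. A minimal usage sketch, assuming aiohttp's client middleware support (3.12+); the limit of 4 and the URL are arbitrary:

    import asyncio
    from aiohttp import ClientSession

    async def main():
        # every request passes through Semaphore.__call__, which holds
        # one of the 4 slots until the handler returns a response
        async with ClientSession(middlewares=(Semaphore(4),)) as session:
            async with session.get('https://m.dcinside.com') as response:
                print(response.status)

    asyncio.run(main())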

@@ -37,10 +37,10 @@ class Scraper(ClientSession):
 
     async def list(
         self,
-        boardId: str,
-        boardPath: BoardPath = 'board',
+        board_id: str,
+        board_path: BoardPath = 'board',
         page: int = 1,
-        categoryId: int = 0,
+        category_id: int = 0,
         only_recommended: bool = False,
         only_notice: bool = False,
         search_type: Optional[SearchType] = None,
@@ -50,20 +50,20 @@ class Scraper(ClientSession):
         """
         Fetches a list of posts matching the given conditions from a specific board
-        :param boardId: board ID
-        :param boardPath: board path (type)
+        :param board_id: board ID
+        :param board_path: board path (type)
         :param page: page number
-        :param categoryId: category prefix ID
+        :param category_id: category prefix ID
         :param only_recommended: whether to fetch recommended posts only
         :param only_notice: whether to fetch notice posts only
         :param search_type: search type
         :param search_position: search position
         :param search_value: search keyword
         """
 
-        url = f'https://m.dcinside.com/{boardPath}/{boardId}'
+        url = f'https://m.dcinside.com/{board_path}/{board_id}'
         params = {
             'page': page,
-            'headid': categoryId,
+            'headid': category_id,
             'recommend': only_recommended and '1' or '0',
             'notice': only_notice and '1' or '0',
             's_type': search_type or '',
@@ -78,8 +78,8 @@ class Scraper(ClientSession):
         return [
             Post(
                 id=int(re.findall(r'/\d+', tag.select_one('a[href]:first-child')['href'])[0][1:]),
-                boardId=boardId,
-                boardPath=boardPath
+                board_id=board_id,
+                board_path=board_path
             )
             for tag in document.select('.gall-detail-lnktb')
         ]
@@ -90,35 +90,35 @@ class Scraper(ClientSession):
         :param post: the post instance to fetch
         """
-        async with self.get(f'https://m.dcinside.com/{post.boardPath}/{post.boardId}/{post.id}') as response:
+        async with self.get(f'https://m.dcinside.com/{post.board_path}/{post.board_id}/{post.id}') as response:
             html = await response.text()
 
         document = BeautifulSoup(html, 'lxml')
 
         # The header title element is split into `li` tags and there are always exactly two of them
         # If not, parsing integrity is broken as a whole anyway, so there is no exception handling here
-        authorTag, timestampTag, *_ = document.select('.gallview-tit-box .ginfo2 > li')
-        authorAnchorTag = authorTag.select_one('a')
+        author_tag, timestamp_tag, *_ = document.select('.gallview-tit-box .ginfo2 > li')
+        author_anchor_tag = author_tag.select_one('a')
 
         # Parse the creation date
         post.created_at = (
             datetime
-            .strptime(timestampTag.get_text(strip=True), '%Y.%m.%d %H:%M')
+            .strptime(timestamp_tag.get_text(strip=True), '%Y.%m.%d %H:%M')
             .replace(tzinfo=ZoneInfo('Asia/Seoul'))
         )
 
         # Parse author information
-        if authorAnchorTag:
+        if author_anchor_tag:
             # If the author element has an anchor tag, the author has a gallog (profile)
-            post.authorId = re.findall(r'\/\w+$', authorAnchorTag['href'])[0][1:]
-            post.authorName = authorAnchorTag.get_text(strip=True)
+            post.author_id = re.findall(r'\/\w+$', author_anchor_tag['href'])[0][1:]
+            post.author_name = author_anchor_tag.get_text(strip=True)
         else:
-            authorParts = authorTag.get_text(strip=True).split('(')
-            post.authorId = authorParts.pop()[:-1].strip()  # 123.123) -> 123.123
-            post.authorName = authorParts.pop().strip()
+            author_parts = author_tag.get_text(strip=True).split('(')
+            post.author_id = author_parts.pop()[:-1].strip()  # 123.123) -> 123.123
+            post.author_name = author_parts.pop().strip()
 
         # On the mobile web, the category prefix and title are separated by `\n`
-        titleTexts = (
+        title_texts = (
             document
             .select_one('.gallview-tit-box .tit')
             .get_text(strip=True)
@@ -126,10 +126,10 @@ class Scraper(ClientSession):
         )
 
         # Parse the title and category prefix
-        post.title = titleTexts.pop().strip()
+        post.title = title_texts.pop().strip()
-        if titleTexts:
-            post.category = titleTexts.pop()[1:~1].strip()  # [XX] -> XX
+        if title_texts:
+            post.category = title_texts.pop()[1:~1].strip()  # [XX] -> XX
 
         # Parse the body
         post.body = document.select_one('.thum-txtin')
@@ -138,7 +138,7 @@ class Scraper(ClientSession):
             for tag in post.body.select('script, style'):
                 tag.extract()
 
-        print(f'{post.boardId}/{post.id}: {post.title}')
+        print(f'{post.board_id}/{post.id}: {post.title}')
 
     async def fetch_voice(self, id: str):
         """
@@ -217,7 +217,9 @@ class Scraper(ClientSession):
         attachment.source_filename = response.content_disposition.filename
         attachment.source_suffix = Path(attachment.source_filename).suffix
-        saved_path = save_dir / f'{hash.hexdigest()}{attachment.source_suffix}'
+        attachment.hash = hash.hexdigest()
+        saved_path = save_dir / f'{attachment.hash}{attachment.source_suffix}'
+
         # Move the temporarily downloaded file
         if not saved_path.exists():
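
The second commit's point is to keep the digest on the attachment before building the content-addressed path, so the hash is still available after the file is moved. The pattern in isolation, as a sketch (hashlib.sha256, tmp_path, and shutil.move are assumptions; the diff only shows a generic hash object and a truncated move block):

    import hashlib
    import shutil
    from pathlib import Path

    def store(tmp_path: Path, save_dir: Path, suffix: str) -> Path:
        # name the file after its own content hash so duplicate
        # downloads collapse onto a single stored file
        digest = hashlib.sha256(tmp_path.read_bytes()).hexdigest()
        saved_path = save_dir / f'{digest}{suffix}'
        if not saved_path.exists():
            shutil.move(tmp_path, saved_path)
        return saved_path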