diff --git a/README.md b/README.md index 4d47f7e..de9f5d8 100644 --- a/README.md +++ b/README.md @@ -1 +1,68 @@ -# backend-python \ No newline at end of file +# 다양성 평가 리포트 웹사이트 백엔드 + +![CD Status](https://github.com/NewWays-TechForImpactKAIST/backend-python/actions/workflows/build-dev-image.yaml/badge.svg) + +FastAPI로 개발되는 다양성 평가 리포트 웹사이트의 백엔드 레포지토리입니다. + +## Docs + +본 프로젝트는 Swagger를 사용하여 API 문서를 작성하고 있습니다. +[Swagger](https://diversity-api.tech4impact.kr/docs) 에서 API Endpoints 들을 확인하고 테스트 할 수 있습니다. + +## Setup + +이 프로젝트를 실행하기 위해서는 Python(v3.11 이상)이 설치되어 있어야 합니다. (`enum.StrEnum`과 `match` 문을 사용합니다.) + +### 개발환경 설정 과정 + +1. 파이썬 가상환경 생성 + - 아래 명령을 실행하여 파이썬 가상환경을 생성합니다. + ```bash + cd ~ && virtualenv newways --python=3.11 + ``` +2. 가상환경 활성화 + - 아래 명령을 실행하여 가상환경을 활성화합니다. + ```bash + source ~/newways/bin/activate + ``` +3. 레포지토리 클론 + - 아래 명령을 실행하여 레포지토리를 클론하고, 클론한 디렉터리로 이동합니다. + ```bash + git clone https://github.com/NewWays-TechForImpactKAIST/backend-python && cd backend-python + ``` +4. 필요한 패키지 설치 + - requirements.txt에 명시된 패키지를 설치합니다. + ```bash + pip install -r requirements.txt + ``` +5. 환경 변수 설정 + - `.env.example` 파일을 복사하여 `.env` 파일을 생성합니다. + ```bash + cp .env.example .env + ``` + - `.env` 파일을 열어 환경변수를 필요에 따라 변경합니다. +6. uvicorn 실행 + - uvicorn을 사용해 fastapi를 실행합니다. + ```bash + uvicorn main:app --host HOST --port PORT + ``` + +### 배포 과정 + +이 레포의 main 브랜치에 새 커밋이 생성될 때마다, GitHub Actions를 통해 배포용 Docker 이미지가 빌드됩니다. +이 Docker 이미지를 사용하여 서비스를 배포할 수 있습니다. + +1. 환경변수 설정 + - `.env.example` 파일을 복사하여 `.env` 파일을 생성합니다. + ```bash + cp .env.example .env + ``` + - `.env` 파일을 열어 환경변수를 필요에 따라 변경합니다. + +2. 백엔드 컨테이너 배포 + - 컨테이너를 아래 명령으로 생성합니다. + ```bash + docker-compose -f docker-compose.dev.yml up -d + ``` + - `newways-watchtower`는 1분에 한 번씩 새 백엔드 이미지가 있는지 확인하여, 백엔드 컨테이너를 주기적으로 업데이트하는 역할을 수행합니다.
+
diff --git a/main.py b/main.py
index 61f00a7..21b0d14 100644
--- a/main.py
+++ b/main.py
@@ -1,12 +1,19 @@
 from fastapi import FastAPI, Request
 from dotenv import load_dotenv
-from routers import scrapResult, commonInfo
+from routers import (
+    commonInfo,
+    ageHist,
+    scrapResultLocal,
+    scrapResultMetro,
+    scrapResultNational,
+)
 from contextlib import asynccontextmanager
 from typing import Dict
 from model import MongoDB
 from model.ResponseType import ChartResponse, GenderInfo, PartyInfo, AgeInfo
 from fastapi.middleware.cors import CORSMiddleware
 
+
 @asynccontextmanager
 async def initMongo(app: FastAPI):
     MongoDB.client.connect()
@@ -20,16 +27,21 @@ async def initMongo(app: FastAPI):
 
 origin = [
     "http://localhost:5173",
-    "https://diversity.tech4impact.kr/"
+    "https://diversity.tech4impact.kr",
 ]
 
 app.add_middleware(
     CORSMiddleware,
     allow_origins=origin,
+    # Raw string: "\." must reach the regex engine as an escaped dot.
+    allow_origin_regex=r"https://.*\.netlify\.app",
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
-app.include_router(scrapResult.router)
+app.include_router(scrapResultLocal.router)
+app.include_router(scrapResultMetro.router)
+app.include_router(scrapResultNational.router)
 app.include_router(commonInfo.router)
+app.include_router(ageHist.router)
diff --git a/model/AgeHist.py b/model/AgeHist.py
new file mode 100644
index 0000000..f02659c
--- /dev/null
+++ b/model/AgeHist.py
@@ -0,0 +1,34 @@
+from pydantic import BaseModel
+from enum import StrEnum
+
+
+class AgeHistDataTypes(StrEnum):
+    elected = "elected"
+    candidate = "candidate"
+
+
+class AgeHistMethodTypes(StrEnum):
+    equal = "equal"
+    kmeans = "kmeans"
+
+
+class AgeHistDataPoint(BaseModel):
+    minAge: int
+    maxAge: int
+    count: int
+    ageGroup: int
+
+
+class NationalAgeHistData(BaseModel):
+    data: list[AgeHistDataPoint]
+
+
+class MetroAgeHistData(BaseModel):
+    metroId: int
+    data: list[AgeHistDataPoint]
+
+
+class LocalAgeHistData(BaseModel):
+    metroId: int
+    localId: int
+    data: list[AgeHistDataPoint]
diff --git a/model/BasicResponse.py
b/model/BasicResponse.py index f6523bb..06d1217 100644 --- a/model/BasicResponse.py +++ b/model/BasicResponse.py @@ -3,6 +3,8 @@ SUCCESS = 200 REGION_CODE_ERR = 400 +COLLECTION_NOT_EXIST_ERR = 600 +NO_DATA_ERROR = 800 class MessageResponse(BaseModel): @@ -14,3 +16,12 @@ class ErrorResponse(BaseModel): error: str code: int message: str + + +NO_DATA_ERROR_RESPONSE: ErrorResponse = ErrorResponse.model_validate( + { + "error": "NoDataError", + "code": NO_DATA_ERROR, + "message": "No data was retrieved with the provided input.", + } +) diff --git a/model/MongoDB.py b/model/MongoDB.py index 7228259..afc663e 100644 --- a/model/MongoDB.py +++ b/model/MongoDB.py @@ -10,11 +10,13 @@ def __init__(self): self.client = None self.council_db = None self.district_db = None + self.stats_db = None def connect(self): self.client = AsyncIOMotorClient(os.getenv("MONGO_CONNECTION_URI")) self.council_db = AsyncIOMotorDatabase(self.client, "council") self.district_db = AsyncIOMotorDatabase(self.client, "district") + self.stats_db = AsyncIOMotorDatabase(self.client, "stats") def close(self): self.client.close() diff --git a/model/ScrapResult.py b/model/ScrapResultCommon.py similarity index 77% rename from model/ScrapResult.py rename to model/ScrapResultCommon.py index d53b501..1438b26 100644 --- a/model/ScrapResult.py +++ b/model/ScrapResultCommon.py @@ -14,21 +14,6 @@ class FactorType(StrEnum): party = "party" -# ============================================== -# = Template Data Types = -# ============================================== -class GenderTemplateData(BaseModel): - genderDiversityIndex: float - - -class AgeTemplateData(BaseModel): - ageDiversityIndex: float - - -class PartyTemplateData(BaseModel): - partyDiversityIndex: float - - # ============================================== # = Chart Data Types = # ============================================== @@ -39,7 +24,7 @@ class GenderChartDataPoint(BaseModel): class AgeChartDataPoint(BaseModel): minAge: int # 닫힌 구간 - maxAge: int # 닫힌 
구간 + maxAge: int # 열린 구간 count: int diff --git a/model/ScrapResultLocal.py b/model/ScrapResultLocal.py new file mode 100644 index 0000000..bab4e3f --- /dev/null +++ b/model/ScrapResultLocal.py @@ -0,0 +1,78 @@ +from pydantic import BaseModel + + +# ============================================== +# = Template Data Types = +# ============================================== +class GenderTemplateDataLocal(BaseModel): + class GenderTemplateDataPoint(BaseModel): + year: int + malePop: int + femalePop: int + + metroId: int + localId: int + genderDiversityIndex: float + current: GenderTemplateDataPoint + currentCandidate: GenderTemplateDataPoint + prev: GenderTemplateDataPoint + prevCandidate: GenderTemplateDataPoint + meanMalePop: float + meanFemalePop: float + + +class AgeTemplateDataLocal(BaseModel): + class AgeRankingParagraphData(BaseModel): + class AgeRankingAllIndices(BaseModel): + localId: int + rank: int + ageDiversityIndex: float + + ageDiversityIndex: float + allIndices: list[AgeRankingAllIndices] + + class AgeIndexHistoryParagraphData(BaseModel): + class AgeIndexHistoryIndexData(BaseModel): + year: int + unit: int + candidateCount: int + candidateDiversityIndex: float + candidateDiversityRank: int + electedDiversityIndex: float + electedDiversityRank: int + + mostRecentYear: int + history: list[AgeIndexHistoryIndexData] + + class AgeHistogramParagraphData(BaseModel): + class AgeHistogramAreaData(BaseModel): + localId: int + firstQuintile: int + lastQuintile: int + + year: int + candidateCount: int + electedCount: int + firstQuintile: int + lastQuintile: int + divArea: AgeHistogramAreaData + uniArea: AgeHistogramAreaData + + metroId: int + localId: int + rankingParagraph: AgeRankingParagraphData + indexHistoryParagraph: AgeIndexHistoryParagraphData + ageHistogramParagraph: AgeHistogramParagraphData + + +class PartyTemplateDataLocal(BaseModel): + class PartyCountDataPoint(BaseModel): + party: str + count: int + + metroId: int + localId: int + partyDiversityIndex: 
float + prevElected: list[PartyCountDataPoint] + currentElected: list[PartyCountDataPoint] + currentCandidate: list[PartyCountDataPoint] diff --git a/model/ScrapResultMetro.py b/model/ScrapResultMetro.py new file mode 100644 index 0000000..90dd843 --- /dev/null +++ b/model/ScrapResultMetro.py @@ -0,0 +1,75 @@ +from pydantic import BaseModel + + +# ============================================== +# = Template Data Types = +# ============================================== +class GenderTemplateDataMetro(BaseModel): + class GenderTemplateDataPoint(BaseModel): + year: int + malePop: int + femalePop: int + + metroId: int + genderDiversityIndex: float + current: GenderTemplateDataPoint + currentCandidate: GenderTemplateDataPoint + prev: GenderTemplateDataPoint + prevCandidate: GenderTemplateDataPoint + meanMalePop: float + meanFemalePop: float + + +class AgeTemplateDataMetro(BaseModel): + class AgeRankingParagraphData(BaseModel): + class AgeRankingAllIndices(BaseModel): + metroId: int + rank: int + ageDiversityIndex: float + + ageDiversityIndex: float + allIndices: list[AgeRankingAllIndices] + + class AgeIndexHistoryParagraphData(BaseModel): + class AgeIndexHistoryIndexData(BaseModel): + year: int + unit: int + candidateCount: int + candidateDiversityIndex: float + candidateDiversityRank: int + electedDiversityIndex: float + electedDiversityRank: int + + mostRecentYear: int + history: list[AgeIndexHistoryIndexData] + + class AgeHistogramParagraphData(BaseModel): + class AgeHistogramAreaData(BaseModel): + metroId: int + firstQuintile: int + lastQuintile: int + + year: int + candidateCount: int + electedCount: int + firstQuintile: int + lastQuintile: int + divArea: AgeHistogramAreaData + uniArea: AgeHistogramAreaData + + metroId: int + rankingParagraph: AgeRankingParagraphData + indexHistoryParagraph: AgeIndexHistoryParagraphData + ageHistogramParagraph: AgeHistogramParagraphData + + +class PartyTemplateDataMetro(BaseModel): + class PartyCountDataPoint(BaseModel): + party: 
str + count: int + + metroId: int + partyDiversityIndex: float + prevElected: list[PartyCountDataPoint] + currentElected: list[PartyCountDataPoint] + currentCandidate: list[PartyCountDataPoint] diff --git a/model/ScrapResultNational.py b/model/ScrapResultNational.py new file mode 100644 index 0000000..0c0fd74 --- /dev/null +++ b/model/ScrapResultNational.py @@ -0,0 +1,57 @@ +from pydantic import BaseModel + + +# ============================================== +# = Template Data Types = +# ============================================== +class GenderTemplateDataNational(BaseModel): + class GenderTemplateDataPoint(BaseModel): + year: int + malePop: int + femalePop: int + + genderDiversityIndex: float + current: GenderTemplateDataPoint + currentCandidate: GenderTemplateDataPoint + prev: GenderTemplateDataPoint + prevCandidate: GenderTemplateDataPoint + + +class AgeTemplateDataNational(BaseModel): + class AgeRankingParagraphData(BaseModel): + ageDiversityIndex: float + + class AgeIndexHistoryParagraphData(BaseModel): + class AgeIndexHistoryIndexData(BaseModel): + year: int + unit: int + candidateCount: int + candidateDiversityIndex: float + candidateDiversityRank: int + electedDiversityIndex: float + electedDiversityRank: int + + mostRecentYear: int + history: list[AgeIndexHistoryIndexData] + + class AgeHistogramParagraphData(BaseModel): + year: int + candidateCount: int + electedCount: int + firstQuintile: int + lastQuintile: int + + rankingParagraph: AgeRankingParagraphData + indexHistoryParagraph: AgeIndexHistoryParagraphData + ageHistogramParagraph: AgeHistogramParagraphData + + +class PartyTemplateDataNational(BaseModel): + class PartyCountDataPoint(BaseModel): + party: str + count: int + + partyDiversityIndex: float + prevElected: list[PartyCountDataPoint] + currentElected: list[PartyCountDataPoint] + currentCandidate: list[PartyCountDataPoint] diff --git a/requirements.txt b/requirements.txt index c2d4271..a24bbbd 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -12,6 +12,6 @@ pydantic_core==2.10.1 pymongo==4.6.0 python-dotenv==1.0.0 sniffio==1.3.0 -starlette==0.27.0 +starlette==0.40.0 typing_extensions==4.8.0 uvicorn==0.24.0.post1 diff --git a/routers/ageHist.py b/routers/ageHist.py new file mode 100644 index 0000000..a1e58db --- /dev/null +++ b/routers/ageHist.py @@ -0,0 +1,128 @@ +from fastapi import APIRouter +from model import BasicResponse, MongoDB +from model.AgeHist import ( + AgeHistDataTypes, + AgeHistMethodTypes, + LocalAgeHistData, + MetroAgeHistData, + NationalAgeHistData, +) + + +router = APIRouter(prefix="/age-hist", tags=["age-hist"]) + + +@router.get("/") +async def getNationalAgeHistData( + ageHistType: AgeHistDataTypes, year: int, method: AgeHistMethodTypes +) -> BasicResponse.ErrorResponse | NationalAgeHistData: + histogram = await MongoDB.client.stats_db["age_hist"].find_one( + { + "councilorType": "national_councilor", + "is_elected": ageHistType == AgeHistDataTypes.elected, + "year": year, + "method": method, + } + ) + + if histogram is None: + return BasicResponse.ErrorResponse.model_validate( + { + "error": "NoDataError", + "code": BasicResponse.NO_DATA_ERROR, + "message": "No data retrieved with the provided input.", + } + ) + + return NationalAgeHistData.model_validate({"data": histogram["data"]}) + + +@router.get("/{metroId}") +async def getMetroAgeHistData( + metroId: int, ageHistType: AgeHistDataTypes, year: int, method: AgeHistMethodTypes +) -> BasicResponse.ErrorResponse | MetroAgeHistData: + if ( + await MongoDB.client.district_db["metro_district"].find_one( + {"metroId": metroId} + ) + is None + ): + return BasicResponse.ErrorResponse.model_validate( + { + "error": "RegionCodeError", + "code": BasicResponse.REGION_CODE_ERR, + "message": f"No metro district with metroId {metroId}.", + } + ) + + histogram = await MongoDB.client.stats_db["age_hist"].find_one( + { + "level": 1, + "councilorType": "metro_councilor", + "is_elected": ageHistType == AgeHistDataTypes.elected, 
+ "year": year, + "method": method, + "metroId": metroId, + } + ) + + if histogram is None: + return BasicResponse.ErrorResponse.model_validate( + { + "error": "NoDataError", + "code": BasicResponse.NO_DATA_ERROR, + "message": "No data retrieved with the provided input.", + } + ) + + return MetroAgeHistData.model_validate( + {"metroId": metroId, "data": histogram["data"]} + ) + + +@router.get("/{metroId}/{localId}") +async def getLocalAgeHistData( + metroId: int, + localId: int, + ageHistType: AgeHistDataTypes, + year: int, + method: AgeHistMethodTypes, +) -> BasicResponse.ErrorResponse | LocalAgeHistData: + if ( + await MongoDB.client.district_db["local_district"].find_one( + {"metroId": metroId, "localId": localId} + ) + is None + ): + return BasicResponse.ErrorResponse.model_validate( + { + "error": "RegionCodeError", + "code": BasicResponse.REGION_CODE_ERR, + "message": f"No local district with metroId {metroId} and localId {localId}.", + } + ) + + histogram = await MongoDB.client.stats_db["age_hist"].find_one( + { + "level": 2, + "councilorType": "local_councilor", + "is_elected": ageHistType == AgeHistDataTypes.elected, + "year": year, + "method": method, + "metroId": metroId, + "localId": localId, + } + ) + + if histogram is None: + return BasicResponse.ErrorResponse.model_validate( + { + "error": "NoDataError", + "code": BasicResponse.NO_DATA_ERROR, + "message": "No data retrieved with the provided input.", + } + ) + + return LocalAgeHistData.model_validate( + {"metroId": metroId, "localId": localId, "data": histogram["data"]} + ) diff --git a/routers/scrapResult.py b/routers/scrapResult.py deleted file mode 100644 index d6d32cc..0000000 --- a/routers/scrapResult.py +++ /dev/null @@ -1,126 +0,0 @@ -from fastapi import APIRouter -from model import BasicResponse, MongoDB, ScrapResult -from utils import diversity -from typing import TypeVar - - -router = APIRouter(prefix="/localCouncil", tags=["localCouncil"]) - -AGE_STAIR = 10 - - 
-@router.get("/template-data/{metroId}/{localId}") -async def getLocalTemplateData( - metroId: int, localId: int, factor: ScrapResult.FactorType -) -> BasicResponse.ErrorResponse | ScrapResult.GenderTemplateData | ScrapResult.AgeTemplateData | ScrapResult.PartyTemplateData: - if ( - await MongoDB.client.district_db["local_district"].find_one( - {"localId": localId, "metroId": metroId} - ) - is None - ): - return BasicResponse.ErrorResponse.model_validate( - { - "error": "RegionCodeError", - "code": BasicResponse.REGION_CODE_ERR, - "message": f"No local district with metroId {metroId} and localId {localId}.", - } - ) - - councilors = MongoDB.client.council_db["local_councilor"].find({"localId": localId}) - - match factor: - case ScrapResult.FactorType.gender: - gender_list = [councilor["gender"] async for councilor in councilors] - gender_diversity_index = diversity.gini_simpson(gender_list) - return ScrapResult.GenderTemplateData.model_validate( - {"genderDiversityIndex": gender_diversity_index} - ) - - case ScrapResult.FactorType.age: - age_list = [councilor["age"] async for councilor in councilors] - age_diversity_index = diversity.gini_simpson(age_list, stair=AGE_STAIR) - return ScrapResult.AgeTemplateData.model_validate( - {"ageDiversityIndex": age_diversity_index} - ) - - case ScrapResult.FactorType.party: - party_list = [councilor["jdName"] async for councilor in councilors] - party_diversity_index = diversity.gini_simpson(party_list) - return ScrapResult.PartyTemplateData.model_validate( - {"partyDiversityIndex": party_diversity_index} - ) - - -T = TypeVar( - "T", - ScrapResult.GenderChartDataPoint, - ScrapResult.AgeChartDataPoint, - ScrapResult.PartyChartDataPoint, -) - - -@router.get("/chart-data/{metroId}/{localId}") -async def getLocalChartData( - metroId: int, localId: int, factor: ScrapResult.FactorType -) -> BasicResponse.ErrorResponse | ScrapResult.ChartData[T]: - if ( - await MongoDB.client.district_db["local_district"].find_one( - {"localId": 
localId, "metroId": metroId} - ) - is None - ): - return BasicResponse.ErrorResponse.model_validate( - { - "error": "RegionCodeError", - "code": BasicResponse.REGION_CODE_ERR, - "message": f"No local district with metroId {metroId} and localId {localId}.", - } - ) - - councilors = MongoDB.client.council_db["local_councilor"].find({"localId": localId}) - - match factor: - case ScrapResult.FactorType.gender: - gender_list = [councilor["gender"] async for councilor in councilors] - gender_count = diversity.count(gender_list) - return ScrapResult.ChartData[ - ScrapResult.GenderChartDataPoint - ].model_validate( - { - "data": [ - {"gender": gender, "count": gender_count[gender]} - for gender in gender_count - ] - } - ) - - case ScrapResult.FactorType.age: - age_list = [councilor["age"] async for councilor in councilors] - age_count = diversity.count(age_list, stair=AGE_STAIR) - return ScrapResult.ChartData[ScrapResult.AgeChartDataPoint].model_validate( - { - "data": [ - { - "minAge": age, - "maxAge": age + AGE_STAIR - 1, - "count": age_count[age], - } - for age in age_count - ] - } - ) - - case ScrapResult.FactorType.party: - party_list = [councilor["jdName"] async for councilor in councilors] - party_count = diversity.count(party_list) - return ScrapResult.ChartData[ - ScrapResult.PartyChartDataPoint - ].model_validate( - { - "data": [ - {"party": party, "count": party_count[party]} - for party in party_count - ] - } - ) diff --git a/routers/scrapResultLocal.py b/routers/scrapResultLocal.py new file mode 100644 index 0000000..2b0d1c6 --- /dev/null +++ b/routers/scrapResultLocal.py @@ -0,0 +1,589 @@ +from typing import TypeVar +from fastapi import APIRouter +from model.BasicResponse import ErrorResponse, REGION_CODE_ERR, NO_DATA_ERROR_RESPONSE +from model.MongoDB import client +from model.ScrapResultCommon import ( + GenderChartDataPoint, + AgeChartDataPoint, + PartyChartDataPoint, + FactorType, + ChartData, +) +from model.ScrapResultLocal import ( + 
+    GenderTemplateDataLocal,
+    AgeTemplateDataLocal,
+    PartyTemplateDataLocal,
+)
+from utils import diversity
+
+
+router = APIRouter(prefix="/localCouncil", tags=["localCouncil"])
+
+AGE_STAIR = 10
+
+# ageDiversityRank of the district reported as "uniArea" by the age template.
+# NOTE(review): presumably the number of ranked local districts - confirm.
+_LEAST_DIVERSE_AGE_RANK = 226
+
+# Projection that strips the bookkeeping fields from a party_hist document so
+# that only its {party name: count} pairs remain.
+_PARTY_COUNTS_ONLY = {
+    "_id": 0,
+    "councilorType": 0,
+    "level": 0,
+    "is_elected": 0,
+    "localId": 0,
+    "metroId": 0,
+    "year": 0,
+}
+
+
+def _local_filter(metroId: int, localId: int, is_elected: bool, **extra) -> dict:
+    """Build the filter shared by the per-district queries on the stats DB."""
+    return {
+        "councilorType": "local_councilor",
+        "level": 2,
+        "is_elected": is_elected,
+        "localId": localId,
+        "metroId": metroId,
+        **extra,
+    }
+
+
+async def _sorted_years(collection, query: dict) -> list[int]:
+    """Return the distinct ``year`` values of the matching documents, ascending."""
+    return sorted({doc["year"] async for doc in collection.find(query)})
+
+
+async def _party_counts(query: dict) -> list[dict]:
+    """Flatten the matching party_hist documents into party/count entries."""
+    cursor = client.stats_db["party_hist"].find(query, _PARTY_COUNTS_ONLY)
+    return [
+        {"party": party, "count": count}
+        async for doc in cursor
+        for party, count in doc.items()
+    ]
+
+
+async def _local_gender_template(
+    metroId: int, localId: int, year: int, local_stat: dict
+) -> ErrorResponse | GenderTemplateDataLocal:
+    """Compare the gender split of ``year`` with the election before it."""
+    gender_hist = client.stats_db["gender_hist"]
+    years = await _sorted_years(gender_hist, _local_filter(metroId, localId, True))
+    # An unknown year has no data and the earliest election has nothing to be
+    # compared with; both are answered with NoDataError instead of raising.
+    if year not in years[1:]:
+        return NO_DATA_ERROR_RESPONSE
+    prev_year = years[years.index(year) - 1]
+
+    current = await gender_hist.find_one(
+        _local_filter(metroId, localId, True, year=year)
+    )
+    current_candidate = await gender_hist.find_one(
+        _local_filter(metroId, localId, False, year=year)
+    )
+    previous = await gender_hist.find_one(
+        _local_filter(metroId, localId, True, year=prev_year)
+    )
+    # Candidates of the *previous* election. This used to query (and label)
+    # the current year, which merely duplicated currentCandidate.
+    previous_candidate = await gender_hist.find_one(
+        _local_filter(metroId, localId, False, year=prev_year)
+    )
+    records = (current, current_candidate, previous, previous_candidate)
+    if any(record is None for record in records):
+        return NO_DATA_ERROR_RESPONSE
+
+    # Sums over every elected local-council document of ``year``; they feed
+    # the per-district means returned below.
+    totals = await gender_hist.aggregate(
+        [
+            {
+                "$match": {
+                    "councilorType": "local_councilor",
+                    "level": 2,
+                    "is_elected": True,
+                    "year": year,
+                }
+            },
+            {
+                "$group": {
+                    "_id": None,
+                    "male_tot": {"$sum": "$남"},
+                    "female_tot": {"$sum": "$여"},
+                    "district_cnt": {"$sum": 1},
+                }
+            },
+        ]
+    ).to_list(1)
+    if not totals:
+        return NO_DATA_ERROR_RESPONSE
+    overall = totals[0]
+
+    def data_point(record: dict, record_year: int) -> dict:
+        return {
+            "year": record_year,
+            "malePop": record["남"],
+            "femalePop": record["여"],
+        }
+
+    return GenderTemplateDataLocal.model_validate(
+        {
+            "metroId": metroId,
+            "localId": localId,
+            "genderDiversityIndex": local_stat["genderDiversityIndex"],
+            "current": data_point(current, year),
+            "currentCandidate": data_point(current_candidate, year),
+            "prev": data_point(previous, prev_year),
+            "prevCandidate": data_point(previous_candidate, prev_year),
+            "meanMalePop": overall["male_tot"] / overall["district_cnt"],
+            "meanFemalePop": overall["female_tot"] / overall["district_cnt"],
+        }
+    )
+
+
+async def _local_age_template(
+    metroId: int, localId: int, year: int, local_stat: dict
+) -> ErrorResponse | AgeTemplateDataLocal:
+    """Assemble the ranking, index-history and histogram paragraphs for age."""
+    diversity_index = client.stats_db["diversity_index"]
+    age_hist = client.stats_db["age_hist"]
+    age_stat = client.stats_db["age_stat"]
+
+    # ============================
+    #      rankingParagraph
+    # ============================
+    sibling_ids = [
+        doc["localId"]
+        async for doc in client.district_db["local_district"].find(
+            {"metroId": metroId}
+        )
+    ]
+    all_indices = await diversity_index.find(
+        {"localId": {"$in": sibling_ids}}
+    ).to_list(500)
+    all_indices.sort(key=lambda doc: doc["ageDiversityRank"])
+
+    # ============================
+    #    indexHistoryParagraph
+    # ============================
+    years = await _sorted_years(age_hist, {"councilorType": "local_councilor"})
+    if year not in years[1:]:
+        return NO_DATA_ERROR_RESPONSE
+
+    history = []
+    for past_year in years:
+        candidate = await age_hist.find_one(
+            _local_filter(metroId, localId, False, year=past_year, method="equal")
+        )
+        elected = await age_hist.find_one(
+            _local_filter(metroId, localId, True, year=past_year, method="equal")
+        )
+        # ``years`` is collected over all local councils, so this particular
+        # district is not guaranteed to have a histogram for each of them.
+        if candidate is None or elected is None:
+            return NO_DATA_ERROR_RESPONSE
+        history.append(
+            {
+                "year": past_year,
+                # Integer division: the response model declares ``unit`` as int.
+                "unit": (past_year - 1998) // 4 + 2,
+                "candidateCount": sum(
+                    group["count"] for group in candidate["data"]
+                ),
+                "candidateDiversityIndex": candidate["diversityIndex"],
+                "candidateDiversityRank": candidate["diversityRank"],
+                "electedDiversityIndex": elected["diversityIndex"],
+                "electedDiversityRank": elected["diversityRank"],
+            }
+        )
+
+    # ============================
+    #    ageHistogramParagraph
+    # ============================
+    # Both statistics are read for the requested year so that they agree with
+    # the "year" reported in the paragraph (the elected one used to come from
+    # the latest year on record regardless of the request).
+    elected_stat = await age_stat.find_one(
+        _local_filter(metroId, localId, True, year=year)
+    )
+    candidate_stat = await age_stat.find_one(
+        _local_filter(metroId, localId, False, year=year)
+    )
+    if elected_stat is None or candidate_stat is None:
+        return NO_DATA_ERROR_RESPONSE
+
+    # Best-ranked district that actually has elected statistics for ``year``.
+    div_area = None
+    ranked_areas = diversity_index.find({"localId": {"$exists": True}}).sort(
+        "ageDiversityRank"
+    )
+    async for area in ranked_areas:
+        div_area = await age_stat.find_one(
+            {
+                "level": 2,
+                "councilorType": "local_councilor",
+                "is_elected": True,
+                "localId": area["localId"],
+                "year": year,
+            }
+        )
+        if div_area is not None:
+            break
+    if div_area is None:
+        return NO_DATA_ERROR_RESPONSE
+
+    uni_index = await diversity_index.find_one(
+        {"localId": {"$exists": True}, "ageDiversityRank": _LEAST_DIVERSE_AGE_RANK}
+    )
+    if uni_index is None:
+        return NO_DATA_ERROR_RESPONSE
+    uni_area = await age_stat.find_one(
+        {
+            "level": 2,
+            "councilorType": "local_councilor",
+            "is_elected": True,
+            "localId": uni_index["localId"],
+            "year": year,
+        }
+    )
+    if uni_area is None:
+        return NO_DATA_ERROR_RESPONSE
+
+    def quintiles(stat: dict) -> dict:
+        summary = stat["data"][0]
+        return {
+            "firstQuintile": summary["firstquintile"],
+            "lastQuintile": summary["lastquintile"],
+        }
+
+    return AgeTemplateDataLocal.model_validate(
+        {
+            "metroId": metroId,
+            "localId": localId,
+            "rankingParagraph": {
+                "ageDiversityIndex": local_stat["ageDiversityIndex"],
+                "allIndices": [
+                    {
+                        "localId": doc["localId"],
+                        "rank": rank,
+                        "ageDiversityIndex": doc["ageDiversityIndex"],
+                    }
+                    for rank, doc in enumerate(all_indices, start=1)
+                ],
+            },
+            "indexHistoryParagraph": {
+                "mostRecentYear": years[-1],
+                "history": history,
+            },
+            "ageHistogramParagraph": {
+                "year": year,
+                "candidateCount": candidate_stat["data"][0]["population"],
+                "electedCount": elected_stat["data"][0]["population"],
+                **quintiles(elected_stat),
+                "divArea": {"localId": div_area["localId"], **quintiles(div_area)},
+                "uniArea": {"localId": uni_index["localId"], **quintiles(uni_area)},
+            },
+        }
+    )
+
+
+async def _local_party_template(
+    metroId: int, localId: int, year: int, local_stat: dict
+) -> ErrorResponse | PartyTemplateDataLocal:
+    """List the party seat counts of ``year`` and of the election before it."""
+    years = await _sorted_years(
+        client.stats_db["party_hist"], _local_filter(metroId, localId, True)
+    )
+    if year not in years[1:]:
+        return NO_DATA_ERROR_RESPONSE
+    prev_year = years[years.index(year) - 1]
+
+    return PartyTemplateDataLocal.model_validate(
+        {
+            "metroId": metroId,
+            "localId": localId,
+            "partyDiversityIndex": local_stat["partyDiversityIndex"],
+            "prevElected": await _party_counts(
+                _local_filter(metroId, localId, True, year=prev_year)
+            ),
+            "currentElected": await _party_counts(
+                _local_filter(metroId, localId, True, year=year)
+            ),
+            "currentCandidate": await _party_counts(
+                _local_filter(metroId, localId, False, year=year)
+            ),
+        }
+    )
+
+
+@router.get("/template-data/{metroId}/{localId}")
+async def getLocalTemplateData(
+    metroId: int, localId: int, factor: FactorType, year: int = 2022
+) -> ErrorResponse | GenderTemplateDataLocal | AgeTemplateDataLocal | PartyTemplateDataLocal:
+    """Return the report-template data of one factor for a local council.
+
+    Responds with RegionCodeError when the district does not exist, and with
+    NoDataError when the statistics needed for ``year`` are not available
+    (including the earliest election, which has no predecessor).
+    """
+    district = await client.district_db["local_district"].find_one(
+        {"localId": localId, "metroId": metroId}
+    )
+    if district is None:
+        return ErrorResponse.model_validate(
+            {
+                "error": "RegionCodeError",
+                "code": REGION_CODE_ERR,
+                "message": f"No local district with metroId {metroId} and localId {localId}.",
+            }
+        )
+
+    local_stat = await client.stats_db["diversity_index"].find_one({"localId": localId})
+    if local_stat is None:
+        return NO_DATA_ERROR_RESPONSE
+
+    match factor:
+        case FactorType.gender:
+            return await _local_gender_template(metroId, localId, year, local_stat)
+        case FactorType.age:
+            return await _local_age_template(metroId, localId, year, local_stat)
+        case FactorType.party:
+            return await _local_party_template(metroId, localId, year, local_stat)
+
+
+@router.get("/chart-data/{metroId}/{localId}")
+async def getLocalChartData(
+    metroId: int, localId: int, factor: FactorType, year: int = 2022
+) -> ErrorResponse | ChartData[GenderChartDataPoint] |
ChartData[
+    AgeChartDataPoint
+] | ChartData[PartyChartDataPoint]:
+    """Return the chart series of one factor for a local council election.
+
+    Responds with RegionCodeError when the district does not exist, and with
+    NoDataError when no statistics are stored for the requested ``year``.
+    """
+    district = await client.district_db["local_district"].find_one(
+        {"localId": localId, "metroId": metroId}
+    )
+    if district is None:
+        return ErrorResponse.model_validate(
+            {
+                "error": "RegionCodeError",
+                "code": REGION_CODE_ERR,
+                "message": f"No local district with metroId {metroId} and localId {localId}.",
+            }
+        )
+
+    # Filter for the councillors elected in this district in ``year``.
+    elected_query = {
+        "councilorType": "local_councilor",
+        "level": 2,
+        "is_elected": True,
+        "localId": localId,
+        "metroId": metroId,
+        "year": year,
+    }
+
+    match factor:
+        case FactorType.gender:
+            # find_one yields None for a year without data, where indexing
+            # the empty result list used to raise IndexError.
+            gender_cnt = await client.stats_db["gender_hist"].find_one(elected_query)
+            if gender_cnt is None:
+                return NO_DATA_ERROR_RESPONSE
+
+            return ChartData[GenderChartDataPoint].model_validate(
+                {
+                    "data": [
+                        {"gender": gender, "count": gender_cnt[gender]}
+                        for gender in ("남", "여")
+                    ]
+                }
+            )
+
+        case FactorType.age:
+            age_cnt = await client.stats_db["age_hist"].find_one(
+                {**elected_query, "method": "equal"}
+            )
+            if age_cnt is None:
+                return NO_DATA_ERROR_RESPONSE
+
+            # Expand the stored bins into one entry per councillor so they can
+            # be re-counted by diversity.count with stair=AGE_STAIR.
+            age_list = [
+                age_bin["minAge"]
+                for age_bin in age_cnt["data"]
+                for _ in range(age_bin["count"])
+            ]
+            age_stair = diversity.count(age_list, stair=AGE_STAIR)
+            return ChartData[AgeChartDataPoint].model_validate(
+                {
+                    "data": [
+                        {
+                            "minAge": age,
+                            "maxAge": age + AGE_STAIR,
+                            "count": age_stair[age],
+                        }
+                        for age in age_stair
+                    ]
+                }
+            )
+
+        case FactorType.party:
+            party_count = await client.stats_db["party_hist"].find_one(elected_query)
+            if party_count is None:
+                return NO_DATA_ERROR_RESPONSE
+
+            # Every key that is not bookkeeping is a party name.
+            bookkeeping = {
+                "_id",
+                "councilorType",
+                "level",
+                "is_elected",
+                "localId",
+                "metroId",
+                "year",
+            }
+            return ChartData[PartyChartDataPoint].model_validate(
+                {
+                    "data": [
+                        {"party": party, "count": count}
+                        for party, count in party_count.items()
+                        if party not in bookkeeping
+                    ]
+                }
+            )
diff --git a/routers/scrapResultMetro.py b/routers/scrapResultMetro.py
new file mode 100644
index 0000000..92c4bc7
--- /dev/null
+++ b/routers/scrapResultMetro.py
@@ -0,0 +1,553 @@
+from typing import TypeVar
+from fastapi import APIRouter
+from model.BasicResponse import ErrorResponse, REGION_CODE_ERR, NO_DATA_ERROR_RESPONSE
+from model.MongoDB import client
+from model.ScrapResultCommon import (
+    GenderChartDataPoint,
+    AgeChartDataPoint,
+    PartyChartDataPoint,
+    FactorType,
+    ChartData,
+)
+from model.ScrapResultMetro import (
+    GenderTemplateDataMetro,
+    AgeTemplateDataMetro,
+    PartyTemplateDataMetro,
+)
+from utils import diversity
+
+
+router = APIRouter(prefix="/metroCouncil", tags=["metroCouncil"])
+
+AGE_STAIR = 10
+
+
+@router.get("/template-data/{metroId}")
+async def getMetroTemplateData(
+    metroId: int, factor: FactorType, year: int = 2022
+) -> ErrorResponse | GenderTemplateDataMetro | AgeTemplateDataMetro | PartyTemplateDataMetro:
+    if (
+        await client.district_db["metro_district"].find_one({"metroId": metroId})
+        is None
+    ):
+        return ErrorResponse.model_validate(
+            {
+                "error": "RegionCodeError",
+                "code": REGION_CODE_ERR,
+                "message": f"No metro district with metroId {metroId}.",
+            }
+        )
+
+    metro_stat = await client.stats_db["diversity_index"].find_one({"metroId": metroId})
+
+    match factor:
+        case FactorType.gender:
+            years = list(
+                {
+                    doc["year"]
+                    async for doc in client.stats_db["gender_hist"].find(
+                        {
+                            "councilorType": "metro_councilor",
+                            "level": 1,
+                            "is_elected": True,
+                            "metroId": metroId,
+                        }
+                    )
+                }
+            )
+            years.sort()
+            assert len(years) >= 2
+            year_index = years.index(year)
+            if year_index == 0:
+                return NO_DATA_ERROR_RESPONSE
+
+            current = await client.stats_db["gender_hist"].find_one(
+                {
+                    "councilorType": "metro_councilor",
+                    "level": 1,
+                    "is_elected": True,
+                    "metroId": metroId,
+                    "year": years[year_index],
+                }
+            )
+
+            current_candidate = await client.stats_db["gender_hist"].find_one(
+                {
+                    "councilorType":
"metro_councilor", + "level": 1, + "is_elected": False, + "metroId": metroId, + "year": years[year_index], + } + ) + + previous = await client.stats_db["gender_hist"].find_one( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": True, + "metroId": metroId, + "year": years[year_index - 1], + } + ) + + previous_candidate = await client.stats_db["gender_hist"].find_one( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": False, + "metroId": metroId, + "year": years[year_index], + } + ) + + current_all = ( + await client.stats_db["gender_hist"] + .aggregate( + [ + { + "$match": { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": True, + "year": years[year_index], + } + }, + { + "$group": { + "_id": None, + "male_tot": {"$sum": "$남"}, + "female_tot": {"$sum": "$여"}, + "district_cnt": {"$sum": 1}, + } + }, + ] + ) + .to_list(500) + ) + assert len(current_all) == 1 + current_all = current_all[0] + + return GenderTemplateDataMetro.model_validate( + { + "metroId": metroId, + "genderDiversityIndex": metro_stat["genderDiversityIndex"], + "current": { + "year": years[year_index], + "malePop": current["남"], + "femalePop": current["여"], + }, + "currentCandidate": { + "year": years[year_index], + "malePop": current_candidate["남"], + "femalePop": current_candidate["여"], + }, + "prev": { + "year": years[year_index - 1], + "malePop": previous["남"], + "femalePop": previous["여"], + }, + "prevCandidate": { + "year": years[year_index], + "malePop": previous_candidate["남"], + "femalePop": previous_candidate["여"], + }, + "meanMalePop": current_all["male_tot"] + / current_all["district_cnt"], + "meanFemalePop": current_all["female_tot"] + / current_all["district_cnt"], + } + ) + + case FactorType.age: + # ============================ + # rankingParagraph + # ============================ + age_diversity_index = metro_stat["ageDiversityIndex"] + + all_metroIds = [ + doc["metroId"] + async for doc in 
client.district_db["metro_district"].find() + ] + all_indices = ( + await client.stats_db["diversity_index"] + .find({"metroId": {"$in": all_metroIds}}) + .to_list(500) + ) + all_indices.sort(key=lambda x: x["ageDiversityRank"]) + + # ============================ + # indexHistoryParagraph + # ============================ + years = list( + { + doc["year"] + async for doc in client.stats_db["age_hist"].find( + {"councilorType": "metro_councilor"} + ) + } + ) + years.sort() + year_index = years.index(year) + if year_index == 0: + return NO_DATA_ERROR_RESPONSE + + history_candidate = [ + await client.stats_db["age_hist"].find_one( + { + "year": year, + "level": 1, + "councilorType": "metro_councilor", + "is_elected": False, + "method": "equal", + "metroId": metroId, + } + ) + for year in years + ] + history_elected = [ + await client.stats_db["age_hist"].find_one( + { + "year": year, + "level": 1, + "councilorType": "metro_councilor", + "is_elected": True, + "method": "equal", + "metroId": metroId, + } + ) + for year in years + ] + + # ============================ + # ageHistogramParagraph + # ============================ + age_stat_elected = ( + await client.stats_db["age_stat"] + .aggregate( + [ + { + "$match": { + "level": 1, + "councilorType": "metro_councilor", + "is_elected": True, + "metroId": metroId, + "year": years[year_index], + } + }, + ] + ) + .to_list(500) + )[0] + most_recent_year = year + age_stat_candidate = await client.stats_db["age_stat"].find_one( + { + "level": 1, + "councilorType": "metro_councilor", + "is_elected": False, + "metroId": metroId, + "year": most_recent_year, + } + ) + + divArea_id = ( + await client.stats_db["diversity_index"].find_one( + {"metroId": {"$exists": True}, "ageDiversityRank": 1} + ) + )["metroId"] + divArea = await client.stats_db["age_stat"].find_one( + { + "level": 1, + "councilorType": "metro_councilor", + "is_elected": True, + "metroId": divArea_id, + "year": most_recent_year, + } + ) + + uniArea_id = ( + await 
client.stats_db["diversity_index"].find_one( + {"metroId": {"$exists": True}, "ageDiversityRank": 16} + ) + )["metroId"] + uniArea = await client.stats_db["age_stat"].find_one( + { + "level": 1, + "councilorType": "metro_councilor", + "is_elected": True, + "metroId": uniArea_id, + "year": most_recent_year, + } + ) + + return AgeTemplateDataMetro.model_validate( + { + "metroId": metroId, + "rankingParagraph": { + "ageDiversityIndex": age_diversity_index, + "allIndices": [ + { + "metroId": doc["metroId"], + "rank": doc["ageDiversityRank"], + "ageDiversityIndex": doc["ageDiversityIndex"], + } + for doc in all_indices + ], + }, + "indexHistoryParagraph": { + "mostRecentYear": years[year_index], + "history": [ + { + "year": year, + "unit": (year - 1998) / 4 + 2, + "candidateCount": sum( + group["count"] + for group in history_candidate[idx]["data"] + ), + "candidateDiversityIndex": history_candidate[idx][ + "diversityIndex" + ], + "candidateDiversityRank": history_candidate[idx][ + "diversityRank" + ], + "electedDiversityIndex": history_elected[idx][ + "diversityIndex" + ], + "electedDiversityRank": history_elected[idx][ + "diversityRank" + ], + } + for idx, year in enumerate(years) + ], + }, + "ageHistogramParagraph": { + "year": most_recent_year, + "candidateCount": age_stat_candidate["data"][0]["population"], + "electedCount": age_stat_elected["data"][0]["population"], + "firstQuintile": age_stat_elected["data"][0]["firstquintile"], + "lastQuintile": age_stat_elected["data"][0]["lastquintile"], + "divArea": { + "metroId": divArea_id, + "firstQuintile": divArea["data"][0]["firstquintile"], + "lastQuintile": divArea["data"][0]["lastquintile"], + }, + "uniArea": { + "metroId": uniArea_id, + "firstQuintile": uniArea["data"][0]["firstquintile"], + "lastQuintile": uniArea["data"][0]["lastquintile"], + }, + }, + } + ) + + case FactorType.party: + party_diversity_index = metro_stat["partyDiversityIndex"] + years = list( + { + doc["year"] + async for doc in 
client.stats_db["party_hist"].find( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": True, + "metroId": metroId, + } + ) + } + ) + years.sort() + assert len(years) >= 2 + + year_index = years.index(year) + if year_index == 0: + return NO_DATA_ERROR_RESPONSE + + current_elected = client.stats_db["party_hist"].find( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": True, + "metroId": metroId, + "year": years[year_index], + }, + { + "_id": 0, + "councilorType": 0, + "level": 0, + "is_elected": 0, + "metroId": 0, + "year": 0, + }, + ) + current_candidate = client.stats_db["party_hist"].find( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": False, + "metroId": metroId, + "year": years[year_index], + }, + { + "_id": 0, + "councilorType": 0, + "level": 0, + "is_elected": 0, + "metroId": 0, + "year": 0, + }, + ) + previous = client.stats_db["party_hist"].find( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": True, + "metroId": metroId, + "year": years[year_index - 1], + }, + { + "_id": 0, + "councilorType": 0, + "level": 0, + "is_elected": 0, + "metroId": 0, + "year": 0, + }, + ) + + return PartyTemplateDataMetro.model_validate( + { + "metroId": metroId, + "partyDiversityIndex": party_diversity_index, + "prevElected": [ + {"party": party, "count": doc[party]} + async for doc in previous + for party in doc + ], + "currentElected": [ + {"party": party, "count": doc[party]} + async for doc in current_elected + for party in doc + ], + "currentCandidate": [ + {"party": party, "count": doc[party]} + async for doc in current_candidate + for party in doc + ], + } + ) + + +T = TypeVar( + "T", + GenderChartDataPoint, + AgeChartDataPoint, + PartyChartDataPoint, +) + + +@router.get("/chart-data/{metroId}") +async def getMetroChartData( + metroId: int, factor: FactorType, year: int = 2022 +) -> ErrorResponse | ChartData[GenderChartDataPoint] | ChartData[ + AgeChartDataPoint +] | 
ChartData[PartyChartDataPoint]: + if ( + await client.district_db["metro_district"].find_one({"metroId": metroId}) + is None + ): + return ErrorResponse.model_validate( + { + "error": "RegionCodeError", + "code": REGION_CODE_ERR, + "message": f"No metro district with metroId {metroId}.", + } + ) + + match factor: + case FactorType.gender: + gender_cnt = ( + await client.stats_db["gender_hist"] + .find( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": True, + "metroId": metroId, + "year": year, + } + ) + .to_list(5) + )[0] + + return ChartData[GenderChartDataPoint].model_validate( + { + "data": [ + {"gender": "남", "count": gender_cnt["남"]}, + {"gender": "여", "count": gender_cnt["여"]}, + ] + } + ) + + case FactorType.age: + age_cnt = ( + await client.stats_db["age_hist"] + .find( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": True, + "method": "equal", + "metroId": metroId, + "year": year, + } + ) + .to_list(5) + )[0] + age_list = [ + age["minAge"] for age in age_cnt["data"] for _ in range(age["count"]) + ] + age_stair = diversity.count(age_list, stair=AGE_STAIR) + return ChartData[AgeChartDataPoint].model_validate( + { + "data": [ + { + "minAge": age, + "maxAge": age + AGE_STAIR, + "count": age_stair[age], + } + for age in age_stair + ] + } + ) + + case FactorType.party: + party_count = ( + await client.stats_db["party_hist"] + .find( + { + "councilorType": "metro_councilor", + "level": 1, + "is_elected": True, + "metroId": metroId, + "year": year, + } + ) + .to_list(5) + )[0] + return ChartData[PartyChartDataPoint].model_validate( + { + "data": [ + {"party": party, "count": party_count[party]} + for party in party_count + if party + not in [ + "_id", + "councilorType", + "level", + "is_elected", + "metroId", + "year", + ] + ] + } + ) diff --git a/routers/scrapResultNational.py b/routers/scrapResultNational.py new file mode 100644 index 0000000..c62af76 --- /dev/null +++ b/routers/scrapResultNational.py @@ -0,0 +1,407 
@@ +from typing import TypeVar +from fastapi import APIRouter +from model.BasicResponse import ErrorResponse, NO_DATA_ERROR_RESPONSE +from model.MongoDB import client +from model.ScrapResultCommon import ( + GenderChartDataPoint, + AgeChartDataPoint, + PartyChartDataPoint, + FactorType, + ChartData, +) +from model.ScrapResultNational import ( + GenderTemplateDataNational, + AgeTemplateDataNational, + PartyTemplateDataNational, +) +from utils import diversity + + +router = APIRouter(prefix="/nationalCouncil", tags=["nationalCouncil"]) + +AGE_STAIR = 10 + + +@router.get("/template-data") +async def getNationalTemplateData( + factor: FactorType, year: int = 2020 +) -> ErrorResponse | GenderTemplateDataNational | AgeTemplateDataNational | PartyTemplateDataNational: + national_stat = await client.stats_db["diversity_index"].find_one( + {"national": True} + ) + if national_stat is None: + return NO_DATA_ERROR_RESPONSE + + match factor: + case FactorType.gender: + years = list( + { + doc["year"] + async for doc in client.stats_db["gender_hist"].find( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": True, + } + ) + } + ) + years.sort() + assert len(years) >= 2 + + year_index = years.index(year) + if year_index == 0: + return NO_DATA_ERROR_RESPONSE + + current = await client.stats_db["gender_hist"].find_one( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": True, + "year": years[year_index], + } + ) + + current_candidate = await client.stats_db["gender_hist"].find_one( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": False, + "year": years[year_index - 1], + } + ) + + previous = await client.stats_db["gender_hist"].find_one( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": True, + "year": years[year_index], + } + ) + + previous_candidate = await client.stats_db["gender_hist"].find_one( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": False, + 
"year": years[year_index - 1], + } + ) + + return GenderTemplateDataNational.model_validate( + { + "genderDiversityIndex": national_stat["genderDiversityIndex"], + "current": { + "year": years[year_index], + "malePop": current["남"], + "femalePop": current["여"], + }, + "currentCandidate": { + "year": years[year_index - 1], + "malePop": current_candidate["남"], + "femalePop": current_candidate["여"], + }, + "prev": { + "year": years[year_index], + "malePop": previous["남"], + "femalePop": previous["여"], + }, + "prevCandidate": { + "year": years[year_index - 1], + "malePop": previous_candidate["남"], + "femalePop": previous_candidate["여"], + }, + } + ) + + case FactorType.age: + # ============================ + # rankingParagraph + # ============================ + age_diversity_index = national_stat["ageDiversityIndex"] + + # ============================ + # indexHistoryParagraph + # ============================ + years = list( + { + doc["year"] + async for doc in client.stats_db["age_hist"].find( + {"councilorType": "national_councilor"} + ) + } + ) + years.sort() + history_candidate = [ + await client.stats_db["age_hist"].find_one( + { + "year": year, + "councilorType": "national_councilor", + "is_elected": False, + "method": "equal", + } + ) + for year in years + ] + history_elected = [ + await client.stats_db["age_hist"].find_one( + { + "year": year, + "councilorType": "national_councilor", + "is_elected": True, + "method": "equal", + } + ) + for year in years + ] + + # ============================ + # ageHistogramParagraph + # ============================ + age_stat_elected = ( + await client.stats_db["age_stat"] + .aggregate( + [ + { + "$match": { + "level": 0, + "councilorType": "national_councilor", + "is_elected": True, + "year": year, + } + }, + {"$sort": {"year": -1}}, + {"$limit": 1}, + ] + ) + .to_list(500) + )[0] + most_recent_year = age_stat_elected["year"] + age_stat_candidate = await client.stats_db["age_stat"].find_one( + { + "councilorType": 
"national_councilor", + "is_elected": False, + "year": most_recent_year, + } + ) + + return AgeTemplateDataNational.model_validate( + { + "rankingParagraph": { + "ageDiversityIndex": age_diversity_index, + }, + "indexHistoryParagraph": { + "mostRecentYear": years[-1], + "history": [ + { + "year": year, + "unit": (year - 2000) / 4 + 2, + "candidateCount": sum( + group["count"] + for group in history_candidate[idx]["data"] + ), + "candidateDiversityIndex": history_candidate[idx][ + "diversityIndex" + ], + "candidateDiversityRank": history_candidate[idx][ + "diversityRank" + ], + "electedDiversityIndex": history_elected[idx][ + "diversityIndex" + ], + "electedDiversityRank": history_elected[idx][ + "diversityRank" + ], + } + for idx, year in enumerate(years) + ], + }, + "ageHistogramParagraph": { + "year": most_recent_year, + "candidateCount": age_stat_candidate["data"][0]["population"], + "electedCount": age_stat_elected["data"][0]["population"], + "firstQuintile": age_stat_elected["data"][0]["firstquintile"], + "lastQuintile": age_stat_elected["data"][0]["lastquintile"], + }, + } + ) + + case FactorType.party: + party_diversity_index = national_stat["partyDiversityIndex"] + years = list( + { + doc["year"] + async for doc in client.stats_db["party_hist"].find( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": True, + } + ) + } + ) + years.sort() + assert len(years) >= 2 + year_index = years.index(year) + if year_index == 0: + return NO_DATA_ERROR_RESPONSE + + current_elected = client.stats_db["party_hist"].find( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": True, + "year": years[year_index], + }, + { + "_id": 0, + "councilorType": 0, + "level": 0, + "is_elected": 0, + "year": 0, + }, + ) + current_candidate = client.stats_db["party_hist"].find( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": False, + "year": years[year_index], + }, + { + "_id": 0, + "councilorType": 0, + "level": 0, + 
"is_elected": 0, + "year": 0, + }, + ) + previous = client.stats_db["party_hist"].find( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": True, + "year": years[year_index - 1], + }, + { + "_id": 0, + "councilorType": 0, + "level": 0, + "is_elected": 0, + "year": 0, + }, + ) + + return PartyTemplateDataNational.model_validate( + { + "partyDiversityIndex": party_diversity_index, + "prevElected": [ + {"party": party, "count": doc[party]} + async for doc in previous + for party in doc + ], + "currentElected": [ + {"party": party, "count": doc[party]} + async for doc in current_elected + for party in doc + ], + "currentCandidate": [ + {"party": party, "count": doc[party]} + async for doc in current_candidate + for party in doc + ], + } + ) + + +@router.get("/chart-data") +async def getNationalChartData( + factor: FactorType, year: int = 2020 +) -> ErrorResponse | ChartData[GenderChartDataPoint] | ChartData[ + AgeChartDataPoint +] | ChartData[PartyChartDataPoint]: + match factor: + case FactorType.gender: + gender_cnt = ( + await client.stats_db["gender_hist"] + .find( + { + "councilorType": "national_councilor", + "level": 0, + "year": year, + "is_elected": True, + } + ) + .to_list(5) + )[0] + + return ChartData[GenderChartDataPoint].model_validate( + { + "data": [ + {"gender": "남", "count": gender_cnt["남"]}, + {"gender": "여", "count": gender_cnt["여"]}, + ] + } + ) + + case FactorType.age: + age_cnt = ( + await client.stats_db["age_hist"] + .find( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": True, + "method": "equal", + "year": year, + } + ) + .to_list(5) + )[0] + age_list = [ + age["minAge"] for age in age_cnt["data"] for _ in range(age["count"]) + ] + age_stair = diversity.count(age_list, stair=AGE_STAIR) + return ChartData[AgeChartDataPoint].model_validate( + { + "data": [ + { + "minAge": age, + "maxAge": age + AGE_STAIR, + "count": age_stair[age], + } + for age in age_stair + ] + } + ) + + case FactorType.party: 
+ party_count = ( + await client.stats_db["party_hist"] + .find( + { + "councilorType": "national_councilor", + "level": 0, + "is_elected": True, + "year": year, + } + ) + .to_list(5) + )[0] + return ChartData[PartyChartDataPoint].model_validate( + { + "data": [ + {"party": party, "count": party_count[party]} + for party in party_count + if party + not in [ + "_id", + "councilorType", + "level", + "is_elected", + "year", + ] + ] + } + ) diff --git a/utils/diversity.py b/utils/diversity.py index 95536e0..be91f56 100644 --- a/utils/diversity.py +++ b/utils/diversity.py @@ -19,10 +19,12 @@ def gini_simpson(data, stair=0, opts=True): """ counts = count(data, stair) total = sum(counts.values()) - gs_idx = 1 - sum((n / total) ** 2 for n in counts.values()) + gs_idx = 1 - sum((n / total) * ((n - 1) / (total - 1)) for n in counts.values()) if opts: num_cats = len([c for c in counts.values() if c > 0]) + if num_cats <= 1: + return 0.0 max_gs_idx = (num_cats - 1) / num_cats * total / (total - 1) gs_idx /= max_gs_idx