diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml new file mode 100644 index 0000000..508ad55 --- /dev/null +++ b/.github/workflows/build_and_deploy.yml @@ -0,0 +1,45 @@ +name: build_and_deploy +on: workflow_dispatch +env: + IMAGE_NAME: ${{secrets.REGISTRY}}/effects_api + CONTAINER_NAME: effects_api + +jobs: + build: + runs-on: 102_runner + outputs: + now: ${{steps.date.outputs.NOW}} + steps: + - name: Set current date as env variable + id: date + run: echo "NOW=$(date +'%Y-%m-%dT%H-%M-%S')" >> $GITHUB_OUTPUT + - name: checkout + uses: actions/checkout@v4 + - name: copy_env + env: + ENV_PATH: ${{secrets.ENV_PATH}} + run: cp "$ENV_PATH"/.env.production ./ + - name: build + env: + NOW: ${{steps.date.outputs.now}} + run: docker build -t "$IMAGE_NAME":"$NOW" . + - name: push_to_registry + env: + NOW: ${{steps.date.outputs.now}} + run: docker push "$IMAGE_NAME":"$NOW" + stop_container: + runs-on: 102_runner + needs: build + steps: + - name: stop_container + run: docker rm -f "$CONTAINER_NAME" + run_container: + runs-on: 102_runner + needs: [build, stop_container] + env: + NOW: ${{needs.build.outputs.now}} + steps: + - name: set env + run: echo "IMAGE=$IMAGE_NAME:$NOW" >> $GITHUB_ENV + - name: run + run: docker compose -f docker-compose.actions.yml up -d diff --git a/.gitignore b/.gitignore index 39a19ca..0f0d464 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,7 @@ target/ # IPython profile_default/ ipython_config.py +*.ipynb # pyenv # For a library or package, you might want to ignore these files since the code is @@ -125,8 +126,7 @@ celerybeat.pid *.sage.py # Environments -app/.env.development -.env.development +.env.* .venv env/ venv/ @@ -171,3 +171,4 @@ gdf_with_obj.geojson boundaries.parquet roads.parquet water.parquet +__effects_cache__ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9703de4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/psf/black + rev: 25.1.0 + hooks: + - id: black + language_version: python3.11 + + - repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + name: isort (python) + args: ["--profile", "black"] \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index b8219db..c0a67f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,10 @@ ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONUNBUFFERED=1 # Enables env file -ENV APP_ENV=development +ENV APP_ENV=production + +#add pypi mirror to config +COPY pip.conf /etc/xdg/pip/pip.conf # Install pip requirements COPY requirements.txt . @@ -23,4 +26,4 @@ WORKDIR /app COPY . /app # During debugging, this entry point will be overridden. 
For more information, please refer to https://aka.ms/vscode-docker-python-debug -CMD ["gunicorn", "--bind", "0.0.0.0:80", "-k", "uvicorn.workers.UvicornWorker", "--workers", "2", "app.main:app"] \ No newline at end of file +CMD ["sh", "-c", "gunicorn --bind 0.0.0.0:80 -k uvicorn.workers.UvicornWorker --workers 1 --timeout 600 app.main:app"] \ No newline at end of file diff --git a/app/api/routers/effects/effects_controller.py b/app/api/routers/effects/effects_controller.py deleted file mode 100644 index a7ffe88..0000000 --- a/app/api/routers/effects/effects_controller.py +++ /dev/null @@ -1,200 +0,0 @@ -import os -from datetime import datetime -from typing import Annotated - -from loguru import logger -from blocksnet.models import ServiceType -from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException -from ...utils import auth, const, decorators -from . import effects_models as em -from . import effects_service as es -from .services import service_type_service as sts -from app.api.routers.effects.task_schema import TaskSchema, TaskStatusSchema, TaskInfoSchema -from app.api.routers.effects.services.task_api_service import get_scenario_info, get_all_project_info, get_project_id - -router = APIRouter(prefix='/effects', tags=['Effects']) - -def on_startup(): # TODO оценка базовых сценариев - if not os.path.exists(const.DATA_PATH): - logger.info(f'Creating data folder at {const.DATA_PATH}') - os.mkdir(const.DATA_PATH) - -tasks: dict[int, TaskSchema] = {} - -def check_task_evaluation(scenario_id: int) -> None: - - if not tasks.get(scenario_id): - raise HTTPException( - 404, - detail={ - "msg": f"Calculations for scenario {scenario_id} was never started", - "detail": { - "available scenarios": list(tasks.keys()) - } - } - - ) - elif tasks[scenario_id].task_status.task_status == "pending": - raise HTTPException( - 400, - detail={ - "msg": f"Calculations for scenario {scenario_id} is still running", - "detail": { - "available results": [ i for i in tasks.values() if i.task_status.task_status == "success" ], - } - } - ) - - elif tasks[scenario_id].task_status == "error": - raise HTTPException( - 500, - detail={ - "msg": f"Calculations for scenario {scenario_id} failed", - "detail": { - "error": tasks[scenario_id].task_status.task_status, - } - } - ) - elif tasks[scenario_id].task_status.task_status == "success": - return - else: - raise HTTPException( - 500, - detail={ - "msg": f"Unexpected error during task check", - "detail": { - "unknown status": tasks[scenario_id].task_status.task_status, - } - } - ) - -@router.get('/service_types') -def get_service_types(region_id: int) -> list[ServiceType]: - return sts.get_bn_service_types(region_id) - -@router.get('/provision_layer') -@decorators.gdf_to_geojson -def get_provision_layer(project_scenario_id: int, scale_type: em.ScaleType, service_type_id: int, - token: str = Depends(auth.verify_token)): - check_task_evaluation(project_scenario_id) - return es.get_provision_layer(project_scenario_id, scale_type, service_type_id, token) - -@router.get('/provision_data') -def get_provision_data( - project_scenario_id: int, - scale_type: Annotated[em.ScaleTypeModel, Depends(em.ScaleTypeModel)], - token: str = Depends(auth.verify_token) -): - check_task_evaluation(project_scenario_id) - return es.get_provision_data(project_scenario_id, scale_type.scale_type, token) - -@router.get('/transport_layer') -@decorators.gdf_to_geojson -def get_transport_layer(project_scenario_id: int, scale_type: em.ScaleType, token: str = Depends(auth.verify_token)): - 
check_task_evaluation(project_scenario_id) - return es.get_transport_layer(project_scenario_id, scale_type, token) - -@router.get('/transport_data') -def get_transport_data(project_scenario_id: int, scale_type: em.ScaleType, token: str = Depends(auth.verify_token)): - check_task_evaluation(project_scenario_id) - return es.get_transport_data(project_scenario_id, scale_type, token) - -@router.get('/connectivity_layer') -@decorators.gdf_to_geojson -def get_connectivity_layer(project_scenario_id: int, scale_type: em.ScaleType, token: str = Depends(auth.verify_token)): - check_task_evaluation(project_scenario_id) - return es.get_connectivity_layer(project_scenario_id, scale_type, token) - -@router.get('/connectivity_data') -def get_connectivity_data(project_scenario_id: int, scale_type: em.ScaleType, token: str = Depends(auth.verify_token)): - check_task_evaluation(project_scenario_id) - return es.get_connectivity_data(project_scenario_id, scale_type, token) - -#ToDo rewrite to check token firstly -def check_or_set_status(project_scenario_id: int, token) -> dict: - - scenario_info = get_scenario_info(project_scenario_id, token) - - if task_info := tasks.get(project_scenario_id): - task_date = task_info.task_info.lust_update - if scenario_info.get("updated_at"): - actual_date = datetime.strptime(scenario_info["updated_at"], "%Y-%m-%dT%H:%M:%S.%fZ") - else: - actual_date = datetime.strptime(scenario_info["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ") - if actual_date > task_date: - task_info.task_info.lust_update = actual_date - task_info.task_status.task_status = "pending" - return {"action": "continue"} - match task_info.task_status.task_status: - case "success": - return { - "action": "return", - "msg": "task is already done and up to date", - "task_info": task_info, - } - case "pending": - return { - "action": "return", - "msg": "task is already running", - "task_info": task_info, - } - case"done": - return { - "action": "return", - "msg": "task is done", - "task_info": task_info, - } - case "error": - return { - "action": "return", - "msg": "task failed due to error", - "task_info": task_info, - } - case _: - raise HTTPException(status_code=500, detail="Unknown task status") - else: - project_id = get_project_id(project_scenario_id, token) - project_info = get_all_project_info(project_id, token) - if scenario_info.get("updated_at"): - lust_update = datetime.strptime(scenario_info["updated_at"], "%Y-%m-%dT%H:%M:%S.%fZ") - else: - lust_update = datetime.strptime(scenario_info["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ") - task_info_to_add = TaskInfoSchema( - project_id=project_info["project_id"], - base_scenario_id=project_info["base_scenario"]["id"], - lust_update=lust_update - ) - tasks[project_scenario_id] = TaskSchema( - task_status=TaskStatusSchema(task_status="pending"), - target_scenario_id=project_scenario_id, - task_info=task_info_to_add - ) - return {"action": "continue"} - -def _evaluate_effects_task(project_scenario_id: int, token: str): - - try: - es.evaluate_effects(project_scenario_id, token) - tasks[project_scenario_id].task_status.task_status = "success" - except Exception as e: - logger.error(e) - logger.exception(e) - tasks[project_scenario_id].task_status.task_status = 'error' - tasks[project_scenario_id].task_status.error_info = e.__str__() - -@router.post('/evaluate') -def evaluate(background_tasks: BackgroundTasks, project_scenario_id: int, token: str = Depends(auth.verify_token)): - check_result = check_or_set_status(project_scenario_id, token) - if check_result["action"] == 
"return": - del check_result["action"] - return check_result - background_tasks.add_task(_evaluate_effects_task, project_scenario_id, token) - return {'task_id' : project_scenario_id } - -@router.delete('/evaluation') -def delete_evaluation(project_scenario_id : int): - try: - es.delete_evaluation(project_scenario_id) - return 'oke' - except: - return 'oops' diff --git a/app/api/routers/effects/effects_models.py b/app/api/routers/effects/effects_models.py deleted file mode 100644 index a3bb0a2..0000000 --- a/app/api/routers/effects/effects_models.py +++ /dev/null @@ -1,26 +0,0 @@ -from enum import Enum -from typing import Literal - -from pydantic import BaseModel, Field - - -class EffectType(Enum): - TRANSPORT='Транспорт' - PROVISION='Обеспеченность' - CONNECTIVITY='Связность' - - -class ScaleType(Enum): - PROJECT='Проект' - CONTEXT='Контекст' - - -class ScaleTypeModel(BaseModel): - scale_type: ScaleType = Field(...) - - -class ChartData(BaseModel): - name : str - before : float - after : float - delta : float \ No newline at end of file diff --git a/app/api/routers/effects/effects_service.py b/app/api/routers/effects/effects_service.py deleted file mode 100644 index e639d00..0000000 --- a/app/api/routers/effects/effects_service.py +++ /dev/null @@ -1,323 +0,0 @@ -import os -import math -from typing import Literal - -import geopandas as gpd -import warnings -import pandas as pd -import numpy as np -from urllib3.exceptions import InsecureRequestWarning -from loguru import logger -from blocksnet import City, WeightedConnectivity, Connectivity, Provision -from ...utils import const -from . import effects_models as em -from .services import blocksnet_service as bs, project_service as ps, service_type_service as sts - -for warning in [pd.errors.PerformanceWarning, RuntimeWarning, pd.errors.SettingWithCopyWarning, InsecureRequestWarning, FutureWarning]: - warnings.filterwarnings(action='ignore', category=warning) - -PROVISION_COLUMNS = ['provision', 'demand', 'demand_within'] - -def _get_file_path(project_scenario_id: int, effect_type: em.EffectType, scale_type: em.ScaleType): - file_path = f'{project_scenario_id}_{effect_type.name}_{scale_type.name}' - return os.path.join(const.DATA_PATH, f'{file_path}.parquet') - -def _get_total_provision(gdf_orig, name): - gdf = gdf_orig.copy() - - for column in PROVISION_COLUMNS: - new_column = column.replace(f'{name}_', '') - gdf = gdf.rename(columns={f'{name}_{column}': new_column}) - - return round(Provision.total(gdf), 2) - -def _sjoin_gdfs(gdf_before : gpd.GeoDataFrame, gdf_after : gpd.GeoDataFrame): - gdf_before = gdf_before.to_crs(gdf_after.crs) - # set i to identify intersections - gdf_before['i'] = gdf_before.index - gdf_after['i'] = gdf_after.index - gdf_sjoin = gdf_after.sjoin(gdf_before, how='left', predicate='intersects', lsuffix='after', rsuffix='before') - # filter nans - gdf_sjoin = gdf_sjoin[~gdf_sjoin['i_before'].isna()] - gdf_sjoin = gdf_sjoin[~gdf_sjoin['i_after'].isna()] - # get intersections area and keep largest - gdf_sjoin['area'] = gdf_sjoin.apply(lambda s : gdf_before.loc[s['i_before'], 'geometry'].intersection(gdf_after.loc[s['i_after'], 'geometry']).area, axis=1) - gdf_sjoin = gdf_sjoin.sort_values(by='area') - return gdf_sjoin.drop_duplicates(subset=['i_after'], keep='last') - -def get_transport_layer(project_scenario_id: int, scale_type: em.ScaleType, token: str): - project_info = ps.get_project_info(project_scenario_id, token) - based_scenario_id = ps.get_based_scenario_id(project_info, token) - - # get both files - 
before_file_path = _get_file_path(based_scenario_id, em.EffectType.TRANSPORT, scale_type) - after_file_path = _get_file_path(project_scenario_id, em.EffectType.TRANSPORT, scale_type) - - gdf_before = gpd.read_parquet(before_file_path) - gdf_after = gpd.read_parquet(after_file_path) - - # calculate delta - gdf_delta = _sjoin_gdfs(gdf_before, gdf_after) - gdf_delta = gdf_delta.rename(columns={ - 'weighted_connectivity_before': 'before', - 'weighted_connectivity_after': 'after' - })[['geometry', 'before', 'after']] - gdf_delta['delta'] = gdf_delta['after'] - gdf_delta['before'] - - # round digits - for column in ['before', 'after', 'delta']: - gdf_delta[column] = gdf_delta[column].apply(lambda v : round(v,1)) - - return gdf_delta - -def get_transport_data(project_scenario_id: int, scale_type: em.ScaleType, token: str): - project_info = ps.get_project_info(project_scenario_id, token) - based_scenario_id = ps.get_based_scenario_id(project_info, token) - - # get both files - before_file_path = _get_file_path(based_scenario_id, em.EffectType.TRANSPORT, scale_type) - after_file_path = _get_file_path(project_scenario_id, em.EffectType.TRANSPORT, scale_type) - - gdf_before = gpd.read_parquet(before_file_path) - gdf_after = gpd.read_parquet(after_file_path) - - # calculate chart data - names_funcs = { - 'Среднее': np.mean, - 'Медиана': np.median, - 'Мин': np.min, - 'Макс': np.max - } - - items = [] - for name, func in names_funcs.items(): - before = func(gdf_before['weighted_connectivity']) - after = func(gdf_after['weighted_connectivity']) - delta = after - before - items.append({ - 'name': name, - 'before': round(before,1), - 'after': round(after,1), - 'delta': round(delta,1) - }) - return items - -def get_connectivity_layer(project_scenario_id: int, scale_type: em.ScaleType, token: str): - project_info = ps.get_project_info(project_scenario_id, token) - based_scenario_id = ps.get_based_scenario_id(project_info, token) - - # get both files - before_file_path = _get_file_path(based_scenario_id, em.EffectType.CONNECTIVITY, scale_type) - after_file_path = _get_file_path(project_scenario_id, em.EffectType.CONNECTIVITY, scale_type) - - gdf_before = gpd.read_parquet(before_file_path) - gdf_after = gpd.read_parquet(after_file_path) - - # calculate delta - gdf_delta = _sjoin_gdfs(gdf_before, gdf_after) - gdf_delta = gdf_delta.rename(columns={ - 'connectivity_before': 'before', - 'connectivity_after': 'after' - })[['geometry', 'before', 'after']] - gdf_delta['delta'] = gdf_delta['after'] - gdf_delta['before'] - - # round digits - for column in ['before', 'after', 'delta']: - gdf_delta[column] = gdf_delta[column].apply(lambda v : round(v,1)) - - return gdf_delta - -def get_connectivity_data(project_scenario_id: int, scale_type: em.ScaleType, token: str): - project_info = ps.get_project_info(project_scenario_id, token) - based_scenario_id = ps.get_based_scenario_id(project_info, token) - - # get both files - before_file_path = _get_file_path(based_scenario_id, em.EffectType.CONNECTIVITY, scale_type) - after_file_path = _get_file_path(project_scenario_id, em.EffectType.CONNECTIVITY, scale_type) - - gdf_before = gpd.read_parquet(before_file_path) - gdf_after = gpd.read_parquet(after_file_path) - - # calculate chart data - names_funcs = { - 'Среднее': np.mean, - 'Мин': np.min, - 'Макс': np.max - } - - items = [] - for name, func in names_funcs.items(): - before = func(gdf_before['connectivity']) - after = func(gdf_after['connectivity']) - delta = after - before - items.append({ - 'name': name, - 'before': 
round(before,1), - 'after': round(after,1), - 'delta': round(delta,1) - }) - print(items) - return items - -def get_provision_layer(project_scenario_id: int, scale_type: em.ScaleType, service_type_id: int, token: str): - project_info = ps.get_project_info(project_scenario_id, token) - based_scenario_id = ps.get_based_scenario_id(project_info, token) - - service_types = sts.get_bn_service_types(project_info['region_id']) - service_type = list(filter(lambda x: x.code == str(service_type_id), service_types))[0] - - before_file_path = _get_file_path(based_scenario_id, em.EffectType.PROVISION, scale_type) - after_file_path = _get_file_path(project_scenario_id, em.EffectType.PROVISION, scale_type) - - gdf_before = gpd.read_parquet(before_file_path) - gdf_after = gpd.read_parquet(after_file_path) - - provision_column = f'{service_type.name}_provision' - - # calculate delta - gdf_delta = _sjoin_gdfs(gdf_before, gdf_after) - gdf_delta = gdf_delta.rename(columns={ - f'{provision_column}_before': 'before', - f'{provision_column}_after': 'after' - })[['geometry', 'before', 'after']] - gdf_delta['delta'] = gdf_delta['after'] - gdf_delta['before'] - - for column in ['before', 'after', 'delta']: - gdf_delta[column] = gdf_delta[column].apply(lambda v : round(v,2)) - - return gdf_delta - - -def get_provision_data( - project_scenario_id: int, - scale_type: Literal["Проект", "Контекст"], - token: str -) -> list[em.ChartData]: - project_info = ps.get_project_info(project_scenario_id, token) - based_scenario_id = ps.get_based_scenario_id(project_info, token) - - before_file_path = _get_file_path(based_scenario_id, em.EffectType.PROVISION, scale_type) - after_file_path = _get_file_path(project_scenario_id, em.EffectType.PROVISION, scale_type) - - gdf_before = gpd.read_parquet(before_file_path) - gdf_after = gpd.read_parquet(after_file_path) - - service_types = sts.get_bn_service_types(project_info['region_id']) - results = [] - for st in service_types: - name = st.name - - before = _get_total_provision(gdf_before, name) - after = _get_total_provision(gdf_after, name) - delta = after - before - - results.append({ - 'name': name, - 'before': round(before,2) if not math.isnan(before) else None, - 'after': round(after,2) if not math.isnan(after) else None, - 'delta': round(delta,2) if not math.isnan(delta) else 0, - }) - return results - -def _evaluate_transport(project_scenario_id: int, city_model: City, scale: em.ScaleType): - logger.info('Evaluating transport') - conn = WeightedConnectivity(city_model=city_model, verbose=False) - conn_gdf = conn.calculate() - file_path = _get_file_path(project_scenario_id, em.EffectType.TRANSPORT, scale) - conn_gdf.to_parquet(file_path) - logger.success('Transport successfully evaluated!') - -def _evaluate_connectivity(project_scenario_id: int, city_model: City, scale: em.ScaleType): - logger.info('Evaluating connectivity') - conn = Connectivity(city_model=city_model, verbose=False) - conn_gdf = conn.calculate() - conn_gdf['connectivity'] = conn_gdf['connectivity'].astype('float32') - conn_gdf['connectivity'] = conn_gdf['connectivity'].apply(lambda v : np.nan if np.isinf(v) else v) - file_path = _get_file_path(project_scenario_id, em.EffectType.CONNECTIVITY, scale) - conn_gdf.to_parquet(file_path) - logger.success('Connectivity successfully evaluated!') - -def _evaluate_provision(project_scenario_id: int, city_model: City, scale: em.ScaleType): - logger.info('Evaluating provision') - blocks_gdf = city_model.get_blocks_gdf()[['geometry']] - - for st in city_model.service_types: - 
prov = Provision(city_model=city_model, verbose=False) - prov_gdf = prov.calculate(st) - for column in PROVISION_COLUMNS: - blocks_gdf[f'{st.name}_{column}'] = prov_gdf[column] - - file_path = _get_file_path(project_scenario_id, em.EffectType.PROVISION, scale) - blocks_gdf.to_parquet(file_path) - logger.success('Provision successfully evaluated!') - -def _evaluation_exists(project_scenario_id : int, token : str): - exists = True - for effect_type in list(em.EffectType): - for scale_type in list(em.ScaleType): - file_path = _get_file_path(project_scenario_id, effect_type, scale_type) - if not os.path.exists(file_path): - exists = False - return exists - -def delete_evaluation(project_scenario_id : int): - for effect_type in list(em.EffectType): - for scale_type in list(em.ScaleType): - file_path = _get_file_path(project_scenario_id, effect_type, scale_type) - if os.path.exists(file_path): - os.remove(file_path) - -def evaluate_effects(project_scenario_id : int, token: str, reevaluate : bool = True): - logger.info(f'Fetching {project_scenario_id} project info') - - project_info = ps.get_project_info(project_scenario_id, token) - based_scenario_id = ps.get_based_scenario_id(project_info, token) - # if scenario isnt based, evaluate the based scenario - if project_scenario_id != based_scenario_id: - evaluate_effects(based_scenario_id, token, reevaluate=False) - - # if scenario exists and doesnt require reevaluation, we return - exists = _evaluation_exists(project_scenario_id, token) - if exists and not reevaluate: - logger.info(f'{project_scenario_id} evaluation already exists') - return - - logger.info('Fetching region service types') - service_types = sts.get_bn_service_types(project_info['region_id']) - logger.info('Fetching physical object types') - physical_object_types = ps.get_physical_object_types() - logger.info('Fetching scenario objects') - scenario_gdf = ps.get_scenario_objects(project_scenario_id, token, project_info['project_id']) - - logger.info('Fetching project model') - project_model = bs.fetch_city_model( - project_info=project_info, - project_scenario_id=project_scenario_id, - service_types=service_types, - physical_object_types=physical_object_types, - scenario_gdf=scenario_gdf, - scale=em.ScaleType.PROJECT - ) - - logger.info('Fetching context model') - context_model = bs.fetch_city_model( - project_info=project_info, - project_scenario_id=project_scenario_id, - service_types=service_types, - physical_object_types=physical_object_types, - scenario_gdf=scenario_gdf, - scale=em.ScaleType.CONTEXT - ) - - # project_model.to_pickle(f'{project_scenario_id}_project.pickle') - # context_model.to_pickle(f'{project_scenario_id}_context.pickle') - - _evaluate_transport(project_scenario_id, project_model, em.ScaleType.PROJECT) - _evaluate_connectivity(project_scenario_id, project_model, em.ScaleType.PROJECT) - _evaluate_provision(project_scenario_id, project_model, em.ScaleType.PROJECT) - - _evaluate_transport(project_scenario_id, context_model, em.ScaleType.CONTEXT) - _evaluate_connectivity(project_scenario_id, context_model, em.ScaleType.CONTEXT) - _evaluate_provision(project_scenario_id, context_model, em.ScaleType.CONTEXT) - - logger.success(f'{project_scenario_id} evaluated successfully') diff --git a/app/api/routers/effects/services/blocksnet_service.py b/app/api/routers/effects/services/blocksnet_service.py deleted file mode 100644 index e01c585..0000000 --- a/app/api/routers/effects/services/blocksnet_service.py +++ /dev/null @@ -1,282 +0,0 @@ -import random - -import 
geopandas as gpd -import pandas as pd -import momepy -import networkx as nx -from blocksnet.models.city import Building, BlockService, Block -from pyproj.crs import CRS -from blocksnet import (AccessibilityProcessor, BlocksGenerator, City, ServiceType, LandUseProcessor) -from app.api.utils import const -from .. import effects_models as em -from app.api.routers.effects.services.service_type_service import get_zones - -SPEED_M_MIN = 60 * 1000 / 60 -GAP_TOLERANCE = 5 - -def _get_geoms_by_function(function_name, physical_object_types, scenario_gdf): - valid_type_ids = { - d['physical_object_type_id'] - for d in physical_object_types - if function_name in d['physical_object_function']['name'] - } - return scenario_gdf[scenario_gdf['physical_objects'].apply( - lambda x: any(d.get('physical_object_type').get('id') in valid_type_ids for d in x))] - -def _get_water(scenario_gdf, physical_object_types): - water = _get_geoms_by_function('Водный объект', physical_object_types, scenario_gdf) - water = water.explode(index_parts=True) - water = water.reset_index() - return water - - -def _get_roads(scenario_gdf, physical_object_types): - roads = _get_geoms_by_function('Дорога', physical_object_types, scenario_gdf) - merged = roads.unary_union - if merged.geom_type == 'MultiLineString': - roads = gpd.GeoDataFrame(geometry=list(merged.geoms), crs=roads.crs) - else: - roads = gpd.GeoDataFrame(geometry=[merged], crs=roads.crs) - roads = roads.explode(index_parts=False).reset_index(drop=True) - roads.geometry = momepy.close_gaps(roads, GAP_TOLERANCE) - roads = roads[roads.geom_type.isin(['LineString'])] - return roads - -def _get_geoms_by_object_type_id(scenario_gdf, object_type_id): - return scenario_gdf[scenario_gdf['physical_objects'].apply(lambda x: any(d.get('physical_object_type').get('id') == object_type_id for d in x))] - -def _get_buildings(scenario_gdf, physical_object_types): - LIVING_BUILDINGS_ID = 4 - NON_LIVING_BUILDINGS_ID = 5 - living_building = _get_geoms_by_object_type_id(scenario_gdf, LIVING_BUILDINGS_ID) - living_building['is_living'] = True - # print(living_building) - non_living_buildings = _get_geoms_by_object_type_id(scenario_gdf, NON_LIVING_BUILDINGS_ID) - non_living_buildings['is_living'] = False - - buildings = gpd.GeoDataFrame( pd.concat( [living_building, non_living_buildings], ignore_index=True) ) - # print(buildings) - # buildings = _get_geoms_by_function('Здание', physical_object_types, scenario_gdf) - buildings['number_of_floors'] = 1 - # buildings['is_living'] = True - buildings['footprint_area'] = buildings.geometry.area - buildings['build_floor_area'] = buildings['footprint_area'] * buildings['number_of_floors'] - buildings['living_area'] = buildings.geometry.area - buildings['population'] = 0 - buildings['population'][buildings['is_living']] = 100 - buildings = buildings.reset_index() - buildings = buildings[buildings.geometry.type != 'Point'] - return buildings[['geometry', 'number_of_floors', 'footprint_area', 'build_floor_area', 'living_area', 'population']] - - -def _get_services(scenario_gdf) -> gpd.GeoDataFrame | None: - - def extract_services(row): - if isinstance(row['services'], list) and len(row['services']) > 0: - return [ - { - 'service_id': service['service_id'], - 'service_type_id': service['service_type']['id'], - 'name': service['name'], - 'capacity_real': service['capacity'], - 'geometry': row['geometry'] # Сохраняем геометрию - } - for service in row['services'] - if service.get('capacity') is not None and service['capacity'] > 0 - ] - return [] - - 
extracted_data = [] - for _, row in scenario_gdf.iterrows(): - extracted_data.extend(extract_services(row)) - - if len(extracted_data) == 0: - return None - - services_gdf = gpd.GeoDataFrame(extracted_data, crs=scenario_gdf.crs) - - services_gdf['capacity'] = services_gdf['capacity_real'] - services_gdf = services_gdf[['geometry', 'service_id', 'service_type_id', 'name', 'capacity']] - - services_gdf['area'] = services_gdf.geometry.area - services_gdf['area'] = services_gdf['area'].apply(lambda a : a if a > 1 else 1) - # services_gdf.loc[services_gdf.area == 0, 'area'] = 100 - # services_gdf['area'] = services_gdf - - return services_gdf - - -def _roads_to_graph(roads): - roads.to_parquet(f'roads_{len(roads)}.parquet') - graph = momepy.gdf_to_nx(roads) - graph.graph['crs'] = CRS.to_epsg(roads.crs) - graph = nx.DiGraph(graph) - for _, _, data in graph.edges(data=True): - geometry = data['geometry'] - data['time_min'] = geometry.length / SPEED_M_MIN - # data['weight'] = data['mm_len'] / 1000 / 1000 - # data['length_meter'] = data['mm_len'] / 1000 - for n, data in graph.nodes(data=True): - graph.nodes[n]['x'] = n[0] # Assign X coordinate to node - graph.nodes[n]['y'] = n[1] - - return graph - -def _get_boundaries(project_info : dict, scale : em.ScaleType) -> gpd.GeoDataFrame: - if scale == em.ScaleType.PROJECT: - boundaries = gpd.GeoDataFrame(geometry=[project_info['geometry']]) - else: - boundaries = gpd.GeoDataFrame(geometry=[project_info['context']]) - boundaries = boundaries.set_crs(const.DEFAULT_CRS) - local_crs = boundaries.estimate_utm_crs() - return boundaries.to_crs(local_crs) - -def _generate_blocks(boundaries_gdf : gpd.GeoDataFrame, roads_gdf : gpd.GeoDataFrame, scenario_gdf : gpd.GeoDataFrame, physical_object_types : dict) -> gpd.GeoDataFrame: - water_gdf = _get_water(scenario_gdf, physical_object_types).to_crs(boundaries_gdf.crs) - - blocks_generator = BlocksGenerator( - boundaries=boundaries_gdf, - roads=roads_gdf if len(roads_gdf)>0 else None, - water=water_gdf if len(water_gdf)>0 else None - ) - blocks = blocks_generator.run() - blocks['land_use'] = None # TODO ЗАмнить на норм land_use?? 
>> здесь должен быть этап определения лендюза по тому что есть в бд - return blocks - -def _calculate_acc_mx(blocks_gdf : gpd.GeoDataFrame, roads_gdf : gpd.GeoDataFrame) -> pd.DataFrame: - accessibility_processor = AccessibilityProcessor(blocks=blocks_gdf) - graph = _roads_to_graph(roads_gdf) - accessibility_matrix = accessibility_processor.get_accessibility_matrix(graph=graph) - return accessibility_matrix - -def _update_buildings(city : City, scenario_gdf : gpd.GeoDataFrame, physical_object_types : dict) -> None: - buildings_gdf = _get_buildings(scenario_gdf, physical_object_types).copy().to_crs(city.crs) - buildings_gdf = buildings_gdf[buildings_gdf.geom_type.isin(['Polygon', 'MultiPolygon'])] - city.update_buildings(buildings_gdf) - -def _update_services(city : City, service_types : list[ServiceType], scenario_gdf : gpd.GeoDataFrame) -> None: - # reset service types - city._service_types = {} - for st in service_types: - city.add_service_type(st) - # filter services and add to the model if exist - services_gdf = _get_services(scenario_gdf) - if services_gdf is None: - return - services_gdf = services_gdf.to_crs(city.crs).copy() - service_type_dict = {service.code: service for service in service_types} - for service_type_code, st_gdf in services_gdf.groupby('service_type_id'): - gdf = st_gdf.copy().to_crs(city.crs) - gdf.geometry = gdf.representative_point() - service_type = service_type_dict.get(str(service_type_code), None) - if service_type is not None: - city.update_services(service_type, gdf) - -# ToDo handle no service case -def _update_landuse(city: City, zones: gpd.GeoDataFrame): - - def _process_zones(zones: gpd.GeoDataFrame): - db_zones = list(const.mapping.keys()) - return zones[zones.zone.isin(db_zones)] - - def _get_blocks_to_process(blocks, zones): - lup = LandUseProcessor(blocks=blocks, zones=zones, zone_to_land_use=const.mapping) - blocks_with_lu = lup.run(0.5) - return blocks_with_lu[~blocks_with_lu['land_use'].isna()] - - def _update_non_residential_block(block: Block): - for building in block.buildings: - building.population = 0 - - def _update_residential_block(block: Block, pop_per_ha: float, service_types: list[ServiceType]): - pop_per_m2 = pop_per_ha / SQ_M_IN_HA - area = block.site_area - population = round(pop_per_m2 * area) - # удаляем здания и сервисы - block.buildings = [] - block.services = [] - # добавляем dummy здание и даем ему наше население - dummy_building = Building( - block=block, - geometry=block.geometry.buffer(-0.01), - population=population, - **const.DUMMY_BUILDING_PARAMS - ) - block.buildings.append(dummy_building) - # добавляем по каждому типу сервиса большой dummy_service - for service_type in service_types: - capacity = service_type.calculate_in_need(population) - dummy_service = BlockService( - service_type=service_type, - capacity=capacity, - is_integrated=False, - block=block, - geometry=block.geometry.representative_point().buffer(0.01), - ) - block.services.append(dummy_service) - - def _update_block(block: Block, zone: str, service_types: list[ServiceType]): - if zone in const.residential_mapping: # если квартал жилой - pop_min, pop_max = const.residential_mapping[zone] - _update_residential_block(block, random.randint(pop_min, pop_max), service_types) - else: - _update_non_residential_block(block) - - def update_blocks(city: City, blocks_with_lu: gpd.GeoDataFrame, service_types: list[ServiceType]): - for block_id, row in blocks_with_lu.iterrows(): - zone = row['zone'] - block = city[block_id] - _update_block(block, zone, 
service_types) - - LU_SHARE = 0.5 - SQ_M_IN_HA = 10_000 - zones = _process_zones(zones) - blocks = city.get_blocks_gdf(True) - zones.to_crs(blocks.crs, inplace=True) - blocks_with_lu = _get_blocks_to_process(blocks, zones) - residential_sts = [city[st_name] for st_name in ['school', 'kindergarten', 'polyclinic'] if st_name in city.services] - update_blocks(city, blocks_with_lu, residential_sts) - return city - -# ToDo move zones to preprocessing and pass them to the function -def fetch_city_model( - project_info: dict, - project_scenario_id: int, - scenario_gdf: gpd.GeoDataFrame, - physical_object_types: dict, - service_types: list, - scale: em.ScaleType -): - - # getting boundaries for our model - boundaries_gdf = _get_boundaries(project_info, scale) - local_crs = boundaries_gdf.crs - - # clipping scenario objects - scenario_gdf = scenario_gdf.to_crs(local_crs) - scenario_gdf = scenario_gdf.clip(boundaries_gdf) - - roads_gdf = _get_roads(scenario_gdf, physical_object_types) - - # generating blocks layer - blocks_gdf = _generate_blocks(boundaries_gdf, roads_gdf, scenario_gdf, physical_object_types) - - # calculating accessibility matrix - acc_mx = _calculate_acc_mx(blocks_gdf, roads_gdf) - - # initializing city model - city = City( - blocks=blocks_gdf, - acc_mx=acc_mx, - ) - - # updating buildings layer - _update_buildings(city, scenario_gdf, physical_object_types) - - # updating service types - _update_services(city, service_types, scenario_gdf) - - zones = get_zones(project_scenario_id) - city = _update_landuse(city, zones) - - return city diff --git a/app/api/routers/effects/services/project_service.py b/app/api/routers/effects/services/project_service.py deleted file mode 100644 index 25ab2c3..0000000 --- a/app/api/routers/effects/services/project_service.py +++ /dev/null @@ -1,103 +0,0 @@ -import json - -import requests -import shapely -import geopandas as gpd -from app.api.utils import const -from loguru import logger -from .. 
import effects_models as em - -def get_scenarios_by_project_id(project_id : int, token : str) -> dict: - res = requests.get(const.URBAN_API + f'/api/v1/projects/{project_id}/scenarios', headers={'Authorization': f'Bearer {token}'}) - res.raise_for_status() - return res.json() - -def get_based_scenario_id(project_info, token): - scenarios = get_scenarios_by_project_id(project_info['project_id'], token) - based_scenario_id = list(filter(lambda x: x['is_based'], scenarios))[0]['scenario_id'] - return based_scenario_id - -def _get_scenario_objects( - scenario_id : int, - token : str, - scale_type : em.ScaleType, - project_id: int, - physical_object_type_id : int | None = None, - service_type_id : int | None = None, - physical_object_function_id : int | None = None, - urban_function_id : int | None = None, - ): - headers = {'Authorization': f'Bearer {token}'} - if scale_type == em.ScaleType.CONTEXT: - url = const.URBAN_API + f'/api/v1/projects/{project_id}/context/geometries_with_all_objects' - else: - url = const.URBAN_API + f'/api/v1/scenarios/{scenario_id}/geometries_with_all_objects' - res = requests.get(url, params={ - 'physical_object_type_id': physical_object_type_id, - 'service_type_id': service_type_id, - 'physical_object_function_id' : physical_object_function_id, - 'urban_function_id' : urban_function_id - }, headers=headers) - return res.json() - -def get_scenario_objects(scenario_id : int, token : str, project_id: int, *args, **kwargs) -> gpd.GeoDataFrame: - collections = [_get_scenario_objects(scenario_id, token, scale_type, project_id, *args, **kwargs) for scale_type in list(em.ScaleType)] - features = [feature for collection in collections for feature in collection['features']] - gdf = gpd.GeoDataFrame.from_features(features).set_crs(const.DEFAULT_CRS) - return gdf.drop_duplicates(subset=['object_geometry_id']) - -def get_physical_object_types(): - res = requests.get(const.URBAN_API + f'/api/v1/physical_object_types', verify=False) - return res.json() - -def _get_scenario_by_id(scenario_id : int, token : str) -> dict: - res = requests.get(const.URBAN_API + f'/api/v1/scenarios/{scenario_id}', headers={'Authorization': f'Bearer {token}'}) - res.raise_for_status() - return res.json() - -def _get_project_territory_by_id(project_id : int, token : str) -> dict: - res = requests.get(const.URBAN_API + f'/api/v1/projects/{project_id}/territory', headers={'Authorization': f'Bearer {token}'}) - res.raise_for_status() - return res.json() - -def _get_project_by_id(project_id : int, token : str) -> dict: - res = requests.get(const.URBAN_API + f'/api/v1/projects/{project_id}', headers={'Authorization': f'Bearer {token}'}) - res.raise_for_status() - return res.json() - -def _get_territory_by_id(territory_id : int) -> dict: - res = requests.get(const.URBAN_API + f'/api/v1/territory/{territory_id}') - res.raise_for_status() - return res.json() - -def _get_context_geometry(territories_ids : list[int]): - geometries = [] - for territory_id in territories_ids: - territory = _get_territory_by_id(territory_id) - geom_json = json.dumps(territory['geometry']) - geometry = shapely.from_geojson(geom_json) - geometries.append(geometry) - return shapely.unary_union(geometries) - -def get_project_info(project_scenario_id : int, token : str) -> dict: - """ - Fetch project data - """ - scenario_info = _get_scenario_by_id(project_scenario_id, token) - is_based = scenario_info['is_based'] # является ли сценарий базовым для проекта - project_id = scenario_info['project']['project_id'] - - project_info = 
_get_project_by_id(project_id, token) - context_ids = project_info['properties']['context'] - - project_territory = _get_project_territory_by_id(project_id, token) - region_id = project_territory['project']['region']['id'] - project_geometry = json.dumps(project_territory['geometry']) - - return { - 'project_id' : project_id, - 'region_id': region_id, - 'is_based': is_based, - 'geometry': shapely.from_geojson(project_geometry), - 'context': _get_context_geometry(context_ids) - } diff --git a/app/api/routers/effects/services/service_type_service.py b/app/api/routers/effects/services/service_type_service.py deleted file mode 100644 index 1e14866..0000000 --- a/app/api/routers/effects/services/service_type_service.py +++ /dev/null @@ -1,86 +0,0 @@ -import pandas as pd -import geopandas as gpd -import requests -from fastapi import HTTPException - -from app.api.utils import const -from blocksnet.models import ServiceType - -def _get_service_types(region_id : int) -> pd.DataFrame: - res = requests.get(const.URBAN_API + f'/api/v1/territory/{region_id}/service_types') - res.raise_for_status() - df = pd.DataFrame(res.json()) - return df.set_index('service_type_id') - -def _get_normatives(region_id : int) -> pd.DataFrame: - res = requests.get(const.URBAN_API + f'/api/v1/territory/{region_id}/normatives', params={'year': const.NORMATIVES_YEAR}) - res.raise_for_status() - df = pd.DataFrame(res.json()) - df['service_type_id'] = df['service_type'].apply(lambda st : st['id']) - return df.set_index('service_type_id') - -def get_bn_service_types(region_id : int) -> list[ServiceType]: - """ - Befriend normatives and service types into BlocksNet format - """ - db_service_types_df = _get_service_types(region_id) - db_normatives_df = _get_normatives(region_id) - service_types_df = db_service_types_df.merge(db_normatives_df, left_index=True, right_index=True) - # filter by minutes not null - service_types_df = service_types_df[~service_types_df['time_availability_minutes'].isna()] - # filter by capacity not null - service_types_df = service_types_df[~service_types_df['services_capacity_per_1000_normative'].isna()] - - service_types = [] - for _, row in service_types_df.iterrows(): - service_type = ServiceType( - code=row['code'], - name=row['name'], - accessibility=row['time_availability_minutes'], - demand=row['services_capacity_per_1000_normative'], - land_use = [], #TODO - bricks = [] #TODO - ) - service_types.append(service_type) - return service_types - -def get_zones(scenario_id: int) -> gpd.GeoDataFrame: - """ - - Args: - scenario_id (int): scenario id - - Returns: - gpd.GeoDataFrame: geodataframe with zones - - """ - - def _form_source_params(sources: list[dict]) -> dict: - source_names = [i["source"] for i in sources] - source_data_df = pd.DataFrame(sources) - if "PZZ" in source_names: - return source_data_df.loc[ - source_data_df[source_data_df["source"] == "PZZ"]["year"].idxmax() - ].to_dict() - elif "OSM" in source_names: - return source_data_df.loc[ - source_data_df[source_data_df["source"] == "OSM"]["year"].idxmax() - ].to_dict() - elif "User" in source_names: - return source_data_df.loc[ - source_data_df[source_data_df["source"] == "User"]["year"].idxmax() - ].to_dict() - else: - raise HTTPException(status_code=404, detail="Source type not found") - - zones_sources = requests.get( - url=f"{const.URBAN_API}/api/v1/scenarios/{scenario_id}/functional_zone_sources", - ) - zones_params_request = _form_source_params(zones_sources.json()) - target_zones = requests.get( - 
url=f"{const.URBAN_API}/api/v1/scenarios/{scenario_id}/functional_zones", - params=zones_params_request, - ) - target_zones_gdf = gpd.GeoDataFrame.from_features(target_zones.json(), crs=4326) - target_zones_gdf["zone"] = target_zones_gdf["functional_zone_type"].apply(lambda x: x.get("name")) - return target_zones_gdf diff --git a/app/api/routers/effects/services/task_api_service.py b/app/api/routers/effects/services/task_api_service.py deleted file mode 100644 index e02e94d..0000000 --- a/app/api/routers/effects/services/task_api_service.py +++ /dev/null @@ -1,46 +0,0 @@ -from datetime import datetime -from typing import Optional - -import requests -from fastapi import HTTPException - -from app.api.utils.const import URBAN_API -from app.api.routers.effects.task_schema import TaskInfoSchema - - - -def get_headers(token: Optional[str] = None) -> dict[str, str] | None: - if token: - headers = { - "Authorization": f"Bearer {token}" - } - return headers - return None - -def get_project_id(scenario_id: int, token: Optional[str] = None) -> int: - url = f"{URBAN_API}/api/v1/scenarios/{scenario_id}" - headers = get_headers(token) - - response = requests.get(url, headers=headers) - if response.status_code != 200: - raise HTTPException(response.status_code, response.text) - return response.json()["project"]["project_id"] - -def get_all_project_info(project_id: int, token: Optional[str] = None) -> dict: - url = f"{URBAN_API}/api/v1/projects/{project_id}" - headers = get_headers(token) - - response = requests.get(url, headers=headers) - if response.status_code != 200: - raise HTTPException(response.status_code, response.text) - result = response.json() - return result - -def get_scenario_info(target_scenario_id: int, token) -> dict: - - url = f"{URBAN_API}/api/v1/scenarios/{target_scenario_id}" - headers = get_headers(token) - response = requests.get(url, headers=headers) - if response.status_code != 200: - raise HTTPException(response.status_code, response.text) - return response.json() diff --git a/app/api/routers/effects/task_schema.py b/app/api/routers/effects/task_schema.py deleted file mode 100644 index 073fcbf..0000000 --- a/app/api/routers/effects/task_schema.py +++ /dev/null @@ -1,22 +0,0 @@ -from datetime import datetime -from typing import Literal, Optional - -from pydantic import BaseModel - - -class TaskStatusSchema(BaseModel): - - task_status: Literal["pending", "success", "error", ] - error_info: Optional[str] = None - -class TaskInfoSchema(BaseModel): - - project_id: int - base_scenario_id: int - lust_update: datetime - - -class TaskSchema(BaseModel): - task_status: TaskStatusSchema - target_scenario_id: int - task_info: Optional[TaskInfoSchema] = None diff --git a/app/api/utils/const.py b/app/api/utils/const.py deleted file mode 100644 index bc22328..0000000 --- a/app/api/utils/const.py +++ /dev/null @@ -1,63 +0,0 @@ -import os -from iduconfig import Config -from blocksnet import LandUse - - -config = Config() - -API_TITLE = 'Effects API' -API_DESCRIPTION = 'API for assessing territory transformation effects' -EVALUATION_RESPONSE_MESSAGE = 'Evaluation started' -DEFAULT_CRS = 4326 -NORMATIVES_YEAR = 2024 - -if config.get("DATA_PATH"): - DATA_PATH = os.path.abspath('data') -else: - # DATA_PATH = 'app/data' - raise Exception('No DATA_PATH in env file') -if config.get("URBAN_API"): - URBAN_API = config.get("URBAN_API") -else: - # URBAN_API = 'http://10.32.1.107:5300' - raise Exception('No URBAN_API in env file') - - -LU_SHARE = 0.5 -SQ_M_IN_HA = 10_000 - - -mapping = { - 'residential': 
LandUse.RESIDENTIAL, - 'recreation': LandUse.RECREATION, - 'special': LandUse.SPECIAL, - 'industrial': LandUse.INDUSTRIAL, - 'agriculture': LandUse.AGRICULTURE, - 'transport': LandUse.TRANSPORT, - 'business': LandUse.BUSINESS, - # 'basic': , - 'residential_individual': LandUse.RESIDENTIAL, - 'residential_lowrise': LandUse.RESIDENTIAL, - 'residential_midrise': LandUse.RESIDENTIAL, - 'residential_multistorey': LandUse.RESIDENTIAL, - # 'unknown': '', - # 'mixed_use': '' - } - - -residential_mapping = { - 'residential': (250,350), - 'residential_individual': (30,35), - 'residential_lowrise': (50,150), - 'residential_midrise': (250,350), - 'residential_multistorey': (350,450), -} - -DUMMY_BUILDING_PARAMS = { - 'id' : -1, - 'build_floor_area' : 0, - 'living_area' : 0, - 'non_living_area' : 0, - 'footprint_area' : 0, - 'number_of_floors' : 1, -} diff --git a/app/api/utils/decorators.py b/app/api/utils/decorators.py deleted file mode 100644 index c2d6254..0000000 --- a/app/api/utils/decorators.py +++ /dev/null @@ -1,47 +0,0 @@ -import json -from functools import wraps - -import geopandas as gpd - -# from shapely import set_precision - -PRECISION_GRID_SIZE = 0.0001 - -def gdf_to_geojson(func): - """ - A decorator that processes a GeoDataFrame returned by an asynchronous function and converts it to GeoJSON format with specified CRS and geometry precision. - - This decorator takes an asynchronous function that returns a GeoDataFrame, transforms its coordinate system to EPSG:4326, - and optionally adjusts the geometry precision based on a defined grid size. The final result is a GeoJSON-compatible dictionary. - - Parameters - ---------- - func : Callable - An asynchronous function that returns a GeoDataFrame. - - Returns - ------- - Callable - A wrapped asynchronous function that returns the GeoDataFrame as a GeoJSON-compatible dictionary. - - Notes - ----- - - The decorator converts the GeoDataFrame to EPSG:4326 (WGS 84). - - Geometry precision is adjusted using the `set_precision` function and a grid size defined by `PRECISION_GRID_SIZE`. - - Commented-out code allows optional rounding for columns containing 'provision' in their name, if enabled. - - Examples - -------- - ``` - @gdf_to_geojson - async def get_geodata(): - # returns a GeoDataFrame - return gdf - ``` - """ - @wraps(func) - def process(*args, **kwargs): - gdf = func(*args, **kwargs).to_crs(4326) - # gdf.geometry = set_precision(gdf.geometry, grid_size=PRECISION_GRID_SIZE) - return json.loads(gdf.to_json()) - return process \ No newline at end of file diff --git a/app/api/__init__.py b/app/broker_handlers/__init__.py similarity index 100% rename from app/api/__init__.py rename to app/broker_handlers/__init__.py diff --git a/app/broker_handlers/cache_invalidation.py b/app/broker_handlers/cache_invalidation.py new file mode 100644 index 0000000..0bc0164 --- /dev/null +++ b/app/broker_handlers/cache_invalidation.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from typing import Any, Callable, Iterable + +from confluent_kafka import Message +from loguru import logger +from otteroad import KafkaProducerClient +import time + +from app.prometheus.metrics import CACHE_INVALIDATION_EVENTS_TOTAL, CACHE_INVALIDATION_ERROR_TOTAL, \ + CACHE_INVALIDATION_DURATION_SECONDS, CACHE_INVALIDATION_SUCCESS_TOTAL + +from app.common.caching.caching_service import FileCache + + +@dataclass(frozen=True) +class CacheInvalidationRule: + """ + Cache invalidation rule. + + method: cache method name (e.g. 
"social_economical_metrics"). + owner_id_getter: function that returns owner_id from event (e.g. project_id or scenario_id). + """ + method: str + owner_id_getter: Callable[[Any], int] + + +class CacheInvalidationService: + """Applies cache invalidation rules using FileCache.""" + def __init__(self, cache: FileCache) -> None: + self._cache = cache + + def invalidate(self, event: Any, rules: Iterable[CacheInvalidationRule]) -> int: + """ + Invalidate cache for an event using given rules. + + Returns: + Total number of deleted files. + """ + total_deleted = 0 + for rule in rules: + owner_id = int(rule.owner_id_getter(event)) + deleted = self._cache.delete_all(rule.method, owner_id) + total_deleted += deleted + + logger.info( + f"Cache invalidation rule applied: method={rule.method} owner_id={owner_id} deleted_files={deleted}" + ) + + return total_deleted + + +class CacheInvalidationMixin: + """ + Shared handler logic for cache invalidation. + """ + def __init__( + self, + invalidation_service: CacheInvalidationService, + producer: KafkaProducerClient, + rules: list[CacheInvalidationRule], + ) -> None: + self._invalidation_service = invalidation_service + self._producer = producer + self._rules = rules + + async def _handle_cache_invalidation(self, event: Any, ctx: Message | None = None) -> None: + CACHE_INVALIDATION_EVENTS_TOTAL.inc() + start_time = time.perf_counter() + + logger.info(f"Received event: type={type(event)}") + logger.info( + f"Invalidate cache for project_id={getattr(event, 'project_id', None)} " + f"scenario_id={getattr(event, 'scenario_id', None)}" + ) + total_deleted = await asyncio.to_thread( + self._invalidation_service.invalidate, + event, + self._rules, + ) + CACHE_INVALIDATION_SUCCESS_TOTAL.inc() + + logger.info(f"Cache invalidation completed: deleted_files={total_deleted}") + duration = time.perf_counter() - start_time + CACHE_INVALIDATION_DURATION_SECONDS.observe(duration) + return None + diff --git a/app/broker_handlers/scenario_updated_handler.py b/app/broker_handlers/scenario_updated_handler.py new file mode 100644 index 0000000..b5a80b5 --- /dev/null +++ b/app/broker_handlers/scenario_updated_handler.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from confluent_kafka import Message +from otteroad import BaseMessageHandler, KafkaProducerClient +from otteroad.consumer.handlers.base import EventT +from otteroad.models.scenario_events.projects.ScenarioObjectsUpdated import ScenarioObjectsUpdated +from otteroad.models.scenario_events.projects.ScenarioZonesUpdated import ScenarioZonesUpdated + +from app.broker_handlers.cache_invalidation import ( + CacheInvalidationMixin, + CacheInvalidationRule, + CacheInvalidationService, +) + + +_SOCIAL_RULES = [ + CacheInvalidationRule(method="social_economical_metrics", owner_id_getter=lambda e: e.project_id), + CacheInvalidationRule(method="territory_transformation", owner_id_getter=lambda e: e.scenario_id) +] + + +class ScenarioObjectsUpdatedHandler(BaseMessageHandler[ScenarioObjectsUpdated], CacheInvalidationMixin): + def __init__(self, invalidation_service: CacheInvalidationService, producer: KafkaProducerClient) -> None: + CacheInvalidationMixin.__init__(self, invalidation_service, producer, _SOCIAL_RULES) + BaseMessageHandler.__init__(self) + + async def on_startup(self): + pass + + async def on_shutdown(self): + pass + + async def handle(self, event: EventT, ctx: Message = None): + return await self._handle_cache_invalidation(event, ctx) + + +class 
ScenarioZonesUpdatedHandler(BaseMessageHandler[ScenarioZonesUpdated], CacheInvalidationMixin): + def __init__(self, invalidation_service: CacheInvalidationService, producer: KafkaProducerClient) -> None: + CacheInvalidationMixin.__init__(self, invalidation_service, producer, _SOCIAL_RULES) + BaseMessageHandler.__init__(self) + + async def on_startup(self): + pass + + async def on_shutdown(self): + pass + + async def handle(self, event: EventT, ctx: Message = None): + return await self._handle_cache_invalidation(event, ctx) diff --git a/app/api/routers/__init__.py b/app/clients/__init__.py similarity index 100% rename from app/api/routers/__init__.py rename to app/clients/__init__.py diff --git a/app/clients/urban_api_client.py b/app/clients/urban_api_client.py new file mode 100644 index 0000000..58db271 --- /dev/null +++ b/app/clients/urban_api_client.py @@ -0,0 +1,418 @@ +import json +from typing import Any, Dict, List, Literal + +import geopandas as gpd +import pandas as pd +import shapely +from loguru import logger + +from app.common.api_handlers.json_api_handler import JSONAPIHandler +from app.common.exceptions.http_exception_wrapper import http_exception + + +class UrbanAPIClient: + + def __init__(self, json_handler: JSONAPIHandler) -> None: + self.json_handler = json_handler + self.__name__ = "UrbanAPIClient" + + # TODO context + async def get_physical_objects( + self, + scenario_id: int, + token: str, + **params: Any, + ) -> gpd.GeoDataFrame | None: + res = await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}/context/physical_objects_with_geometry", + headers={"Authorization": f"Bearer {token}"}, + params=params, + ) + features = (res or {}).get("features") or [] + if not features: + return None + else: + return gpd.GeoDataFrame.from_features(features, crs=4326).set_index( + "physical_object_id" + ) + + async def get_services( + self, scenario_id: int, token: str, **kwargs: Any + ) -> gpd.GeoDataFrame: + headers = {"Authorization": f"Bearer {token}"} + res = await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}/context/services_with_geometry", + headers=headers, + params=kwargs, + ) + features = res["features"] + return gpd.GeoDataFrame.from_features(features, crs=4326).set_index( + "service_id" + ) + + async def get_functional_zones_sources( + self, scenario_id: int, token: str + ) -> pd.DataFrame: + headers = {"Authorization": f"Bearer {token}"} + res = await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}/context/functional_zone_sources", + headers=headers, + ) + return pd.DataFrame(res) + + async def get_functional_zones( + self, scenario_id: int, year: int, source: str, token: str + ) -> gpd.GeoDataFrame: + headers = {"Authorization": f"Bearer {token}"} + res = await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}/context/functional_zones", + params={"year": year, "source": source}, + headers=headers, + ) + features = res["features"] + return gpd.GeoDataFrame.from_features(features, crs=4326).set_index( + "functional_zone_id" + ) + + async def get_project(self, project_id: int, token: str) -> Dict[str, Any]: + res = await self.json_handler.get( + f"/api/v1/projects/{project_id}", + headers={"Authorization": f"Bearer {token}"}, + ) + return res + + async def get_project_geometry(self, project_id: int, token: str): + res = await self.json_handler.get( + f"/api/v1/projects/{project_id}/territory", + headers={"Authorization": f"Bearer {token}"}, + ) + geometry_json = json.dumps(res["geometry"]) + return 
shapely.from_geojson(geometry_json) + + # TODO scenario + async def get_scenario_info(self, target_scenario_id: int, token: str) -> dict: + + url = f"/api/v1/scenarios/{target_scenario_id}" + headers = {"Authorization": f"Bearer {token}"} + try: + response = await self.json_handler.get(url, headers=headers) + return response + except Exception as e: + logger.exception(e) + raise http_exception( + 404, + f"Scenario info for ID {target_scenario_id} is missing", + _input={"target_scenario_id": target_scenario_id}, + _detail={"error": repr(e)}, + ) from e + + async def get_scenario(self, scenario_id: int, token: str) -> Dict[str, Any]: + headers = {"Authorization": f"Bearer {token}"} + res = await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}", headers=headers + ) + return res + + async def get_functional_zones_sources_scenario( + self, + scenario_id: int, + token: str, + ) -> pd.DataFrame: + headers = {"Authorization": f"Bearer {token}"} + res = await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}/functional_zone_sources", + headers=headers, + ) + return pd.DataFrame(res) + + async def get_functional_zones_scenario( + self, scenario_id: int, token: str, year: int, source: str + ) -> gpd.GeoDataFrame: + res = await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}/functional_zones", + headers={"Authorization": f"Bearer {token}"}, + params={"year": year, "source": source}, + ) + features = res["features"] + return gpd.GeoDataFrame.from_features(features, crs=4326).set_index( + "functional_zone_id" + ) + + async def get_physical_objects_scenario( + self, scenario_id: int, token: str, **kwargs: Any + ) -> gpd.GeoDataFrame | None: + res = await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}/physical_objects_with_geometry", + headers={"Authorization": f"Bearer {token}"}, + params=kwargs, + ) + if res["features"]: + return gpd.GeoDataFrame.from_features(res, crs=4326).set_index( + "physical_object_id" + ) + return None + + async def get_services_scenario( + self, scenario_id: int, token: str, **kwargs: Any + ) -> dict: + return await self.json_handler.get( + f"/api/v1/scenarios/{scenario_id}/services_with_geometry", + headers={"Authorization": f"Bearer {token}"}, + params=kwargs, + ) + + async def get_optimal_func_zone_request_data( + self, + token: str, + data_id: int, + source: Literal["PZZ", "OSM", "User"] | None, + year: int | None, + project: bool = True, + ) -> tuple[str, int]: + """ + Retrieves the best matching functional zone source and year for the given constraints. + Args: + token (str): user token to access data in Urban API. + data_id (int): scenario id to retrieve data for. If project is True, sources are taken from the + project scenario, otherwise from its context. + source (Literal["PZZ", "OSM", "User"] | None): Functional zone source from Urban API. If None, + sources are tried in the priority order User -> PZZ -> OSM. + year (int | None): year to retrieve zones for. If None, the latest available year is used. + project (bool): If True, the User source is allowed and data is taken from the project scenario, + otherwise from the context. + Returns: + tuple[str, int]: Tuple with source and year. + """ + + async def _get_optimal_source( + sources_data: pd.DataFrame, + target_year: int | None, + is_project: bool, + ) -> tuple[str, int]: + """ + Estimates the best matching source and year. + Args: + sources_data (pd.DataFrame): DataFrame containing functional zone sources. + target_year (int | None): year to retrieve zones for. If None, the latest available year is used.
+                is_project (bool): If True, the User source is also considered.
+            Returns:
+                tuple[str, int]: Tuple with source and year.
+            Raises:
+                Any error propagated from the Urban API.
+                404 if no optimal source could be matched.
+            """
+
+            if is_project:
+                sources_priority = ["User", "PZZ", "OSM"]
+            else:
+                sources_priority = ["PZZ", "OSM"]
+            for i in sources_priority:
+                if i in sources_data["source"].unique():
+                    sources_data = sources_data[sources_data["source"] == i]
+                    source_name = sources_data["source"].iloc[0]
+                    if target_year:
+                        source_year = sources_data.loc[
+                            sources_data["year"] == target_year, "year"
+                        ].iloc[0]
+                    else:
+                        source_year = sources_data["year"].max()
+                    logger.info(f"{source_name}, {int(source_year)}")
+                    return source_name, int(source_year)
+            raise http_exception(
+                404,
+                "No source found",
+                _input={
+                    "source": source,
+                    "year": year,
+                    "is_project": is_project,
+                },
+            )
+
+        if not project and source == "User":
+            raise http_exception(
+                500,
+                "Unreachable functional zones source for non-project data",
+                _input={
+                    "source": source,
+                    "year": year,
+                    "project": project,
+                },
+                _detail={
+                    "available_sources": ["PZZ", "OSM"],
+                },
+            )
+        headers = {"Authorization": f"Bearer {token}"}
+        if project:
+            available_sources = await self.json_handler.get(
+                f"/api/v1/scenarios/{data_id}/functional_zone_sources", headers=headers
+            )
+        else:
+            available_sources = await self.json_handler.get(
+                f"/api/v1/scenarios/{data_id}/context/functional_zone_sources",
+                headers=headers,
+            )
+        sources_df = pd.DataFrame.from_records(available_sources)
+        if not source:
+            return await _get_optimal_source(sources_df, year, project)
+        else:
+            source_df = sources_df[sources_df["source"] == source]
+            return await _get_optimal_source(source_df, year, project)
+
+    async def get_project_id(
+        self,
+        scenario_id: int,
+        token: str,
+    ) -> int:
+        endpoint = f"/api/v1/scenarios/{scenario_id}"
+        response = await self.json_handler.get(
+            endpoint, headers={"Authorization": f"Bearer {token}"}
+        )
+        project_id = response.get("project", {}).get("project_id")
+        if project_id is None:
+            raise http_exception(
+                404,
+                "Project ID is missing in scenario data.",
+                scenario_id,
+            )
+
+        return project_id
+
+    async def get_all_project_info(self, project_id: int, token: str) -> dict:
+        url = f"/api/v1/projects/{project_id}"
+        try:
+            response = await self.json_handler.get(
+                url, headers={"Authorization": f"Bearer {token}"}
+            )
+            return response
+        except Exception as e:
+            logger.exception(e)
+            raise http_exception(
+                404,
+                f"Project info for ID {project_id} is missing",
+                _input={"project_id": project_id},
+                _detail={"error": repr(e)},
+            ) from e
+
+    async def get_service_types(self, **kwargs):
+
+        data = await self.json_handler.get("/api/v1/service_types", params=kwargs)
+
+        items = (
+            data
+            if isinstance(data, list)
+            else data.get("service_types") or data.get("data") or []
+        )
+
+        rows = [
+            {
+                "service_type_id": it.get("service_type_id"),
+                "name": it.get("name"),
+                "infrastructure_type": it.get("infrastructure_type"),
+                "weight_value": it.get("properties", {}).get("weight_value"),
+            }
+            for it in items
+        ]
+
+        return pd.DataFrame(rows).set_index("service_type_id")
+
+    async def get_social_values(self, **kwargs):
+        res = await self.json_handler.get("/api/v1/social_values", params=kwargs)
+        return pd.DataFrame(res).set_index("soc_value_id")
+
+    async def get_social_value_service_types(self, soc_value_id: int, **kwargs):
+        data = await self.json_handler.get(
+            f"/api/v1/social_values/{soc_value_id}/service_types", params=kwargs
+        )
+        if not data:
+            return None
+
+        if 
isinstance(data, list): + items = data + elif isinstance(data, dict): + items = data.get("service_types") or data.get("data") or [] + else: + items = [] + + rows = [] + for it in items: + rows.append( + { + "soc_value_id": it.get("soc_value_id"), + } + ) + df = pd.DataFrame(rows).set_index("soc_value_id") + return df + + async def get_service_type_social_values(self, service_type_id: int, **kwargs): + data = await self.json_handler.get( + f"/api/v1/service_types/{service_type_id}/social_values", + params=kwargs, + ) + + if isinstance(data, dict): + data = data.get("service_types") or data.get("data") or [] + + idx = [it["soc_value_id"] for it in data if "soc_value_id" in it] + if not idx: + return None + + df = pd.DataFrame(index=idx) + df.index.name = "soc_value_id" + return df + + async def get_indicators(self, parent_id: int | None = None, **kwargs): + res = await self.json_handler.get( + "/api/v1/indicators_by_parent", params={"parent_id": parent_id, **kwargs} + ) + return pd.DataFrame(res).set_index("indicator_id") + + async def get_territory_geometry(self, territory_id: int): + res = await self.json_handler.get(f"/api/v1/territory/{territory_id}") + geom = res["geometry"] + if isinstance(geom, dict): + geom = json.dumps(geom) + return shapely.from_geojson(geom) + + async def get_base_scenario_id(self, project_id: int, token: str) -> int: + headers = {"Authorization": f"Bearer {token}"} + scenarios = await self.json_handler.get( + f"/api/v1/projects/{project_id}/scenarios", + headers=headers, + ) + + base = next((s for s in scenarios if s.get("is_based")), None) + if not base: + raise http_exception(404, "base scenario not found", project_id) + + return base["scenario_id"] + + async def get_project_scenarios( + self, project_id: int, token: str + ) -> List[Dict[str, Any]]: + headers = {"Authorization": f"Bearer {token}"} if token else None + res = await self.json_handler.get( + f"/api/v1/projects/{project_id}/scenarios", + headers=headers, + ) + return res + + async def get_social_values_info(self) -> dict[int, str]: + res = await self.json_handler.get("/api/v1/social_values") + return {item["soc_value_id"]: item["name"] for item in res} + + async def get_territory_normatives(self, territory_id: int) -> pd.DataFrame: + res = await self.json_handler.get( + f"/api/v1/territory/{territory_id}/normatives", params={"last_only": True} + ) + df = pd.DataFrame(res) + df["service_type_id"] = df["service_type"].apply(lambda st: st["id"]) + return df.set_index("service_type_id", drop=True) + + async def get_indicator_info(self, indicator_id: int) -> dict: + res = await self.json_handler.get(f"/api/v1/indicators/{indicator_id}") + return res + + async def get_indicator_scenario_value(self, scenario_id: int, token: str) -> dict: + headers = {"Authorization": f"Bearer {token}"} if token else None + res = await self.json_handler.get(f"/api/v1/scenarios/{scenario_id}/indicators_values", headers=headers) + return res diff --git a/app/api/routers/effects/__init__.py b/app/common/__init__.py similarity index 100% rename from app/api/routers/effects/__init__.py rename to app/common/__init__.py diff --git a/app/api/routers/effects/services/__init__.py b/app/common/api_handlers/__init__.py similarity index 100% rename from app/api/routers/effects/services/__init__.py rename to app/common/api_handlers/__init__.py diff --git a/app/common/api_handlers/json_api_handler.py b/app/common/api_handlers/json_api_handler.py new file mode 100644 index 0000000..ff6e7f9 --- /dev/null +++ 
b/app/common/api_handlers/json_api_handler.py
@@ -0,0 +1,262 @@
+import aiohttp
+
+from app.common.exceptions.http_exception_wrapper import http_exception
+
+
+class JSONAPIHandler:
+
+    def __init__(
+        self,
+        base_url: str,
+    ) -> None:
+        """Initialisation function
+
+        Args:
+            base_url (str): Base api url
+        Returns:
+            None
+        """
+
+        self.__name__ = "JSONAPIHandler"
+        self.base_url = base_url
+
+    @staticmethod
+    async def _check_response_status(
+        response: aiohttp.ClientResponse,
+    ) -> list | dict | None:
+        """Function handles response"""
+        if response.status in (200, 201):
+            return await response.json(content_type="application/json")
+
+        elif response.status == 500:
+            content_type = (response.headers.get("Content-Type") or "").lower()
+
+            if "application/json" in content_type:
+                response_info = await response.json()
+                err = response_info.get("error", "")
+                if isinstance(err, (dict, list)):
+                    err = str(err)
+                if "reset by peer" in err:
+                    return None
+            else:
+                response_info = await response.text()
+
+            raise http_exception(
+                response.status,
+                "Couldn't get data from API",
+                _input=str(response.url),
+                _detail=response_info,
+            )
+
+        else:
+            raise http_exception(
+                response.status,
+                "Couldn't get data from API",
+                _input=str(response.url),
+                _detail=await response.json(),
+            )
+
+    @staticmethod
+    async def _check_request_params(
+        params: dict[str, str | int | float | bool] | None,
+    ) -> dict | None:
+        """
+        Function checks request parameters
+        Args:
+            params (dict[str, str | int | float | bool] | None): Request parameters
+        Returns:
+            dict | None: Returns modified parameters if they are not empty, otherwise returns None
+        """
+
+        if params:
+            for key, param in params.items():
+                if isinstance(param, bool):
+                    params[key] = str(param).lower()
+            return params
+
+    async def get(
+        self,
+        endpoint_url: str,
+        headers: dict | None = None,
+        params: dict | None = None,
+        session: aiohttp.ClientSession | None = None,
+    ) -> dict | list:
+        """Function to get data from api
+        Args:
+            endpoint_url (str): Endpoint url
+            headers (dict | None): Headers
+            params (dict | None): Query parameters
+            session (aiohttp.ClientSession | None): Session to use
+        Returns:
+            dict | list: Response data as python object
+        """
+
+        if not session:
+            async with aiohttp.ClientSession() as session:
+                return await self.get(
+                    endpoint_url=endpoint_url,
+                    headers=headers,
+                    params=params,
+                    session=session,
+                )
+        url = self.base_url + endpoint_url
+        params = await self._check_request_params(params)
+        async with session.get(url=url, headers=headers, params=params) as response:
+            result = await self._check_response_status(response)
+            if isinstance(result, list):
+                return result
+            elif isinstance(result, dict):
+                return result
+            if not result:
+                if isinstance(result, list):
+                    return result
+                elif isinstance(result, dict):
+                    return result
+                return await self.get(
+                    endpoint_url=endpoint_url,
+                    headers=headers,
+                    params=params,
+                    session=session,
+                )
+            return result
+
+    async def post(
+        self,
+        endpoint_url: str,
+        headers: dict | None = None,
+        params: dict | None = None,
+        data: dict | None = None,
+        session: aiohttp.ClientSession | None = None,
+    ) -> dict | list:
+        """Function to post data from api
+        Args:
+            endpoint_url (str): Endpoint url
+            headers (dict | None): Headers
+            params (dict | None): Query parameters
+            data (dict | None): Request data
+            session (aiohttp.ClientSession | None): Session to use
+        Returns:
+            dict | list: Response data as python object
+        """
+
+        if not session:
+            async with aiohttp.ClientSession() as 
session: + return await self.post( + endpoint_url=endpoint_url, + headers=headers, + params=params, + data=data, + session=session, + ) + url = self.base_url + endpoint_url + params = await self._check_request_params(params) + async with session.post( + url=url, + headers=headers, + params=params, + data=data, + ) as response: + result = await self._check_response_status(response) + if not result: + return await self.post( + endpoint_url=endpoint_url, + headers=headers, + params=params, + data=data, + session=session, + ) + return result + + async def put( + self, + endpoint_url: str, + headers: dict | None = None, + params: dict | None = None, + data: dict | None = None, + session: aiohttp.ClientSession | None = None, + ) -> dict | list: + """Function to post data from api + Args: + endpoint_url (str): Endpoint url + headers (dict | None): Headers + params (dict | None): Query parameters + data (dict | None): Request data + session (aiohttp.ClientSession | None): Session to use + Returns: + dict | list: Response data as python object + """ + + if not session: + async with aiohttp.ClientSession() as session: + return await self.put( + endpoint_url=endpoint_url, + headers=headers, + params=params, + data=data, + session=session, + ) + url = self.base_url + endpoint_url + params = await self._check_request_params(params) + async with session.put( + url=url, + headers=headers, + params=params, + data=data, + ) as response: + result = await self._check_response_status(response) + if not result: + return await self.put( + endpoint_url=endpoint_url, + headers=headers, + params=params, + data=data, + session=session, + ) + return result + + async def delete( + self, + endpoint_url: str, + headers: dict | None = None, + params: dict | None = None, + data: dict | None = None, + session: aiohttp.ClientSession | None = None, + ) -> dict | list: + """Function to post data from api + Args: + endpoint_url (str): Endpoint url + headers (dict | None): Headers + params (dict | None): Query parameters + data (dict | None): Request data + session (aiohttp.ClientSession | None): Session to use + Returns: + dict | list: Response data as python object + """ + + if not session: + async with aiohttp.ClientSession() as session: + return await self.delete( + endpoint_url=endpoint_url, + headers=headers, + params=params, + data=data, + session=session, + ) + url = self.base_url + endpoint_url + params = await self._check_request_params(params) + async with session.delete( + url=url, + headers=headers, + params=params, + data=data, + ) as response: + result = await self._check_response_status(response) + if not result: + return await self.delete( + endpoint_url=endpoint_url, + headers=headers, + params=params, + data=data, + session=session, + ) + return result diff --git a/app/api/utils/__init__.py b/app/common/auth/__init__.py similarity index 100% rename from app/api/utils/__init__.py rename to app/common/auth/__init__.py diff --git a/app/api/utils/auth.py b/app/common/auth/auth.py similarity index 66% rename from app/api/utils/auth.py rename to app/common/auth/auth.py index 79e8c6b..f42f41b 100644 --- a/app/api/utils/auth.py +++ b/app/common/auth/auth.py @@ -3,6 +3,7 @@ http_bearer = HTTPBearer() + def _get_token_from_header(credentials: HTTPAuthorizationCredentials) -> str: if not credentials: raise HTTPException( @@ -14,11 +15,13 @@ def _get_token_from_header(credentials: HTTPAuthorizationCredentials) -> str: if not token: raise HTTPException( - status_code=400, - detail="Token is missing in the authorization 
header" + status_code=400, detail="Token is missing in the authorization header" ) - + return token -async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(http_bearer)): - return _get_token_from_header(credentials) \ No newline at end of file + +async def verify_token( + credentials: HTTPAuthorizationCredentials = Depends(http_bearer), +): + return _get_token_from_header(credentials) diff --git a/app/common/caching/__init__.py b/app/common/caching/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/common/caching/caching_service.py b/app/common/caching/caching_service.py new file mode 100644 index 0000000..e8ce8e1 --- /dev/null +++ b/app/common/caching/caching_service.py @@ -0,0 +1,243 @@ +import hashlib +import json +import re +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any, Literal + +import geopandas as gpd +import pandas as pd +from loguru import logger + +_CACHE_DIR = Path().absolute() / "__effects_cache__" +_CACHE_DIR.mkdir(parents=True, exist_ok=True) + +_FILENAME_RE = re.compile(r"[^A-Za-z0-9_-]+") + + +def _safe(s: str) -> str: + return _FILENAME_RE.sub("", s) + + +PROJECT_BASED_METHODS: set[str] = { + "social_economical_metrics", + "urbanomy_metrics", +} + + +def _owner_prefix(method: str) -> str: + """Return cache key prefix based on method semantics.""" + return "project" if method in PROJECT_BASED_METHODS else "scenario" + + +def _file_name(method: str, owner_id: int, phash: str, day: str) -> Path: + prefix = _owner_prefix(method) + name = f"{day}__{prefix}_{owner_id}__{_safe(method)}__{phash}.json" + return _CACHE_DIR / name + + +def _to_dt(dt_str: str) -> datetime: + if dt_str.endswith("Z"): + dt_str = dt_str[:-1] + "+00:00" + return datetime.fromisoformat(dt_str) + + +class FileCache: + """Service for caching files.""" + + def params_hash(self, params: dict[str, Any]) -> str: + """ + 8-symbol md5-hash from params dict. + """ + raw = json.dumps(params, sort_keys=True, separators=(",", ":")) + return hashlib.md5(raw.encode()).hexdigest()[:8] + + def save( + self, + method: str, + owner_id: int, + params: dict[str, Any], + data: dict[str, Any], + scenario_updated_at: str | None = None, + ) -> Path: + """ + Always write (or overwrite) the cache file so that both + 'before' and 'after' can be stored in the same JSON. 
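+
+        Illustrative usage (the file naming follows ``_file_name``; the date
+        and hash below are placeholders, not real values):
+
+            cache = FileCache()
+            cache.save("urbanomy_metrics", 120, {"year": 2024}, {"value": 1.0})
+            # -> __effects_cache__/20250101__project_120__urbanomy_metrics__ab12cd34.json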
+ """ + phash = self.params_hash(params) + day = datetime.now().strftime("%Y%m%d") + + path = _file_name(method, owner_id, phash, day) + to_save = { + "meta": { + "timestamp": datetime.now().isoformat(), + "scenario_updated_at": scenario_updated_at, + "params_hash": phash, + }, + "data": data, + } + path.write_text(json.dumps(to_save, ensure_ascii=False), encoding="utf-8") + return path + + def _latest_path(self, method: str, owner_id: int) -> Path | None: + prefix = _owner_prefix(method) + pattern = f"*__{prefix}_{owner_id}__{_safe(method)}__*.json" + files = sorted(_CACHE_DIR.glob(pattern), reverse=True) + return files[0] if files else None + + def load( + self, + method: str, + owner_id: int, + params_hash: str, + max_age: timedelta | None = None, + ) -> dict[str, Any] | None: + prefix = _owner_prefix(method) + pattern = f"*__{prefix}_{owner_id}__{_safe(method)}__{params_hash}.json" + files = sorted(_CACHE_DIR.glob(pattern), reverse=True) + if not files: + return None + + path = files[0] + if max_age: + mtime = datetime.fromtimestamp(path.stat().st_mtime) + if datetime.now() - mtime > max_age: + return None + + return json.loads(path.read_text(encoding="utf-8")) + + def load_latest(self, method: str, owner_id: int) -> dict[str, Any] | None: + path = self._latest_path(method, owner_id) + if not path: + return None + return json.loads(path.read_text(encoding="utf-8")) + + def has( + self, + method: str, + owner_id: int, + params_hash: str, + max_age: timedelta | None = None, + ) -> bool: + return self.load(method, owner_id, params_hash, max_age=max_age) is not None + + def parse_task_id(self, task_id: str): + parts = task_id.split("_") + if len(parts) < 3: + return None, None, None + + tail = parts[-1] + scenario_id_raw = parts[-2] + method = "_".join(parts[:-2]) + + if len(tail) == 8 and tail.lower().strip("0123456789abcdef") == "": + phash = tail + else: + phash = self.params_hash(tail) + + scenario_id = ( + int(scenario_id_raw) if scenario_id_raw.isdigit() else scenario_id_raw + ) + return method, scenario_id, phash + + def _artifact_path( + self, + method: str, + owner_id: int, + phash: str, + name: str, + ext: Literal["parquet", "pkl"], + ) -> Path: + """Build path for a heavy artifact near JSON cache directory.""" + fname = f"artifact__{_safe(method)}__{owner_id}__{phash}__{_safe(name)}.{ext}" + return _CACHE_DIR / fname + + def save_df_artifact( + self, + df: pd.DataFrame, + method: str, + owner_id: int, + params: dict[str, Any], + name: str, + fmt: Literal["parquet", "pkl"] = "parquet", + ) -> Path: + """ + Save a pandas DataFrame as a heavy artifact. + fmt='parquet' (default) is compact and fast; fmt='pkl' as a fallback. 
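+
+        A minimal sketch of the intended round trip (the method and artifact
+        names here are illustrative, not fixed keys):
+
+            path = cache.save_df_artifact(
+                df, "territory_transformation", 822, {"year": 2024}, name="blocks"
+            )
+            df_back = cache.load_df_artifact(path)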
+ """ + phash = self.params_hash(params) + path = self._artifact_path( + method, owner_id, phash, name, "parquet" if fmt == "parquet" else "pkl" + ) + + if fmt == "parquet": + df.to_parquet(path, index=True) + else: + df.to_pickle(path) + + return path + + def load_df_artifact(self, path: Path) -> pd.DataFrame: + """Load a pandas DataFrame artifact by file extension.""" + ext = path.suffix.lower() + if ext == ".parquet": + return pd.read_parquet(path) + elif ext == ".pkl": + return pd.read_pickle(path) + raise ValueError(f"Unsupported artifact extension: {ext}") + + def save_gdf_artifact( + self, + gdf: gpd.GeoDataFrame, + method: str, + owner_id: int, + params: dict[str, Any], + name: str, + fmt: Literal["parquet", "pkl"] = "parquet", + ) -> Path: + phash = self.params_hash(params) + ext = "parquet" if fmt == "parquet" else "pkl" + path = self._artifact_path(method, owner_id, phash, name, ext) + + if fmt == "parquet": + gdf.to_parquet(path, index=True) + else: + gdf.to_pickle(path) + + return path + + def load_gdf_artifact(self, path: Path) -> "gpd.GeoDataFrame": + """Load a GeoDataFrame artifact by file extension.""" + ext = path.suffix.lower() + if ext == ".parquet": + return gpd.read_parquet(path) + elif ext == ".pkl": + return pd.read_pickle(path) + raise ValueError(f"Unsupported artifact extension: {ext}") + + def delete_all(self, method: str, owner_id: int) -> int: + """ + Delete all cached JSON files and heavy artifacts for given method and owner_id. + + Returns: + Number of deleted files. + """ + prefix = _owner_prefix(method) + + json_pattern = f"*__{prefix}_{owner_id}__{_safe(method)}__*.json" + json_files = list(_CACHE_DIR.glob(json_pattern)) + + deleted = 0 + for path in json_files: + try: + path.unlink(missing_ok=True) + deleted += 1 + except Exception as e: + logger.warning( + f"Failed to delete cache file: path={path.as_posix()} err={e}" + ) + + logger.info( + f"Cache invalidated: method={method} owner_id={owner_id} deleted_files={deleted}" + ) + return deleted diff --git a/app/common/consumer_wrapper.py b/app/common/consumer_wrapper.py new file mode 100644 index 0000000..c52afe8 --- /dev/null +++ b/app/common/consumer_wrapper.py @@ -0,0 +1,18 @@ +from otteroad import KafkaConsumerSettings, KafkaConsumerService, BaseMessageHandler + + +class ConsumerWrapper: + def __init__(self): + self.consumer_settings = KafkaConsumerSettings.from_env() + self.consumer_service = KafkaConsumerService(self.consumer_settings) + + def register_handler(self, handler: BaseMessageHandler) -> None: + self.consumer_service.register_handler(handler) + + async def start(self, topics: list[str]): + self.consumer_service.add_worker(topics=topics) + await self.consumer_service.start() + + async def stop(self) -> None: + """Gracefully stop all consumer workers.""" + await self.consumer_service.stop() diff --git a/app/common/dto/__init__.py b/app/common/dto/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/common/dto/models.py b/app/common/dto/models.py new file mode 100644 index 0000000..be8b36a --- /dev/null +++ b/app/common/dto/models.py @@ -0,0 +1,20 @@ +from typing import List, Literal, Union + +from pydantic import BaseModel +from pydantic_geojson import FeatureModel, MultiPolygonModel, PolygonModel +from pydantic_geojson._base import FeatureCollectionFieldType + + +class SourceYear(BaseModel): + source: Literal["PZZ", "OSM", "User"] + year: int + + +class ServiceType(BaseModel): + id: int + name: str + + +class FeatureCollectionModel(BaseModel): + type: str = 
FeatureCollectionFieldType + features: List[Union[PolygonModel, MultiPolygonModel, FeatureModel],] diff --git a/app/common/dto/types.py b/app/common/dto/types.py new file mode 100644 index 0000000..e69de29 diff --git a/app/common/exceptions/__init__.py b/app/common/exceptions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/common/exceptions/errors.py b/app/common/exceptions/errors.py new file mode 100644 index 0000000..a3be38a --- /dev/null +++ b/app/common/exceptions/errors.py @@ -0,0 +1,13 @@ +class DomainError(Exception): + pass + + +class UpstreamApiError(DomainError): + def __init__(self, msg, *, status=None, payload=None): + super().__init__(msg) + self.status = status + self.payload = payload or {} + + +class NoFeaturesError(DomainError): + pass diff --git a/app/common/exceptions/exception_handler.py b/app/common/exceptions/exception_handler.py new file mode 100644 index 0000000..b42acfc --- /dev/null +++ b/app/common/exceptions/exception_handler.py @@ -0,0 +1,85 @@ +"""Exception handling middleware is defined here.""" + +import itertools +import json +import traceback + +from fastapi import FastAPI, HTTPException, Request +from loguru import logger +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.responses import JSONResponse + +from .http_exception_wrapper import http_exception + + +class ExceptionHandlerMiddleware( + BaseHTTPMiddleware +): # pylint: disable=too-few-public-methods + """Handle exceptions, so they become http response code 500 - Internal Server Error if not handled as HTTPException + previously. + Attributes: + app (FastAPI): The FastAPI application instance. + """ + + def __init__(self, app: FastAPI): + """ + Universal exception handler middleware init function. + Args: + app (FastAPI): The FastAPI application instance. + """ + + super().__init__(app) + + async def dispatch(self, request: Request, call_next): + """ + Dispatch function for sending errors to user from API + Args: + request (Request): The incoming request object. + call_next: function to extract. 
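+        Returns:
+            The downstream response, or a JSONResponse describing the error
+            (message, error_type, request info and detail) if an exception is raised.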
+ """ + + try: + return await call_next(request) + except Exception as e: + request_info = { + "method": request.method, + "url": str(request.url), + "path_params": dict(request.path_params), + "query_params": dict(request.query_params), + "headers": dict(request.headers), + } + try: + request_info["body"] = await request.json() + except: + try: + request_info["body"] = str(await request.body()) + except: + request_info["body"] = "Could not read request body" + if isinstance(e, HTTPException): + return JSONResponse( + status_code=e.status_code, + content={ + "message": ( + e.detail.get("msg") + if isinstance(e.detail, dict) + else str(e.detail) + ), + "error_type": e.__class__.__name__, + "request": request_info, + "detail": ( + e.detail.get("detail") + if isinstance(e.detail, dict) + else None + ), + }, + ) + return JSONResponse( + status_code=500, + content={ + "message": "Internal server error", + "error_type": e.__class__.__name__, + "request": request_info, + "detail": str(e), + "traceback": traceback.format_exc().splitlines(), + }, + ) diff --git a/app/common/exceptions/http_exception_wrapper.py b/app/common/exceptions/http_exception_wrapper.py new file mode 100644 index 0000000..07ccf5d --- /dev/null +++ b/app/common/exceptions/http_exception_wrapper.py @@ -0,0 +1,9 @@ +from fastapi import HTTPException + + +def http_exception( + status_code: int, msg: str, _input=None, _detail=None +) -> HTTPException: + return HTTPException( + status_code=status_code, detail={"msg": msg, "input": _input, "detail": _detail} + ) diff --git a/app/common/producer_wrapper.py b/app/common/producer_wrapper.py new file mode 100644 index 0000000..56f4c25 --- /dev/null +++ b/app/common/producer_wrapper.py @@ -0,0 +1,15 @@ +from otteroad import KafkaProducerSettings, KafkaProducerClient + + +class ProducerWrapper: + def __init__(self): + self.producer_settings = KafkaProducerSettings.from_env() + self.producer_service = KafkaProducerClient(self.producer_settings, init_loop=False) + + async def start(self): + self.producer_service.init_loop() + await self.producer_service.start() + + async def stop(self) -> None: + """Gracefully stop producer service (flush + stop polling thread).""" + await self.producer_service.close() diff --git a/app/common/utils/__init__.py b/app/common/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/common/utils/effects_utils.py b/app/common/utils/effects_utils.py new file mode 100644 index 0000000..c8d2107 --- /dev/null +++ b/app/common/utils/effects_utils.py @@ -0,0 +1,58 @@ +from typing import Any, Dict, Optional + +from app.clients.urban_api_client import UrbanAPIClient + + +class EffectsUtils: + def __init__( + self, + urban_api_client: UrbanAPIClient, + ): + self.__name__ = "EffectsUtils" + self.urban_api_client = urban_api_client + + def truthy_is_based(self, v: Any) -> bool: + return v is True or v == 1 or (isinstance(v, str) and v.lower() == "true") + + def parent_id(self, s: Dict[str, Any]) -> Optional[int]: + p = s.get("parent_scenario") + return p.get("id") if isinstance(p, dict) else p + + def get_service_id(self, s: Dict[str, Any]) -> Optional[int]: + try: + return int(s.get("scenario_id")) + except Exception: + return None + + async def resolve_base_id(self, token: str, scenario_id: int) -> int: + info = await self.urban_api_client.get_scenario_info(scenario_id, token) + project_id = (info.get("project") or {}).get("project_id") + regional_id = (info.get("parent_scenario") or {}).get("id") + + if not project_id or not regional_id: + return 
scenario_id + + scenarios = await self.urban_api_client.get_project_scenarios(project_id, token) + matches = [ + s + for s in scenarios + if self.truthy_is_based(s.get("is_based")) + and self.parent_id(s) == regional_id + and self.get_service_id(s) is not None + ] + if not matches: + only_based = [ + s + for s in scenarios + if self.truthy_is_based(s.get("is_based")) + and self.get_service_id(s) is not None + ] + if not only_based: + return scenario_id + matches = only_based + + matches.sort( + key=lambda x: (x.get("updated_at") is not None, x.get("updated_at")), + reverse=True, + ) + return self.get_service_id(matches[0]) or scenario_id diff --git a/app/common/utils/geodata.py b/app/common/utils/geodata.py new file mode 100644 index 0000000..60fdfe1 --- /dev/null +++ b/app/common/utils/geodata.py @@ -0,0 +1,146 @@ +import asyncio +import json + +import geopandas as gpd +import pandas as pd +from blocksnet.relations import calculate_distance_matrix +from loguru import logger +from shapely.geometry.base import BaseGeometry +from shapely.wkt import dumps, loads + +from app.common.exceptions.http_exception_wrapper import http_exception +from app.effects_api.constants.const import COL_RU, ROADS_ID, SPEED + + +async def gdf_to_ru_fc_rounded(gdf: gpd.GeoDataFrame, ndigits: int = 6) -> dict: + if "provision_weak" in gdf.columns: + gdf = gdf.drop(columns="provision_weak") + gdf = gdf.rename( + columns={k: v for k, v in COL_RU.items() if k in gdf.columns}, + errors="raise", + ) + gdf = gdf.to_crs(4326) + + gdf_copy = gdf.copy() + gdf_copy.geometry = await asyncio.to_thread( + round_coords, gdf_copy.geometry, ndigits + ) + + return json.loads(gdf_copy.to_json(drop_id=True)) + + +def safe_gdf_to_geojson( + gdf: gpd.GeoDataFrame, + to_epsg: int = 4326, + round_ndigits: int = 6, + drop_cols: tuple[str, ...] = (), +) -> dict: + """Project, round, sanitize and serialize GeoDataFrame to GeoJSON. + + Steps: + - Drop unwanted columns (e.g., non-serializable). + - Project to EPSG (default 4326). + - Round geometry coordinates to given precision. + - Ensure all properties are JSON-serializable. + - Return parsed dict (FeatureCollection). 
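+
+    A minimal usage sketch (the dropped column name is illustrative); with six
+    decimal places the coordinate rounding corresponds to roughly 0.1 m:
+
+        fc = safe_gdf_to_geojson(blocks_gdf, drop_cols=("is_project",))
+        assert fc["type"] == "FeatureCollection"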
+ """ + logger.info( + f"Serializing GeoDataFrame to GeoJSON (EPSG:{to_epsg}, round={round_ndigits})" + ) + gdf2 = gdf.drop(columns=[c for c in drop_cols if c in gdf.columns]).copy() + gdf2 = gdf2.to_crs(to_epsg) + gdf2.geometry = round_coords(gdf2.geometry, round_ndigits) + return json.loads(gdf2.to_json(drop_id=True)) + + +def fc_to_gdf(fc: dict) -> gpd.GeoDataFrame: + return gpd.GeoDataFrame.from_features(fc["features"], crs="EPSG:4326") + + +def is_fc(obj: dict) -> bool: + return ( + isinstance(obj, dict) + and obj.get("type") == "FeatureCollection" + and "features" in obj + ) + + +def round_coords( + geometry: gpd.GeoSeries | BaseGeometry, ndigits: int = 6 +) -> gpd.GeoSeries | BaseGeometry: + if isinstance(geometry, gpd.GeoSeries): + return geometry.map(lambda geom: loads(dumps(geom, rounding_precision=ndigits))) + elif isinstance(geometry, BaseGeometry): + return loads(dumps(geometry, rounding_precision=ndigits)) + else: + raise TypeError("geometry must be GeoSeries or Shapely geometry") + + +async def get_best_functional_zones_source( + sources_df: pd.DataFrame, + source: str | None = None, + year: int | None = None, +) -> tuple[int | None, str | None]: + sources_priority = ["OSM", "PZZ", "User"] + if source and year: + row = sources_df.query("source == @source and year == @year") + if not row.empty: + return year, source + return await get_best_functional_zones_source(sources_df, None, year) + elif source and not year: + rows = sources_df.query("source == @source") + if not rows.empty: + return int(rows["year"].max()), source + return await get_best_functional_zones_source(sources_df, None, year) + elif year and not source: + for s in sources_priority: + row = sources_df.query("source == @s and year == @year") + if not row.empty: + return year, s + for s in sources_priority: + rows = sources_df.query("source == @s") + if not rows.empty: + return int(rows["year"].max()), s + + raise http_exception(404, "No available functional zone sources to choose from") + + +def gdf_join_on_block_id( + left: gpd.GeoDataFrame, right: pd.DataFrame, how: str = "left" +) -> gpd.GeoDataFrame: + """Join two frames by block_id index safely. + + - Ensures both indices are int. + - Keeps geometry from the left GeoDataFrame. 
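+    - With the default how="left", rows present only in ``right`` are dropped;
+      pass how="outer" to keep them.
+
+    Sketch (frame names are illustrative):
+
+        joined = gdf_join_on_block_id(blocks_gdf, provision_df)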
+ """ + gdf = left.copy() + gdf.index = gdf.index.astype(int) + r = right.copy() + r.index = r.index.astype(int) + return gdf.join(r, how=how) + + +def _ensure_block_index(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """Ensure index is integer 'block_id'.""" + if "block_id" in gdf.columns: + gdf = gdf.copy() + gdf["block_id"] = gdf["block_id"].astype(int) + if gdf.index.name == "block_id": + gdf = gdf.reset_index(drop=True) + gdf = ( + gdf.drop_duplicates(subset="block_id", keep="last") + .set_index("block_id") + .sort_index() + ) + else: + gdf = gdf.copy() + gdf.index = gdf.index.astype(int) + gdf = gdf[~gdf.index.duplicated(keep="last")].sort_index() + gdf.index.name = "block_id" + return gdf + + +def get_accessibility_matrix(blocks: gpd.GeoDataFrame) -> pd.DataFrame: + crs = blocks.estimate_utm_crs() + dist_mx = calculate_distance_matrix(blocks.to_crs(crs)) + return dist_mx // SPEED diff --git a/app/dependencies.py b/app/dependencies.py new file mode 100644 index 0000000..4f7da0b --- /dev/null +++ b/app/dependencies.py @@ -0,0 +1,55 @@ +import sys +from pathlib import Path + +from iduconfig import Config +from loguru import logger + +from app.broker_handlers.cache_invalidation import CacheInvalidationService +from app.broker_handlers.scenario_updated_handler import ( + ScenarioObjectsUpdatedHandler, + ScenarioZonesUpdatedHandler, +) +from app.clients.urban_api_client import UrbanAPIClient +from app.common.api_handlers.json_api_handler import JSONAPIHandler +from app.common.caching.caching_service import FileCache +from app.common.consumer_wrapper import ConsumerWrapper +from app.common.producer_wrapper import ProducerWrapper +from app.common.utils.effects_utils import EffectsUtils +from app.effects_api.effects_service import EffectsService +from app.effects_api.modules.context_service import ContextService +from app.effects_api.modules.scenario_service import ScenarioService + +absolute_app_path = Path().absolute() +config = Config() + +logger.remove() +log_level = "INFO" +log_format = "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {message}" +logger.add(sys.stderr, format=log_format, level=log_level, colorize=True) +logger.add( + absolute_app_path / f"{config.get('LOG_NAME')}", + format=log_format, + level="INFO", +) + +json_api_handler = JSONAPIHandler(config.get("URBAN_API")) +urban_api_client = UrbanAPIClient(json_api_handler) +file_cache = FileCache() +scenario_service = ScenarioService(urban_api_client) +effects_utils = EffectsUtils(urban_api_client) +context_service = ContextService(urban_api_client, file_cache) +effects_service = EffectsService( + urban_api_client, file_cache, scenario_service, context_service, effects_utils +) + +consumer = ConsumerWrapper() +producer = ProducerWrapper() + +cache_invalidator = CacheInvalidationService(file_cache) + +consumer.register_handler( + ScenarioObjectsUpdatedHandler(cache_invalidator, producer.producer_service) +) +consumer.register_handler( + ScenarioZonesUpdatedHandler(cache_invalidator, producer.producer_service) +) diff --git a/app/effects_api/__init__.py b/app/effects_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/effects_api/constants/__init__.py b/app/effects_api/constants/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/effects_api/constants/const.py b/app/effects_api/constants/const.py new file mode 100644 index 0000000..05732e2 --- /dev/null +++ b/app/effects_api/constants/const.py @@ -0,0 +1,415 @@ +from typing import Final + +from 
blocksnet.analysis.indicators.socio_economic import ( + DemographicIndicator, + EcologicalIndicator, + EconomicIndicator, + EngineeringIndicator, + GeneralIndicator, + SettlementIndicator, + SocialCountIndicator, + SocialIndicator, + SocialProvisionIndicator, + TransportIndicator, +) +from blocksnet.enums import LandUse + +# UrbanDB to Blocksnet land use types mapping +LAND_USE_RULES = { + "residential": LandUse.RESIDENTIAL, + "recreation": LandUse.RECREATION, + "special": LandUse.SPECIAL, + "industrial": LandUse.INDUSTRIAL, + "agriculture": LandUse.AGRICULTURE, + "transport": LandUse.TRANSPORT, + "business": LandUse.BUSINESS, + "residential_individual": LandUse.RESIDENTIAL, + "residential_lowrise": LandUse.RESIDENTIAL, + "residential_midrise": LandUse.RESIDENTIAL, + "residential_multistorey": LandUse.RESIDENTIAL, +} + + +# TODO add map autogeneration +SERVICE_TYPES_MAPPING = { + 1: "park", + 5: "beach", + 21: "kindergarten", + 22: "school", + 23: None, # доп образование + 24: None, # доп образование + 26: "college", + 27: "university", + 28: "polyclinic", + 29: None, # детская поликлиника + 30: None, # стоматология + 31: None, # фельдшерско-акушерский пункт + 32: None, # женская консультация + 34: "pharmacy", + 35: "hospital", + 36: None, # роддом + 37: None, # детская больница + 38: None, # хоспис + 39: None, # скорая помощь + 40: None, # травматология + 41: None, # морг + 42: None, # диспансер + 43: None, # центры соц обслуживания + 44: "social_facility", # дом престарелых + 45: "recruitment", + 46: None, # детский дом + 47: "multifunctional_center", + 48: "library", + 49: None, # дворцы культуры + 50: "museum", + 51: "theatre", + 53: None, # концертный зал + 55: "zoo", + 56: "cinema", + 57: "mall", + 59: "stadium", + 60: None, # ледовая арена + 61: "cafe", + 62: "restaurant", + 63: "bar", + 64: "cafe", + 65: "bakery", + 66: "pitch", + 67: "swimming_pool", + 68: None, # спортивный зал + 69: None, # каток + 70: None, # футбольное поле + 72: None, # эко тропа + 74: "playground", + 75: None, # парк аттракционов + 77: None, # скейт парк + 78: "police", + 79: None, # пожарная станция + 80: "train_station", + 81: "train_building", + 82: "aeroway_terminal", + 84: "fuel", + 86: "bus_station", + 88: "subway_entrance", + 89: "supermarket", + 91: "market", + 93: None, # одежда и обувь + 94: None, # бытовая техника + 95: None, # книжный магазин + 96: None, # детские товары + 97: None, # спортивный магазин + 98: "post", + 99: None, # пункт выдачи + 100: "bank", + 102: "lawyer", + 103: "notary", + 107: "veterinary", + 108: None, # зоомагазин + 109: "dog_park", + 110: "hotel", + 111: "hostel", + 112: None, # база отдыха + 113: None, # памятник + 114: "religion", # религиозный объект + # электростанции -- start + 118: "substation", # Атомная электростанция + 119: "substation", # Гидро-электростанция + 120: "substation", # Тепловая электростанция + # электростанции -- end + 124: "water_works", + # водоочистные сооружения -- start + 126: "wastewater_plant", # Сооружения для очистки воды + 128: "wastewater_plant", # Водоочистные сооружения + # водоочистные сооружения -- end + 143: "sanatorium", +} + +# Rules for agregating building properties from UrbanDB API +BUILDINGS_RULES = { + "number_of_floors": [ + ["floors"], + ["properties", "storeys_count"], + ["properties", "osm_data", "building:levels"], + ], + "footprint_area": [ + ["building_area_official"], + ["building_area_modeled"], + ["properties", "area_total"], + ], + "build_floor_area": [ + ["properties", "area_total"], + ], + "living_area": [ + 
["properties", "living_area_official"], + ["properties", "living_area"], + ["properties", "living_area_modeled"], + ], + "non_living_area": [ + ["properties", "area_non_residential"], + ], + "population": [["properties", "population_balanced"]], +} + +# For each Infrastructure_type we will also add a weighting factor to give preference to the basic service, and another switch for capabilities. +INFRASTRUCTURES_WEIGHTS = {"basic": 0.5714, "additional": 0.2857, "comfort": 0.1429} + +# Mapping for translation of english provision properties +COL_RU = { + "demand": "Спрос", + "capacity": "Емкость сервисов", + "demand_left": "Неудовлетворенный спрос", + "demand_within": "Спрос в пределах нормативной доступности", + "demand_without": "Спрос за пределами нормативной доступности", + "capacity_left": "Оставшаяся емкость сервисов", + "capacity_within": "Емкость сервисов в пределах нормативной доступности", + "capacity_without": "Емкость сервисов за пределами нормативной доступности", + "provision_strong": "Обеспеченность сервисами", +} +# ID of living building physical_object_type_id +LIVING_BUILDINGS_ID = 4 + +# ID of road physical_object_function_id +ROADS_ID = 26 + +# ID of water objects physical_object_function_id +WATER_ID = 4 + +# Maximum number of function evaluations +MAX_EVALS = 1000 + +# Maximum number of runs for optimization +MAX_RUNS = 1000 + +PRED_VALUE_RU = { + "urban": "Жилой или смешанный (бизнес)", + "industrial": "Промышленный", + "non_urban": "Рекреация", +} + +PROB_COLS_EN_TO_RU = { + "prob_urban": "Вероятность жилого или бизнес видов использования", + "prob_non_urban": "Вероятность рекреационного вида использования", + "prob_industrial": "Вероятность промышленного вида использования", +} + +SOCIAL_INDICATORS_MAPPING = { + SocialIndicator.EXTRACURRICULAR: [23, 24], + SocialIndicator.AMBULANCE: [39, 40], + SocialIndicator.SPECIAL_MEDICAL: [41], + SocialIndicator.PREVENTIVE_MEDICAL: [42], + SocialIndicator.GYM: [68], + SocialIndicator.ORPHANAGE: [46], + SocialIndicator.SOCIAL_SERVICE_CENTER: [43], + SocialIndicator.CULTURAL_CENTER: [49], + SocialIndicator.CONCERT_HALL: [53], + SocialIndicator.ICE_ARENA: [60], + SocialIndicator.ECO_TRAIL: [72], + SocialIndicator.FIRE_STATION: [79], + SocialIndicator.TOURIST_BASE: [112], +} + +INDICATORS_MAPPING = { + # общие + GeneralIndicator.AREA: 4, + GeneralIndicator.URBANIZATION: 16, + # демография + DemographicIndicator.POPULATION: 1, + DemographicIndicator.DENSITY: 37, + # транспорт + TransportIndicator.ROAD_NETWORK_DENSITY: 60, + TransportIndicator.SETTLEMENTS_CONNECTIVITY: 59, + TransportIndicator.ROAD_NETWORK_LENGTH: 65, + TransportIndicator.FUEL_STATIONS_COUNT: 71, + TransportIndicator.AVERAGE_FUEL_STATION_ACCESSIBILITY: 72, + TransportIndicator.RAILWAY_STOPS_COUNT: 75, + TransportIndicator.AVERAGE_RAILWAY_STOP_ACCESSIBILITY: 76, + TransportIndicator.AIRPORTS_COUNT: 78, + TransportIndicator.AVERAGE_AIRPORT_ACCESSIBILITY: None, # TODO Средняя доступность аэропортов (без разделения на международные и местные) + # инженерная инфраструктура + EngineeringIndicator.INFRASTRUCTURE_OBJECT: 88, + EngineeringIndicator.SUBSTATION: 89, + EngineeringIndicator.WATER_WORKS: 90, + EngineeringIndicator.WASTEWATER_PLANT: 91, + EngineeringIndicator.RESERVOIR: 92, + EngineeringIndicator.GAS_DISTRIBUTION: 93, + # социальная инфраструктура + # образование + SocialCountIndicator.KINDERGARTEN: 309, + SocialProvisionIndicator.KINDERGARTEN: 207, # Обеспеченность детскими садами + SocialCountIndicator.SCHOOL: 338, + SocialProvisionIndicator.SCHOOL: 208, # 
Обеспеченность школами + SocialCountIndicator.COLLEGE: 310, + SocialProvisionIndicator.COLLEGE: None, # FIXME Обеспеченность образовательными учреждениями СПО (нет их) + SocialCountIndicator.UNIVERSITY: 311, + SocialProvisionIndicator.UNIVERSITY: 350, + SocialCountIndicator.EXTRACURRICULAR: None, # FIXME Организации дополнительного образования детей (нет их) + SocialProvisionIndicator.EXTRACURRICULAR: None, # FIXME Обеспеченность организациями дополнительного образования детей (нет их) + # здравоохранение + SocialCountIndicator.HOSPITAL: 341, + SocialProvisionIndicator.HOSPITAL: 361, # Обеспеченность больницами + SocialCountIndicator.POLYCLINIC: 342, + SocialProvisionIndicator.POLYCLINIC: 362, # Обеспеченность поликлиниками + SocialCountIndicator.AMBULANCE: 343, + SocialProvisionIndicator.AMBULANCE: None, # FIXME Обеспеченность объектами скорой медицинской помощи + SocialCountIndicator.SANATORIUM: 312, + SocialProvisionIndicator.SANATORIUM: None, # FIXME Обеспеченность объектами санаторного назначения + SocialCountIndicator.SPECIAL_MEDICAL: None, # FIXME Медицинские учреждения особого типа + SocialProvisionIndicator.SPECIAL_MEDICAL: None, # FIXME Обеспеченность медицинскими учреждениями особого типа + SocialCountIndicator.PREVENTIVE_MEDICAL: 346, + SocialProvisionIndicator.PREVENTIVE_MEDICAL: None, # FIXME Обеспеченность лечебно-профилактическими медицинскими учреждениями + SocialCountIndicator.PHARMACY: 345, + SocialProvisionIndicator.PHARMACY: 213, # Обеспеченность аптеками + # спорт + SocialCountIndicator.GYM: 313, + SocialProvisionIndicator.GYM: 243, # Обеспеченность спортзалами ОП / фитнес-центрами + SocialCountIndicator.SWIMMING_POOL: 314, + SocialProvisionIndicator.SWIMMING_POOL: 245, # Обеспеченность ФОК / бассейнами + SocialCountIndicator.PITCH: 340, + SocialProvisionIndicator.PITCH: 357, + SocialCountIndicator.STADIUM: 315, + SocialProvisionIndicator.STADIUM: 356, + # социальная помощь + SocialCountIndicator.ORPHANAGE: 316, + SocialProvisionIndicator.ORPHANAGE: None, # FIXME Обеспеченность детскими домами-интернатами + SocialCountIndicator.SOCIAL_FACILITY: 317, + SocialProvisionIndicator.SOCIAL_FACILITY: None, # FIXME Обеспеченность домами престарелых + SocialCountIndicator.SOCIAL_SERVICE_CENTER: 318, + SocialProvisionIndicator.SOCIAL_SERVICE_CENTER: None, # FIXME Обеспеченность центрами социального обслуживания + # услуги + SocialCountIndicator.POST: 319, + SocialProvisionIndicator.POST: 247, # Обеспеченность пунктами доставки / почтовыми отделениями + SocialCountIndicator.BANK: 320, + SocialProvisionIndicator.BANK: 250, # Обеспеченность отделениями банков + SocialCountIndicator.MULTIFUNCTIONAL_CENTER: 321, + SocialProvisionIndicator.MULTIFUNCTIONAL_CENTER: 351, + # культура и отдых + SocialCountIndicator.LIBRARY: 322, + SocialProvisionIndicator.LIBRARY: 232, # Обеспеченность медиатеками / библиотеками + SocialCountIndicator.MUSEUM: 323, + SocialProvisionIndicator.MUSEUM: 352, + SocialCountIndicator.THEATRE: 324, + SocialProvisionIndicator.THEATRE: 353, + SocialCountIndicator.CULTURAL_CENTER: 325, + SocialProvisionIndicator.CULTURAL_CENTER: 231, # Обеспеченность комьюнити-центрами / домами культуры + SocialCountIndicator.CINEMA: 326, + SocialProvisionIndicator.CINEMA: 354, + SocialCountIndicator.CONCERT_HALL: 327, + SocialProvisionIndicator.CONCERT_HALL: None, # FIXME Обеспеченность концертными залами + # SocialCountIndicator.STADIUM : 315, ПОВТОР + # SocialProvisionIndicator.STADIUM : 356, ПОВТОР + SocialCountIndicator.ICE_ARENA: 328, + SocialProvisionIndicator.ICE_ARENA: None, 
# FIXME Обеспеченность ледовыми аренами + SocialCountIndicator.MALL: 329, + SocialProvisionIndicator.MALL: 355, + SocialCountIndicator.PARK: 330, + SocialProvisionIndicator.PARK: 238, # Обеспеченность парками + SocialCountIndicator.BEACH: 331, + SocialProvisionIndicator.BEACH: None, # FIXME Обеспеченность пляжами + SocialCountIndicator.ECO_TRAIL: 332, + SocialProvisionIndicator.ECO_TRAIL: None, # FIXME Обеспеченность экологическими тропами + # безопасность + SocialCountIndicator.FIRE_STATION: 333, + SocialProvisionIndicator.FIRE_STATION: 260, # Обеспеченность пожарными депо + SocialCountIndicator.POLICE: 334, + SocialProvisionIndicator.POLICE: 258, # Обеспеченность пунктами полиции + # туризм + SocialCountIndicator.HOTEL: 335, + SocialProvisionIndicator.HOTEL: 358, + SocialCountIndicator.HOSTEL: 336, + SocialProvisionIndicator.HOSTEL: 359, + SocialCountIndicator.TOURIST_BASE: 337, + SocialProvisionIndicator.TOURIST_BASE: 360, + SocialCountIndicator.CATERING: 344, + SocialProvisionIndicator.CATERING: 226, # Обеспеченность кафе / кофейнями +} + +SPEED = 5 * 1_000 / 60 + +URBANOMY_LAND_USE_RULES: Final[dict[str, LandUse]] = { + 'Потенциал развития среднеэтажной жилой застройки': LandUse.RESIDENTIAL, + "Потенциал развития застройки общественно-деловой зоны": LandUse.BUSINESS, + "Потенциал развития застройки рекреационной зоны": LandUse.RECREATION, + "Потенциал развития застройки зоны специального назначения": LandUse.SPECIAL, + "Потенциал развития застройки промышленной зоны": LandUse.INDUSTRIAL, + "Потенциал развития застройки сельскохозяйственной зоны": LandUse.AGRICULTURE, + "Потенциал развития застройки транспортной зоны": LandUse.TRANSPORT, +} + +benchmarks_demo = { + LandUse.RESIDENTIAL: { + "cost_build": 45_000, + "price_sale": 120_000, + "construction_years": 3, + "sale_years": 3, + "opex_rate": 800, + }, + LandUse.BUSINESS: { + "cost_build": 55_000, + "rent_annual": 25_000, + "rent_years": 12, + "construction_years": 4, + "opex_rate": 1_300, + }, + LandUse.RECREATION: { + "cost_build": 20_000, + "rent_annual": 4_500, + "rent_years": 15, + "construction_years": 3, + "opex_rate": 1_000, + }, + LandUse.SPECIAL: { + "cost_build": 35_000, + "rent_annual": 11_000, + "rent_years": 15, + "construction_years": 3, + "opex_rate": 1_500, + }, + LandUse.INDUSTRIAL: { + "cost_build": 38_000, + "rent_annual": 14_800, + "rent_years": 12, + "construction_years": 3, + "opex_rate": 700, + }, + LandUse.AGRICULTURE: { + "cost_build": 25_000, + "rent_annual": 6_500, + "rent_years": 15, + "construction_years": 3, + "opex_rate": 300, + }, + LandUse.TRANSPORT: { + "cost_build": 18_000, + "rent_annual": 6_200, + "rent_years": 15, + "construction_years": 3, + "opex_rate": 600, + }, +} + +deafaut_cfg = { + "population": 300_000, +} + +discount_rate: float = 0.18 + +URBANOMY_INDICATORS_MAPPING: dict[str, int] = { + "Объём инвестиций в основной капитал на душу населения": 152, + "Валовый региональный продукт на душу населения": 154, + "Доходы бюджета территории": 368, + "Средний уровень заработной платы": 170, + "Износ основного фонда (тыс. 
руб.)": 367, +} + +URBANOMY_BLOCK_COLS = [ + "geometry", + "residential", + "business", + "recreation", + "industrial", + "transport", + "special", + "agriculture", + "land_use", + "share", +] diff --git a/app/effects_api/dto/__init__.py b/app/effects_api/dto/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/effects_api/dto/development_dto.py b/app/effects_api/dto/development_dto.py new file mode 100644 index 0000000..a628f4e --- /dev/null +++ b/app/effects_api/dto/development_dto.py @@ -0,0 +1,44 @@ +from typing import Literal, Optional + +from pydantic import BaseModel, Field + + +class DevelopmentDTO(BaseModel): + force: bool = Field( + default=False, description="flag for recalculating the scenario" + ) + + scenario_id: int = Field( + ..., + examples=[822], + description="Project-scenario ID to retrieve data from.", + ) + proj_func_zone_source: Optional[Literal["PZZ", "OSM", "User"]] = Field( + None, + examples=["User", "PZZ", "OSM"], + description=( + "Preferred source for project functional zones. " + "Default priority: User → PZZ → OSM." + ), + ) + proj_func_source_year: Optional[int] = Field( + None, + examples=[2023, 2024], + description="Year of the chosen project functional-zone source.", + ) + + +class ContextDevelopmentDTO(DevelopmentDTO): + context_func_zone_source: Optional[Literal["PZZ", "OSM", "User"]] = Field( + None, + examples=["PZZ", "OSM"], + description=( + "Preferred source for context functional zones. " + "Default priority: PZZ → OSM." + ), + ) + context_func_source_year: Optional[int] = Field( + None, + examples=[2023, 2024], + description="Year of the chosen context functional-zone source.", + ) diff --git a/app/effects_api/dto/socio_economic_project_dto.py b/app/effects_api/dto/socio_economic_project_dto.py new file mode 100644 index 0000000..81c1021 --- /dev/null +++ b/app/effects_api/dto/socio_economic_project_dto.py @@ -0,0 +1,19 @@ +from pydantic import BaseModel, Field + + +class SocioEconomicByProjectDTO(BaseModel): + project_id: int = Field( + ..., + examples=[120], + description="Project ID to retrieve data from.", + ) + + regional_scenario_id: int = Field( + ..., + examples=[122], + description="Regional scenario ID using for filtering.", + ) + + force: bool = Field( + default=False, description="flag for recalculating the scenario" + ) diff --git a/app/effects_api/dto/socio_economic_scenario_dto.py b/app/effects_api/dto/socio_economic_scenario_dto.py new file mode 100644 index 0000000..d2e9648 --- /dev/null +++ b/app/effects_api/dto/socio_economic_scenario_dto.py @@ -0,0 +1,12 @@ +from pydantic import Field + +from app.effects_api.dto.development_dto import ContextDevelopmentDTO + + +class SocioEconomicByScenarioDTO(ContextDevelopmentDTO): + + split: bool = Field( + default=False, + examples=[False, True], + description="If split will return additional evaluation for each context mo", + ) diff --git a/app/effects_api/dto/transformation_effects_dto.py b/app/effects_api/dto/transformation_effects_dto.py new file mode 100644 index 0000000..0afa936 --- /dev/null +++ b/app/effects_api/dto/transformation_effects_dto.py @@ -0,0 +1,11 @@ +from pydantic import Field + +from app.effects_api.dto.development_dto import ContextDevelopmentDTO + + +class TerritoryTransformationDTO(ContextDevelopmentDTO): + required_service: str = Field( + ..., + examples=["school"], + description="Service type to get response on", + ) diff --git a/app/effects_api/effects_service.py b/app/effects_api/effects_service.py new file mode 100644 index 
0000000..3bf0b31 --- /dev/null +++ b/app/effects_api/effects_service.py @@ -0,0 +1,1993 @@ +import asyncio +import json +import math +import re +import time +from pathlib import Path +from typing import Any, Dict, Literal + +import geopandas as gpd +import numpy as np +import pandas as pd +from blocksnet.analysis.indicators import calculate_development_indicators +from blocksnet.analysis.indicators.socio_economic import ( + calculate_demographic_indicators, + calculate_engineering_indicators, + calculate_general_indicators, + calculate_social_indicators, + calculate_transport_indicators, +) +from blocksnet.analysis.land_use.prediction import SpatialClassifier +from blocksnet.analysis.provision import competitive_provision, provision_strong_total +from blocksnet.blocks.assignment import assign_objects +from blocksnet.config import service_types_config +from blocksnet.enums import LandUse +from blocksnet.machine_learning.regression import DensityRegressor +from blocksnet.optimization.services import ( + AreaSolution, + Facade, + GradientChooser, + TPEOptimizer, + WeightedConstraints, + WeightedObjective, +) +from blocksnet.relations import ( + calculate_distance_matrix, + generate_adjacency_graph, +) +from catboost import CatBoostRegressor +from loguru import logger +from urbanomy.methods.investment_potential import InvestmentAttractivenessAnalyzer +from urbanomy.methods.land_value_modeling import LandDataPreparator, LandPriceEstimator +from urbanomy.methods.socio_economic_indicators.sei_calculate import SEREstimator +from urbanomy.utils.investment_input import prepare_investment_input + +from app.effects_api.modules.scenario_service import ScenarioService +from app.effects_api.modules.service_type_service import ( + adapt_service_types, + build_en_to_ru_map, + ensure_missing_id_and_name_columns, + generate_blocksnet_columns, +) + +from ..clients.urban_api_client import UrbanAPIClient +from ..common.caching.caching_service import FileCache +from ..common.exceptions.http_exception_wrapper import http_exception +from ..common.utils.effects_utils import EffectsUtils +from ..common.utils.geodata import ( + _ensure_block_index, + fc_to_gdf, + gdf_to_ru_fc_rounded, + get_accessibility_matrix, + is_fc, + round_coords, +) +from .constants.const import ( + INDICATORS_MAPPING, + INFRASTRUCTURES_WEIGHTS, + MAX_EVALS, + MAX_RUNS, + PRED_VALUE_RU, + PROB_COLS_EN_TO_RU, + ROADS_ID, URBANOMY_LAND_USE_RULES, benchmarks_demo, URBANOMY_INDICATORS_MAPPING, URBANOMY_BLOCK_COLS, +) +from .dto.development_dto import ( + ContextDevelopmentDTO, + DevelopmentDTO, +) +from .dto.socio_economic_project_dto import ( + SocioEconomicByProjectDTO, +) +from .dto.transformation_effects_dto import TerritoryTransformationDTO +from .modules.context_service import ContextService +from ..prometheus.metrics import ( + EFFECTS_TERRITORY_TRANSFORMATION_TOTAL, + EFFECTS_TERRITORY_TRANSFORMATION_ERROR_TOTAL, + EFFECTS_TERRITORY_TRANSFORMATION_DURATION_SECONDS, + EFFECTS_VALUES_TRANSFORMATION_TOTAL, + EFFECTS_VALUES_TRANSFORMATION_ERROR_TOTAL, + EFFECTS_VALUES_TRANSFORMATION_DURATION_SECONDS, + EFFECTS_VALUES_ORIENTED_REQUIREMENTS_TOTAL, + EFFECTS_VALUES_ORIENTED_REQUIREMENTS_ERROR_TOTAL, + EFFECTS_VALUES_ORIENTED_REQUIREMENTS_DURATION_SECONDS, + EFFECTS_SOCIO_ECONOMICAL_METRICS_TOTAL, + EFFECTS_SOCIO_ECONOMICAL_METRICS_ERROR_TOTAL, + EFFECTS_SOCIO_ECONOMICAL_METRICS_DURATION_SECONDS) + + +class EffectsService: + def __init__( + self, + urban_api_client: UrbanAPIClient, + cache: FileCache, + scenario_service: ScenarioService, + 
context_service: ContextService,
+        effects_utils: EffectsUtils,
+        _land_price_model_lock: asyncio.Lock | None = None,
+        _indicator_name_cache: dict[int, str] | None = None,
+        _indicator_name_cache_lock: asyncio.Lock | None = None,
+        _land_price_model: CatBoostRegressor | None = None,
+        _catboost_model_path: str = "./catboost_model.cbm",
+        _urbanomy_indicator_name_cache: dict[int, str] | None = None,
+        _urbanomy_indicator_name_cache_lock: asyncio.Lock | None = None,
+    ):
+        self._land_price_model = _land_price_model
+        self._land_price_model_lock = _land_price_model_lock or asyncio.Lock()
+
+        self._indicator_name_cache = _indicator_name_cache or {}
+        self._indicator_name_cache_lock = _indicator_name_cache_lock or asyncio.Lock()
+
+        self._urbanomy_indicator_name_cache = _urbanomy_indicator_name_cache or {}
+        self._urbanomy_indicator_name_cache_lock = _urbanomy_indicator_name_cache_lock or asyncio.Lock()
+
+        self._catboost_model_path = _catboost_model_path
+
+        self.urban_api_client = urban_api_client
+        self.cache = cache
+        self.scenario = scenario_service
+        self.context = context_service
+        self.effects_utils = effects_utils
+        self.__name__ = "EffectsService"
+
+    async def build_hash_params(
+        self,
+        params: ContextDevelopmentDTO | DevelopmentDTO,
+        token: str,
+    ) -> dict:
+        project_id = (
+            await self.urban_api_client.get_scenario_info(params.scenario_id, token)
+        )["project"]["project_id"]
+        base_scenario_id = await self.urban_api_client.get_base_scenario_id(project_id, token)
+        base_src, base_year = (
+            await self.urban_api_client.get_optimal_func_zone_request_data(
+                token, base_scenario_id, None, None
+            )
+        )
+        p = params.model_dump()
+        p.pop("force", None)
+        return p | {
+            "base_func_zone_source": base_src,
+            "base_func_zone_year": base_year,
+        }
+
+    async def get_optimal_func_zone_data(
+        self,
+        params: (
+            DevelopmentDTO
+            | ContextDevelopmentDTO
+            | SocioEconomicByProjectDTO
+            | TerritoryTransformationDTO
+        ),
+        token: str,
+    ) -> DevelopmentDTO:
+        """
+        Get optimal functional zone source and year for the project scenario.
+        If not provided, fetches the best available source and year.
+
+        Params:
+            params (DevelopmentDTO): DTO with scenario ID and optional functional zone source and year.
+        Returns:
+            DevelopmentDTO: DTO with updated functional zone source and year.
+ """ + + if not params.proj_func_zone_source or not params.proj_func_source_year: + (params.proj_func_zone_source, params.proj_func_source_year) = ( + await self.urban_api_client.get_optimal_func_zone_request_data( + token, + params.scenario_id, + params.proj_func_zone_source, + params.proj_func_source_year, + ) + ) + if isinstance(params, ContextDevelopmentDTO): + if ( + not params.context_func_zone_source + or not params.context_func_source_year + ): + ( + params.context_func_zone_source, + params.context_func_source_year, + ) = await self.urban_api_client.get_optimal_func_zone_request_data( + token, + params.scenario_id, + params.context_func_zone_source, + params.context_func_source_year, + project=False, + ) + return params + return params + + async def _assess_provision( + self, blocks: pd.DataFrame, acc_mx: pd.DataFrame, service_type: str + ) -> gpd.GeoDataFrame: + _, demand, accessibility = service_types_config[service_type].values() + blocks["is_project"] = blocks["is_project"].fillna(False).astype(bool) + context_ids = await self.context.get_accessibility_context( + blocks, acc_mx, accessibility + ) + capacity_column = f"capacity_{service_type}" + if capacity_column in blocks.columns: + blocks_df = ( + blocks[["geometry", "population", capacity_column]] + .rename(columns={capacity_column: "capacity"}) + .fillna(0) + ) + else: + blocks_df = blocks[["geometry", "population"]].copy().fillna(0) + blocks_df["capacity"] = 0 + prov_df, _ = competitive_provision(blocks_df, acc_mx, accessibility, demand) + prov_df = prov_df.loc[context_ids].copy() + return blocks[["geometry"]].join(prov_df, how="right") + + async def calculate_provision_totals( + self, + provision_gdfs_dict: dict[str, gpd.GeoDataFrame], + ndigits: int = 2, + ) -> dict[str, float | None]: + prov_totals: dict[str, float | None] = {} + for st_name, prov_gdf in provision_gdfs_dict.items(): + if prov_gdf.demand.sum() == 0: + prov_totals[st_name] = None + else: + try: + total = float(provision_strong_total(prov_gdf)) + except Exception as e: + logger.exception("Provision total calculation failed") + raise http_exception( + 500, + "Provision total calculation failed", + _input={"service_type": st_name}, + _detail=str(e), + ) + prov_totals[st_name] = round(total, ndigits) + return prov_totals + + async def _compute_provision_layers( + self, + blocks: gpd.GeoDataFrame, + service_types: pd.DataFrame, + *, + section_label: str, + ) -> tuple[dict[str, gpd.GeoDataFrame], dict[str, float | None]]: + """Compute provision layers (GeoDataFrames) and totals for a blocks layer. + + Args: + blocks: Blocks GeoDataFrame (must include 'geometry' and 'population'). + service_types: Service types dataframe filtered to infrastructure services. + section_label: Human-readable label for logging (e.g. 'BEFORE', 'AFTER'). 
+ + Returns: + Tuple of: + - dict[service_name, GeoDataFrame] with provision columns + - dict[service_name, total_provision] where total_provision may be None + """ + blocks = blocks.copy() + + if "is_project" in blocks.columns: + blocks["is_project"] = ( + blocks["is_project"] + .infer_objects(copy=False) + .fillna(False) + .astype(bool) + ) + else: + blocks["is_project"] = False + + try: + acc_mx = get_accessibility_matrix(blocks) + except Exception as exc: + logger.exception( + f"Accessibility matrix calculation failed ({section_label}): {exc}" + ) + raise http_exception( + 500, "Accessibility matrix calculation failed", _detail=str(exc) + ) + + prov_gdfs: dict[str, gpd.GeoDataFrame] = {} + + for st_id in service_types.index: + st_name = service_types.loc[st_id, "name"] + prov_gdf = await self._assess_provision(blocks, acc_mx, st_name) + prov_gdf = prov_gdf.join( + blocks[["is_project"]].reindex(prov_gdf.index), how="left" + ) + prov_gdf["is_project"] = prov_gdf["is_project"].fillna(False).astype(bool) + prov_gdf = prov_gdf.to_crs(4326).drop( + columns="provision_weak", errors="ignore" + ) + + num_cols = [ + c for c in prov_gdf.select_dtypes(include=["number"]).columns + if c != "is_project" + ] + if num_cols: + prov_gdf[num_cols] = prov_gdf[num_cols].fillna(0) + + prov_gdfs[st_name] = gpd.GeoDataFrame( + prov_gdf, geometry="geometry", crs="EPSG:4326" + ) + + prov_totals = await self.calculate_provision_totals(prov_gdfs) + logger.info( + f"Provision layers computed ({section_label}): services={len(prov_gdfs)}" + ) + return prov_gdfs, prov_totals + + + async def territory_transformation_scenario_before( + self, + token: str, + params: ContextDevelopmentDTO, + context_blocks: gpd.GeoDataFrame | None = None, + ): + """Compute and cache provision layers for territory transformation. + + Semantics: + - 'before' is always computed for the *base* scenario of the project. + - 'after' is computed for the requested scenario_id (only for non-base scenarios). 
+ + Cache: + Stored under method 'territory_transformation' in a single JSON with sections: + data.before.{service_name, ..., provision_total_before} + data.after.{service_name, ..., provision_total_after} (only for non-base) + + Returns: + - For base scenarios: dict[str, GeoDataFrame] (only BEFORE layers) + - For non-base scenarios: {"before": {...}, "after": {...}} + """ + + method_name = "territory_transformation" + + info = await self.urban_api_client.get_scenario_info(params.scenario_id, token) + updated_at = info["updated_at"] + is_based = bool(info.get("is_based")) + project_id = info["project"]["project_id"] + base_id_response = await self.urban_api_client.get_all_project_info(project_id, token) + base_scenario_id = base_id_response["base_scenario"]["id"] + + params = await self.get_optimal_func_zone_data(params, token) + params_for_hash = await self.build_hash_params(params, token) + phash = self.cache.params_hash(params_for_hash) + + force = bool(getattr(params, "force", False)) + cached = None if force else self.cache.load(method_name, params.scenario_id, phash) + + if cached and cached.get("meta", {}).get("scenario_updated_at") == updated_at: + data = cached.get("data") or {} + has_before = isinstance(data.get("before"), dict) and any( + is_fc(v) for v in (data.get("before") or {}).values() + ) + has_after = isinstance(data.get("after"), dict) and any( + is_fc(v) for v in (data.get("after") or {}).values() + ) + + if has_before and (is_based or has_after): + before_gdfs = { + n: fc_to_gdf(fc) + for n, fc in (data.get("before") or {}).items() + if is_fc(fc) + } + if is_based: + return before_gdfs + + after_gdfs = { + n: fc_to_gdf(fc) + for n, fc in (data.get("after") or {}).items() + if is_fc(fc) + } + return {"before": before_gdfs, "after": after_gdfs} + + logger.info("Cache stale, missing or forced: calculating TERRITORY_TRANSFORMATION provisions") + + service_types = await self.urban_api_client.get_service_types() + service_types = await adapt_service_types(service_types, self.urban_api_client) + service_types = service_types[~service_types["infrastructure_type"].isna()].copy() + + base_src, base_year = await self.urban_api_client.get_optimal_func_zone_request_data( + token, base_scenario_id, None, None + ) + base_scenario_blocks, _ = await self.scenario.aggregate_blocks_layer_scenario( + base_scenario_id, base_src, base_year, token + ) + + if context_blocks is None: + context_blocks = gpd.GeoDataFrame(geometry=[], crs=base_scenario_blocks.crs) + + before_blocks = pd.concat([context_blocks, base_scenario_blocks]).reset_index( + drop=True + ) + + prov_gdfs_before, prov_totals_before = await self._compute_provision_layers( + before_blocks, + service_types=service_types, + section_label="BEFORE", + ) + + existing_data = (cached.get("data") if cached else {}) or {} + + existing_data["before"] = { + name: await gdf_to_ru_fc_rounded(gdf, ndigits=6) + for name, gdf in prov_gdfs_before.items() + } + existing_data["before"]["provision_total_before"] = prov_totals_before + + prov_gdfs_after: dict[str, gpd.GeoDataFrame] = {} + prov_totals_after: dict[str, float | None] = {} + + if not is_based: + scenario_blocks, _ = await self.scenario.aggregate_blocks_layer_scenario( + params.scenario_id, + params.proj_func_zone_source, + params.proj_func_source_year, + token, + ) + + after_blocks = pd.concat([context_blocks, scenario_blocks]).reset_index( + drop=True + ) + + if ( + "population" not in after_blocks.columns + or after_blocks["population"].isna().any() + ): + dev_df = await 
self.run_development_parameters(after_blocks) + after_blocks["population"] = pd.to_numeric( + dev_df["population"], errors="coerce" + ).fillna(0) + else: + after_blocks["population"] = pd.to_numeric( + after_blocks["population"], errors="coerce" + ).fillna(0) + + prov_gdfs_after, prov_totals_after = await self._compute_provision_layers( + after_blocks, + service_types=service_types, + section_label="AFTER", + ) + + existing_data["after"] = { + name: await gdf_to_ru_fc_rounded(gdf, ndigits=6) + for name, gdf in prov_gdfs_after.items() + } + existing_data["after"]["provision_total_after"] = prov_totals_after + + self.cache.save( + method_name, + params.scenario_id, + params_for_hash, + existing_data, + scenario_updated_at=updated_at, + ) + + if is_based: + return prov_gdfs_before + + return {"before": prov_gdfs_before, "after": prov_gdfs_after} + + @staticmethod + async def run_development_parameters( + blocks_gdf: gpd.GeoDataFrame, + ) -> pd.DataFrame: + """ + Compute core *development* indicators (FSI, GSI, MXI, etc.) for each + block and derive population estimates. + + The routine: + 1. Clips every land-use share to [0, 1]. + 2. Generates an adjacency graph (10 m tolerance). + 3. Uses DensityRegressor to predict density indices. + 4. Converts indices into built-area, footprint, living area, etc. + 5. Estimates population by living_area // 20. + + Params: + blocks_gdf : gpd.GeoDataFrame + Block layer already containing per-land-use **shares** + (0 ≤ share ≤ 1) and `site_area`. + + Returns: + pd.DataFrame with added columns: + `build_floor_area`, `footprint_area`, `living_area`, + `non_living_area`, `population`, plus the original density indices. + """ + for lu in LandUse: + blocks_gdf[lu.value] = blocks_gdf[lu.value].apply(lambda v: min(v, 1)) + + try: + adjacency_graph = generate_adjacency_graph(blocks_gdf, 10) + except Exception as e: + logger.exception("Adjacency graph generation failed") + raise http_exception( + 500, "Adjacency graph generation failed", _detail=str(e) + ) + + dr = DensityRegressor() + + try: + density_df = dr.evaluate(blocks_gdf, adjacency_graph) + except Exception as e: + logger.exception("Density evaluation failed") + raise http_exception(500, "Density evaluation failed", _detail=str(e)) + + density_df.loc[density_df["fsi"] < 0, "fsi"] = 0 + + density_df.loc[density_df["gsi"] < 0, "gsi"] = 0 + density_df.loc[density_df["gsi"] > 1, "gsi"] = 1 + + density_df.loc[density_df["mxi"] < 0, "mxi"] = 0 + density_df.loc[density_df["mxi"] > 1, "mxi"] = 1 + + density_df.loc[blocks_gdf["residential"] == 0, "mxi"] = 0 + density_df["site_area"] = blocks_gdf["site_area"] + + try: + development_df = calculate_development_indicators(density_df) + except Exception as e: + logger.exception("Development indicator calculation failed") + raise http_exception( + 500, "Development indicator calculation failed", _detail=str(e) + ) + + development_df["population"] = development_df["living_area"] // 20 + + return development_df + + def _build_facade( + self, + after_blocks: gpd.GeoDataFrame, + acc_mx: pd.DataFrame, + service_types: pd.DataFrame, + ) -> Facade: + blocks_lus = after_blocks.loc[after_blocks["is_project"], "land_use"] + blocks_lus = blocks_lus[~blocks_lus.isna()].to_dict() + + var_adapter = AreaSolution(blocks_lus) + + facade = Facade( + blocks_lu=blocks_lus, + blocks_df=after_blocks, + accessibility_matrix=acc_mx, + var_adapter=var_adapter, + ) + + for st_id, row in service_types.iterrows(): + st_name = row["name"] + st_weight = row["infrastructure_weight"] + st_column = 
f"capacity_{st_name}" + + if st_column in after_blocks.columns: + df = after_blocks.rename(columns={st_column: "capacity"})[ + ["capacity"] + ].fillna(0) + else: + df = after_blocks[[]].copy() + df["capacity"] = 0 + facade.add_service_type(st_name, st_weight, df) + + return facade + + async def territory_transformation_scenario_after( + self, + token: str, + params: ContextDevelopmentDTO | DevelopmentDTO, + context_blocks: gpd.GeoDataFrame, + save_cache: bool = True, + ) -> dict[str, Any]: + """Compute and (optionally) cache optimization result for values transformation. + + This method no longer persists provision layers. It is only responsible for + producing `best_x` (service placement optimization vector) which is later + used by `values_transformation`. + + Cache: + Stored under method 'territory_transformation_opt' with payload: {"best_x": best_x} + + Returns: + {"best_x": best_x} + """ + + opt_method = "territory_transformation_opt" + + info = await self.urban_api_client.get_scenario_info(params.scenario_id, token) + updated_at = info["updated_at"] + is_based = bool(info.get("is_based")) + + if is_based: + logger.exception("Base scenario has no optimization 'after' context") + raise http_exception( + 400, "Base scenario has no optimization 'after' context" + ) + + params = await self.get_optimal_func_zone_data(params, token) + params_for_hash = await self.build_hash_params(params, token) + phash = self.cache.params_hash(params_for_hash) + + force = bool(getattr(params, "force", False)) + cached = None if force else self.cache.load(opt_method, params.scenario_id, phash) + + if ( + cached + and cached.get("meta", {}).get("scenario_updated_at") == updated_at + and isinstance(cached.get("data"), dict) + and "best_x" in cached["data"] + ): + return {"best_x": cached["data"]["best_x"]} + + logger.info("Cache stale, missing or forced: running service placement optimization") + + service_types = await self.urban_api_client.get_service_types() + service_types = await adapt_service_types(service_types, self.urban_api_client) + service_types = service_types[~service_types["infrastructure_type"].isna()].copy() + + scenario_blocks, _ = await self.scenario.aggregate_blocks_layer_scenario( + params.scenario_id, + params.proj_func_zone_source, + params.proj_func_source_year, + token, + ) + + after_blocks = pd.concat([context_blocks, scenario_blocks]).reset_index(drop=True) + + if "is_project" in after_blocks.columns: + after_blocks["is_project"] = ( + after_blocks["is_project"].infer_objects(copy=False).fillna(False).astype(bool) + ) + else: + after_blocks["is_project"] = False + + try: + acc_mx = get_accessibility_matrix(after_blocks) + except Exception as exc: + logger.exception("Accessibility matrix calculation failed") + raise http_exception(500, "Accessibility matrix calculation failed", _detail=str(exc)) + + service_types["infrastructure_weight"] = ( + service_types["infrastructure_type"].map(INFRASTRUCTURES_WEIGHTS) + * service_types["infrastructure_weight"] + ) + + if ( + "population" not in after_blocks.columns + or after_blocks["population"].isna().any() + ): + dev_df = await self.run_development_parameters(after_blocks) + after_blocks["population"] = pd.to_numeric( + dev_df["population"], errors="coerce" + ).fillna(0) + else: + after_blocks["population"] = pd.to_numeric( + after_blocks["population"], errors="coerce" + ).fillna(0) + + facade = self._build_facade(after_blocks, acc_mx, service_types) + + services_weights = service_types.set_index("name")["infrastructure_weight"].to_dict() + 
+ objective = WeightedObjective( + num_params=facade.num_params, + facade=facade, + weights=services_weights, + max_evals=MAX_EVALS, + ) + constraints = WeightedConstraints(num_params=facade.num_params, facade=facade) + tpe_optimizer = TPEOptimizer( + objective=objective, + constraints=constraints, + vars_chooser=GradientChooser(facade, facade.num_params, num_top=5), + ) + + try: + best_x, best_val, perc, func_evals = tpe_optimizer.run( + max_runs=MAX_RUNS, timeout=10, initial_runs_num=1 + ) + except Exception as e: + logger.exception("Optimization (TPE) failed") + raise http_exception( + 500, "Service placement optimization failed", _detail=str(e) + ) + + if save_cache: + self.cache.save( + opt_method, + params.scenario_id, + params_for_hash, + {"best_x": best_x}, + scenario_updated_at=updated_at, + ) + + return {"best_x": best_x} + + async def territory_transformation( + self, + token: str, + params: ContextDevelopmentDTO, + ) -> dict[str, Any] | dict[str, dict[str, Any]]: + """Compute territory transformation provision layers. + + NOTE: + Provision layers for both 'before' (base scenario) and 'after' (requested scenario) + are computed inside `territory_transformation_scenario_before`. The 'after' section + is omitted for base scenarios. + """ + project_id = ( + await self.urban_api_client.get_scenario_info(params.scenario_id, token) + )["project"]["project_id"] + + # context_blocks, context_territories_gdf, service_types = await self.context.get_shared_context(project_id, + # token) + context_blocks, _ = await self.context.aggregate_blocks_layer_context( + params.scenario_id, + params.context_func_zone_source, + params.context_func_source_year, + token, + ) + EFFECTS_TERRITORY_TRANSFORMATION_TOTAL.inc() + start_time = time.perf_counter() + try: + return await self.territory_transformation_scenario_before(token, params, context_blocks) + except Exception: + EFFECTS_TERRITORY_TRANSFORMATION_ERROR_TOTAL.inc() + raise + finally: + EFFECTS_TERRITORY_TRANSFORMATION_DURATION_SECONDS.observe( + time.perf_counter() - start_time + ) + + async def values_transformation( + self, + token: str, + params: TerritoryTransformationDTO, + ) -> dict: + EFFECTS_VALUES_TRANSFORMATION_TOTAL.inc() + start_time = time.perf_counter() + try: + start_time = time.perf_counter() + + opt_method = "territory_transformation_opt" + + params = await self.get_optimal_func_zone_data(params, token) + + params_for_hash = await self.build_hash_params(params, token) + phash = self.cache.params_hash(params_for_hash) + force = getattr(params, "force", False) + + info = await self.urban_api_client.get_scenario_info(params.scenario_id, token) + updated_at = info["updated_at"] + + context_blocks, _ = await self.context.aggregate_blocks_layer_context( + params.scenario_id, + params.context_func_zone_source, + params.context_func_source_year, + token, + ) + + opt_cached = ( + None if force else self.cache.load(opt_method, params.scenario_id, phash) + ) + need_refresh = ( + force + or not opt_cached + or opt_cached["meta"]["scenario_updated_at"] != updated_at + or "best_x" not in opt_cached["data"] + ) + if need_refresh: + res = await self.territory_transformation_scenario_after( + token, params, context_blocks, save_cache=False + ) + best_x_val = res["best_x"] + + self.cache.save( + opt_method, + params.scenario_id, + params_for_hash, + {"best_x": best_x_val}, + scenario_updated_at=updated_at, + ) + opt_cached = self.cache.load(opt_method, params.scenario_id, phash) + + best_x = opt_cached["data"]["best_x"] + + scenario_blocks, _ = 
await self.scenario.aggregate_blocks_layer_scenario(
+                params.scenario_id,
+                params.proj_func_zone_source,
+                params.proj_func_source_year,
+                token,
+            )
+
+            after_blocks = pd.concat([context_blocks, scenario_blocks], ignore_index=False)
+            if "block_id" in after_blocks.columns:
+                after_blocks["block_id"] = after_blocks["block_id"].astype(int)
+                if after_blocks.index.name == "block_id":
+                    after_blocks = after_blocks.reset_index(drop=True)
+                after_blocks = (
+                    after_blocks.drop_duplicates(subset="block_id", keep="last")
+                    .set_index("block_id")
+                    .sort_index()
+                )
+            else:
+                after_blocks.index = after_blocks.index.astype(int)
+                after_blocks = after_blocks[
+                    ~after_blocks.index.duplicated(keep="last")
+                ].sort_index()
+                after_blocks.index.name = "block_id"
+
+            if "is_project" in after_blocks.columns:
+                after_blocks["is_project"] = (
+                    after_blocks["is_project"].fillna(False).astype(bool)
+                )
+            else:
+                after_blocks["is_project"] = False
+
+            try:
+                acc_mx = get_accessibility_matrix(after_blocks)
+            except Exception as e:
+                logger.exception("Accessibility matrix calculation failed")
+                raise http_exception(
+                    500, "Accessibility matrix calculation failed", _detail=str(e)
+                )
+
+            service_types = await self.urban_api_client.get_service_types()
+            service_types = await adapt_service_types(service_types, self.urban_api_client)
+            service_types = service_types[
+                ~service_types["infrastructure_type"].isna()
+            ].copy()
+            service_types["infrastructure_weight"] = (
+                service_types["infrastructure_type"].map(INFRASTRUCTURES_WEIGHTS)
+                * service_types["infrastructure_weight"]
+            )
+
+            facade = self._build_facade(after_blocks, acc_mx, service_types)
+            test_blocks: gpd.GeoDataFrame = after_blocks.loc[
+                list(facade._blocks_lu.keys())
+            ].copy()
+            test_blocks.index = test_blocks.index.astype(int)
+
+            try:
+                solution_df = facade.solution_to_services_df(best_x).copy()
+            except Exception as e:
+                logger.exception("Solution calculation failed")
+                raise http_exception(500, "Solution calculation failed", _detail=str(e))
+
+            solution_df["block_id"] = solution_df["block_id"].astype(int)
+            metrics = [
+                c
+                for c in ["site_area", "build_floor_area", "capacity", "count"]
+                if c in solution_df.columns
+            ]
+
+            if metrics:
+                non_zero_mask = (solution_df[metrics].fillna(0) != 0).any(axis=1)
+                solution_df = solution_df[non_zero_mask].copy()
+
+            if len(metrics):
+                agg = (
+                    solution_df.groupby(["block_id", "service_type"])[metrics]
+                    .sum()
+                    .sort_index()
+                )
+            else:
+                agg = (
+                    solution_df.groupby(["block_id", "service_type"])
+                    .size()
+                    .to_frame(name="__dummy__")
+                    .drop(columns="__dummy__")
+                )
+
+            def _row_to_dict(s: pd.Series) -> dict:
+                d = {m: (0 if pd.isna(s.get(m)) else s.get(m)) for m in metrics}
+                for k, v in d.items():
+                    try:
+                        fv = float(v)
+                        d[k] = int(fv) if fv.is_integer() else fv
+                    except Exception:
+                        pass
+                return d
+
+            cells = (
+                agg.apply(_row_to_dict, axis=1)
+                if len(metrics)
+                else agg.apply(lambda _: {}, axis=1)
+            )
+            wide = cells.unstack("service_type").reindex(index=test_blocks.index)
+
+            all_services = sorted(solution_df["service_type"].dropna().unique().tolist())
+            for s in all_services:
+                if s not in wide.columns:
+                    wide[s] = np.nan
+
+            wide = wide[all_services]
+
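+            # `wide` is indexed by block_id with one column per service type; each
+            # cell holds the aggregated metrics dict for that service in that block,
+            # e.g. {"site_area": 1200, "capacity": 250, "count": 1} (illustrative
+            # values only).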
test_blocks_with_services: gpd.GeoDataFrame = test_blocks.join(wide, how="left") + + logger.info("Values transformed complete") + + geom_col = test_blocks_with_services.geometry.name + service_cols = all_services + base_cols = [ + c for c in ["is_project"] if c in test_blocks_with_services.columns + ] + + gdf_out = test_blocks_with_services[base_cols + service_cols + [geom_col]] + + try: + logger.info("Running land-use prediction on 'after_blocks'") + + ab = after_blocks[ + after_blocks.geometry.notna() & ~after_blocks.geometry.is_empty + ].copy() + ab.geometry = ab.geometry.buffer(0) + + try: + utm_crs = ab.estimate_utm_crs() + ab = ab.to_crs(utm_crs) + except Exception: + ab = ab.to_crs("EPSG:3857") + + clf = SpatialClassifier.default() + lu = clf.run(ab) + + lu = lu.drop(columns=["category"], errors="ignore") + + keep_cols = ["pred_name", "prob_urban", "prob_non_urban", "prob_industrial"] + for c in keep_cols: + if c not in lu.columns: + lu[c] = np.nan + lu = lu[keep_cols] + + lu = _ensure_block_index(lu) + gdf_out = _ensure_block_index(gdf_out) + gdf_out = gdf_out.join(lu, how="left") + + logger.info( + "Attached land-use predictions to gdf_out (cols: {})", keep_cols + ) + + if "pred_name" in gdf_out.columns: + gdf_out["Предсказанный вид использования"] = ( + gdf_out["pred_name"] + .str.lower() + .map(PRED_VALUE_RU) + .fillna(gdf_out["pred_name"]) + ) + gdf_out = gdf_out.drop(columns=["pred_name"]) + + prob_cols = [ + c + for c in ["prob_urban", "prob_non_urban", "prob_industrial"] + if c in gdf_out.columns + ] + for col in prob_cols: + gdf_out[col] = gdf_out[col].astype(float).round(1) + + rename_map = { + k: v for k, v in PROB_COLS_EN_TO_RU.items() if k in gdf_out.columns + } + gdf_out = gdf_out.rename(columns=rename_map) + + except Exception as e: + raise http_exception(500, "Failed to attach land-use predictions: {}", e) + + gdf_out = gdf_out.to_crs("EPSG:4326") + gdf_out.geometry = round_coords(gdf_out.geometry, 6) + + service_types = await self.urban_api_client.get_service_types() + try: + en2ru = await build_en_to_ru_map(service_types) + rename_map = {k: v for k, v in en2ru.items() if k in gdf_out.columns} + if rename_map: + gdf_out = gdf_out.rename(columns=rename_map) + + geom_col = gdf_out.geometry.name + non_geom = [c for c in gdf_out.columns if c != geom_col] + + pin_first = [ + c + for c in ["is_project", "Предсказанный вид использования"] + if c in non_geom + ] + + rest = [c for c in non_geom if c not in pin_first] + rest_sorted = sorted(rest, key=lambda s: s.casefold()) + + gdf_out = gdf_out[pin_first + rest_sorted + [geom_col]] + + geojson = json.loads(gdf_out.to_json()) + except Exception as e: + logger.exception("Failed to attach land-use predictions to gdf_out") + raise http_exception(500, "Failed to attach land-use predictions", e) + + self.cache.save( + "values_transformation", + params.scenario_id, + params_for_hash, + geojson, + scenario_updated_at=updated_at, + ) + + logger.info("Values transformed complete (with land-use predictions)") + return geojson + except Exception: + EFFECTS_VALUES_TRANSFORMATION_ERROR_TOTAL.inc() + raise + finally: + EFFECTS_VALUES_TRANSFORMATION_DURATION_SECONDS.observe(time.perf_counter() - start_time) + + def _get_value_level(self, provisions: list[float | None]) -> float: + vals = [p for p in provisions if p is not None] + return float(np.mean(vals)) if vals else np.nan + + async def values_oriented_requirements( + self, + token: str, + params: TerritoryTransformationDTO | DevelopmentDTO, + persist: Literal["full", "table_only"] = 
"full", + ): + EFFECTS_VALUES_ORIENTED_REQUIREMENTS_TOTAL.inc() + start_time = time.perf_counter() + try: + method_name = "values_oriented_requirements" + + force: bool = bool(getattr(params, "force", False)) + + base_id = await self.effects_utils.resolve_base_id(token, params.scenario_id) + logger.info( + f"Using base scenario_id={base_id} (requested={params.scenario_id})" + ) + + params_base = params.model_copy( + update={ + "scenario_id": base_id, + "proj_func_zone_source": None, + "proj_func_source_year": None, + "context_func_zone_source": None, + "context_func_source_year": None, + } + ) + params_base = await self.get_optimal_func_zone_data(params_base, token) + + params_for_hash_base = await self.build_hash_params(params_base, token) + phash_base = self.cache.params_hash(params_for_hash_base) + info_base = await self.urban_api_client.get_scenario_info(base_id, token) + updated_at_base = info_base["updated_at"] + + def _result_to_df(payload: Any) -> pd.DataFrame: + if isinstance(payload, dict) and "data" not in payload: + items = sorted( + ((int(k), v.get("value", 0.0)) for k, v in payload.items()), + key=lambda t: t[0], + ) + idx = [k for k, _ in items] + vals = [float(v) if v is not None else 0.0 for _, v in items] + return pd.DataFrame({"social_value_level": vals}, index=idx) + df = pd.DataFrame( + data=payload["data"], index=payload["index"], columns=payload["columns"] + ) + df.index.name = payload.get("index_name", None) + return df + + if not force: + cached_base = self.cache.load(method_name, base_id, phash_base) + if ( + cached_base + and cached_base["meta"].get("scenario_updated_at") == updated_at_base + and "result" in cached_base["data"] + ): + return _result_to_df(cached_base["data"]["result"]) + + context_blocks, _ = await self.context.aggregate_blocks_layer_context( + params.scenario_id, + params_base.context_func_zone_source, + params_base.context_func_source_year, + token, + ) + + scenario_blocks, _ = await self.scenario.aggregate_blocks_layer_scenario( + params_base.scenario_id, + params_base.proj_func_zone_source, + params_base.proj_func_source_year, + token, + ) + scenario_blocks = scenario_blocks.to_crs(context_blocks.crs) + + cap_cols = [c for c in scenario_blocks.columns if c.startswith("capacity_")] + scenario_blocks.loc[ + scenario_blocks["is_project"], ["population"] + cap_cols + ] = 0 + if "capacity" in scenario_blocks.columns: + scenario_blocks = scenario_blocks.drop(columns="capacity") + + blocks = gpd.GeoDataFrame( + pd.concat([context_blocks, scenario_blocks], ignore_index=True), + crs=context_blocks.crs, + ) + + service_types = await self.urban_api_client.get_service_types() + service_types = await adapt_service_types(service_types, self.urban_api_client) + service_types = service_types[~service_types["social_values"].isna()].copy() + + try: + acc_mx = get_accessibility_matrix(blocks) + except Exception as e: + logger.exception("Accessibility matrix calculation failed") + raise http_exception( + 500, "Accessibility matrix calculation failed", _detail=str(e) + ) + + prov_gdfs: Dict[str, gpd.GeoDataFrame] = {} + for st_id in service_types.index: + st_name = service_types.loc[st_id, "name"] + prov_gdf = await self._assess_provision(blocks, acc_mx, st_name) + prov_gdf = prov_gdf.to_crs(4326).drop( + columns="provision_weak", errors="ignore" + ) + num_cols = prov_gdf.select_dtypes(include="number").columns + prov_gdf[num_cols] = prov_gdf[num_cols].fillna(0) + prov_gdfs[st_name] = prov_gdf + + social_values_provisions: Dict[str, list[float | None]] = {} + for 
st_id in service_types.index: + st_name = service_types.loc[st_id, "name"] + social_values = service_types.loc[st_id, "social_values"] + prov_gdf = prov_gdfs.get(st_name) + if prov_gdf is None or prov_gdf.empty: + continue + prov_total = ( + None + if prov_gdf["demand"].sum() == 0 + else float(provision_strong_total(prov_gdf)) + ) + for sv in social_values: + social_values_provisions.setdefault(sv, []).append(prov_total) + + soc_values_map = await self.urban_api_client.get_social_values_info() + index = list(social_values_provisions.keys()) + result_df = pd.DataFrame( + data=[self._get_value_level(social_values_provisions[sv]) for sv in index], + index=index, + columns=["social_value_level"], + ) + values_table = { + int(sv_id): { + "name": soc_values_map.get(sv_id, str(sv_id)), + "value": round(float(val), 2) if val else 0.0, + } + for sv_id, val in result_df["social_value_level"].to_dict().items() + } + + raw_services_df = await self.urban_api_client.get_service_types() + en2ru = await build_en_to_ru_map(raw_services_df) + + demand_left_col = "demand_left" + social_values_table: list[dict] = [] + + for st_id in service_types.index: + st_en = service_types.loc[st_id, "name"] + st_ru = en2ru.get(st_en, st_en) + + linked_ids = list( + map(int, (service_types.loc[st_id, "social_values"] or [])) + ) + linked_ru = [soc_values_map.get(sv_id, str(sv_id)) for sv_id in linked_ids] + + gdf = prov_gdfs.get(st_en) + total_unsatisfied = 0.0 + if gdf is not None and not gdf.empty: + if demand_left_col not in gdf.columns: + raise RuntimeError( + f"Колонка '{demand_left_col}' отсутствует для сервиса '{st_en}'" + ) + total_unsatisfied = float(gdf[demand_left_col].sum()) + + social_values_table.append( + { + "service": st_ru, + "unsatisfied_demand_sum": round(total_unsatisfied, 2), + "social_values": linked_ru, + } + ) + + if persist == "full": + payload = { + "provision": { + name: await gdf_to_ru_fc_rounded(gdf, ndigits=6) + for name, gdf in prov_gdfs.items() + }, + "result": values_table, + "social_values_table": social_values_table, + "services_type_deficit": social_values_table, + } + else: + payload = { + "result": values_table, + "social_values_table": social_values_table, + "services_type_deficit": social_values_table, + } + + self.cache.save( + method_name, + base_id, + params_for_hash_base, + payload, + scenario_updated_at=updated_at_base, + ) + + return result_df + except Exception: + EFFECTS_VALUES_ORIENTED_REQUIREMENTS_ERROR_TOTAL.inc() + raise + finally: + EFFECTS_VALUES_ORIENTED_REQUIREMENTS_DURATION_SECONDS.observe(time.perf_counter() - start_time) + + def _clean_number(self, v): + """ + Normalize numeric-like values to built-in Python types. + + Converts numpy numeric types (e.g. np.int64, np.float32) to plain `int` or `float`, + safely handling `None`, `NaN`, and infinite values. 
+ + Returns: + int | float | Any | None: + - int or float for finite numeric inputs + - None for NaN, None, or ±inf + - unchanged value for non-numeric inputs + """ + if v is None or (isinstance(v, float) and np.isnan(v)): + return None + try: + if isinstance(v, (np.floating, float, np.integer, int)) and not np.isfinite( + float(v) + ): + return None + except Exception: + pass + if isinstance(v, np.integer): + return int(v) + if isinstance(v, np.floating): + return float(v) + return v + + def _format_indicator_label(self, ind_info: dict[str, Any]) -> str: + """Build display label: 'name_full (unit)' if measurement_unit exists.""" + name = (ind_info.get("name_full") or "").strip() + if not name: + name = (ind_info.get("name_short") or "").strip() + + mu = ind_info.get("measurement_unit") or {} + unit = (mu.get("name") or "").strip() + + return f"{name} ({unit})" if unit else name + + async def _load_indicator_name_cache(self) -> dict[int, str]: + """Load indicator_id -> formatted label mapping once, based on INDICATORS_MAPPING.""" + if self._indicator_name_cache: + return self._indicator_name_cache + + async with self._indicator_name_cache_lock: + if self._indicator_name_cache: + return self._indicator_name_cache + + indicator_ids: set[int] = set() + for v in INDICATORS_MAPPING.values(): + if v is None: + continue + try: + indicator_ids.add(int(v)) + except (TypeError, ValueError): + logger.warning("Skipping invalid indicator id in INDICATORS_MAPPING: %r", v) + + logger.info(f"Preloading indicator names for {len(indicator_ids)} indicators") + + id_to_name: dict[int, str] = {} + for ind_id in sorted(indicator_ids): + try: + ind_info = await self.urban_api_client.get_indicator_info(ind_id) + id_to_name[ind_id] = self._format_indicator_label(ind_info) + except Exception as exc: + logger.warning(f"Failed to fetch indicator info for id={ind_id}: {exc}") + + self._indicator_name_cache = id_to_name + logger.info(f"Indicator name cache loaded: {len(self._indicator_name_cache)} entries") + return self._indicator_name_cache + + async def _load_urbanomy_indicator_name_cache(self) -> dict[int, str]: + """Load Urbanomy indicator_id -> formatted label mapping once.""" + async with self._urbanomy_indicator_name_cache_lock: + if self._urbanomy_indicator_name_cache: + return self._urbanomy_indicator_name_cache + + indicator_ids = {int(v) for v in URBANOMY_INDICATORS_MAPPING.values() if v is not None} + logger.info(f"Preloading Urbanomy indicator names for {len(indicator_ids)} indicators") + + id_to_name: dict[int, str] = {} + for ind_id in sorted(indicator_ids): + try: + ind_info = await self.urban_api_client.get_indicator_info(ind_id) + id_to_name[ind_id] = self._format_indicator_label(ind_info) + except Exception as exc: + logger.warning(f"Failed to fetch Urbanomy indicator info for id={ind_id}: {exc}") + + self._urbanomy_indicator_name_cache = id_to_name + logger.info( + f"Urbanomy indicator name cache loaded: {len(self._urbanomy_indicator_name_cache)} entries" + ) + return self._urbanomy_indicator_name_cache + + async def _attach_urbanomy_indicator_names(self, df: pd.DataFrame) -> pd.DataFrame: + """Attach Urbanomy indicator full names based on numeric indicator_id.""" + if df.empty or "indicator_id" not in df.columns: + logger.warning("Urbanomy df is empty or has no 'indicator_id' column") + return df + + df = df.copy() + id_to_name = await self._load_urbanomy_indicator_name_cache() + if not id_to_name: + logger.warning("Urbanomy indicator name cache is empty, leaving df as is") + return df + + def 
_map_name(v: Any) -> str | None: + if pd.isna(v): + return None + try: + return id_to_name.get(int(v)) + except (TypeError, ValueError): + return None + + df["indicator_name"] = df["indicator_id"].astype("float64").map(_map_name) + before = len(df) + df = df[df["indicator_name"].notna()].copy() + logger.info( + f"Attached Urbanomy indicator names for {len(df)} rows (filtered out {before - len(df)} rows without names)" + ) + return df + + async def _attach_indicator_names( + self, + df: pd.DataFrame, + ) -> pd.DataFrame: + """Attach indicator full names based on numeric indicator_id. + + Expects column 'indicator_id' with numeric IDs. + """ + if df.empty or "indicator_id" not in df.columns: + logger.warning("DataFrame is empty or has no 'indicator_id' column") + return df + + df = df.copy() + + id_to_name = await self._load_indicator_name_cache() + if not id_to_name: + logger.warning("Indicator name cache is empty, leaving dataframe as is") + return df + + def _map_name(v: Any) -> str | None: + if pd.isna(v): + return None + try: + return id_to_name.get(int(v)) + except (TypeError, ValueError): + return None + + df["indicator_name"] = ( + df["indicator_id"] + .astype("float64") + .map(_map_name) + ) + + before = len(df) + df = df[df["indicator_name"].notna()].copy() + logger.info( + f"Attached indicator names for {len(df)} rows (filtered out {before - len(df)} rows without names)" + ) + + return df + + async def _get_land_price_model(self) -> CatBoostRegressor: + """Load CatBoost model once and reuse it.""" + async with self._land_price_model_lock: + if self._land_price_model is not None: + return self._land_price_model + + path = Path(self._catboost_model_path) + if not path.exists(): + raise FileNotFoundError(f"CatBoost model not found at: {path}") + + model = CatBoostRegressor() + await asyncio.to_thread(model.load_model, str(path)) + + self._land_price_model = model + logger.info("CatBoost land price model loaded") + return model + + async def _fetch_land_use_potentials(self, scenario_id: int, token: str) -> pd.DataFrame: + scenario_indicators = await self.urban_api_client.get_indicator_scenario_value(scenario_id, token) + + indicator_attributes = { + (item.get("indicator") or {}).get("name_full"): item.get("value") + for item in scenario_indicators + } + + records: list[dict[str, object]] = [] + for indicator_name, land_use in URBANOMY_LAND_USE_RULES.items(): + potential = indicator_attributes.get(indicator_name) + if potential is None: + continue + records.append({"land_use": land_use, "potential": potential}) + + return pd.DataFrame(records).reset_index(drop=True) + + async def _compute_for_single_scenario( + self, + scenario_id: int, + context_blocks: gpd.GeoDataFrame, + context_territories_gdf: gpd.GeoDataFrame, + service_types_df: pd.DataFrame, + proj_src: str, + proj_year: int, + token: str, + only_parent_ids: set[int] | None = None, + ) -> list[dict]: + """ + Compute indicators for ONE scenario with shared context. + Returns JSON-serializable list of records: [{territory_id, indicator_id, value}, ...] 
+ """ + logger.info(f"Computing indicators for scenario_id={scenario_id}") + + scenario_blocks, _ = await self.scenario.aggregate_blocks_layer_scenario( + scenario_id, proj_src, proj_year, token + ) + before_blocks = pd.concat([context_blocks, scenario_blocks], ignore_index=True) + + svc_cols = [ + c for c in before_blocks.columns if c.startswith(("count_", "capacity_")) + ] + if svc_cols: + before_blocks[svc_cols] = ( + before_blocks[svc_cols] + .apply(pd.to_numeric, errors="coerce") + .fillna(0) + .astype("int64") + ) + + context_territories_gdf = context_territories_gdf.to_crs(before_blocks.crs) + try: + assigned = assign_objects( + before_blocks, + context_territories_gdf.rename(columns={"parent": "name"}), + ) + except Exception as e: + logger.exception("Error assigning objects") + raise http_exception(500, "Error assigning objects", _detail=str(e)) + before_blocks["parent"] = assigned["name"].astype(int) + + if only_parent_ids: + before_blocks = before_blocks[ + before_blocks["parent"].isin(only_parent_ids) + ].copy() + + before_blocks = generate_blocksnet_columns(before_blocks, service_types_df) + before_blocks = ensure_missing_id_and_name_columns(before_blocks) + if "population" in before_blocks.columns: + s = pd.to_numeric(before_blocks["population"], errors="coerce").fillna(0) + if pd.api.types.is_float_dtype(s): + s = s.round() + before_blocks["population"] = s.astype("int64") + else: + before_blocks["population"] = 0 + + roads_gdf = await self.urban_api_client.get_physical_objects_scenario( + scenario_id, token=token, physical_object_function_id=ROADS_ID + ) + if roads_gdf is not None and not roads_gdf.empty: + roads_gdf = roads_gdf.to_crs(before_blocks.crs).overlay(before_blocks) + else: + roads_gdf = gpd.GeoDataFrame(geometry=[], crs=before_blocks.crs) + + try: + acc_mx = get_accessibility_matrix(before_blocks) + except Exception as e: + logger.exception("Accessibility matrix calculation failed") + raise http_exception( + 500, "Accessibility matrix calculation failed", _detail=str(e) + ) + dist_mx = calculate_distance_matrix(before_blocks) + + st_for_social = service_types_df[ + service_types_df["infrastructure_type"].notna() + & service_types_df["blocksnet"].notna() + ].copy() + + general = calculate_general_indicators(before_blocks) + demo = calculate_demographic_indicators(before_blocks) + eng = calculate_engineering_indicators(before_blocks) + sc, sp = calculate_social_indicators( + before_blocks, acc_mx, dist_mx, st_for_social + ) + + frames = [general, demo, eng, sc, sp] + + has_roads = ( + roads_gdf is not None + and not roads_gdf.empty + and len(roads_gdf) > 1 + ) + + if has_roads: + try: + transp = calculate_transport_indicators( + before_blocks, acc_mx, roads_gdf + ) + frames.append(transp) + except Exception as exc: + logger.warning( + "Transport indicators skipped: %s", exc + ) + else: + logger.info( + "Transport indicators skipped: roads_gdf is empty or insufficient" + ) + + indicators_df = pd.concat(frames) + + long_df = ( + indicators_df.reset_index() + .rename(columns={"index": "indicator"}) + .melt(id_vars=["indicator"], var_name="territory_id", value_name="value") + ) + long_df = long_df[long_df["territory_id"] != "total"].copy() + long_df["indicator_id"] = long_df["indicator"].map(INDICATORS_MAPPING) + + long_df["territory_id"] = pd.to_numeric( + long_df["territory_id"], errors="coerce" + ).apply(self._clean_number) + long_df["indicator_id"] = long_df["indicator_id"].apply(self._clean_number) + long_df["value"] = long_df["value"].apply(self._clean_number) + 
long_df["value"] = long_df["value"].round(2) + long_df = long_df[ + long_df["indicator_id"].notna() & long_df["territory_id"].notna() + ].fillna(0) + + long_df = await self._attach_indicator_names(long_df) + + territory_id_hint: int | None = None + if "is_project" in before_blocks.columns: + proj_mask = ( + before_blocks["is_project"] + .infer_objects(copy=False) + .fillna(False) + .astype(bool) + ) + territory_id_hint = self._pick_single_territory_id(before_blocks.loc[proj_mask, "parent"]) + + urbanomy_records: list[dict] = [] + try: + if territory_id_hint is not None: + urbanomy_records = await self._compute_urbanomy_for_single_scenario( + scenario_id=scenario_id, + scenario_blocks=scenario_blocks, + context_blocks=context_blocks, + context_territories_gdf=context_territories_gdf, + token=token, + only_parent_ids=only_parent_ids, + territory_id_hint=territory_id_hint, + ) + except Exception as exc: + logger.warning(f"Urbanomy failed for scenario={scenario_id}: {exc}") + + records = long_df[["territory_id", "indicator_name", "value"]].to_dict(orient="records") + + if urbanomy_records: + for r in urbanomy_records: + records.append( + { + "territory_id": self._clean_number(r.get("territory_id")), + "indicator_name": r.get("indicator_name"), + "value": self._clean_number(r.get("value")), + } + ) + + return records + + def _json_safe_number(self, v: Any) -> float | int | None: + """Convert any numeric-like value to a JSON-safe primitive (no NaN/Inf). + + Supports strings with thousand separators like '12 438 136 946' or '2\u00A0339\u00A0984'. + """ + if v is None: + return None + + if isinstance(v, np.generic): + v = v.item() + + if isinstance(v, bool): + return int(v) + + if isinstance(v, int): + return v + + if isinstance(v, float): + return v if math.isfinite(v) else None + + if isinstance(v, str): + s = v.strip() + if not s: + return None + + s = re.compile(r"[\s\u00A0\u202F]").sub("", s) + s = s.replace(",", ".") + s = re.sub(r"[^0-9\.\-]+", "", s) + + if s in {"", "-", ".", "-."}: + return None + + try: + f = float(s) + except ValueError: + return None + + return f if math.isfinite(f) else None + + try: + f = float(v) + except (TypeError, ValueError): + return None + + return f if math.isfinite(f) else None + + async def _pivot_results_by_territory( + self, + results: dict[int, list[dict]], + ) -> dict[int, dict[str, dict[int, float]]]: + """ + Transform scenario-first results to territory-first pivot. + + Input: + results: { + scenario_id: [ + {"territory_id": int, "indicator_name": str, "value": number}, + ... + ], + ... + } + + Output: + { + territory_id: { + indicator_name: { + scenario_id: value | None, + ... + }, + ... + }, + ... 
+ } + """ + pivot: dict[int, dict[str, dict[int, float]]] = {} + + for scenario_id, records in results.items(): + + for rec in records: + if not isinstance(rec, dict): + logger.warning( + f"[Effects] Skip non-dict record in scenario {scenario_id}: {rec}" + ) + continue + + try: + t_id = int(rec["territory_id"]) + ind_name = str(rec["indicator_name"]) + except (KeyError, TypeError, ValueError) as exc: + logger.warning( + f"[Effects] Skip record without proper territory/indicator " + f"in scenario {scenario_id}: {rec} ({exc})" + ) + continue + + val_raw = rec.get("value") + val = self._json_safe_number(val_raw) + if val_raw is not None and val is None: + logger.warning( + f"[Effects] Failed to parse value for scenario {scenario_id}, " + f"territory {t_id}, indicator '{ind_name}': {val_raw}" + ) + val = None + + terr_dict = pivot.setdefault(t_id, {}) + ind_dict = terr_dict.setdefault(ind_name, {}) + ind_dict[int(scenario_id)] = val + + logger.info(f"[Effects] Pivoted to nested format (names): {len(pivot)} territories.") + + all_scenario_ids = list(results.keys()) + if all_scenario_ids: + logger.info( + f"[Effects] Normalizing scenario coverage for {len(all_scenario_ids)} scenarios" + ) + for t_id, terr_dict in pivot.items(): + for ind_name, scenario_dict in terr_dict.items(): + for sid in all_scenario_ids: + scenario_dict.setdefault(int(sid), None) + + return pivot + + def _sanitize_for_json(self, obj: Any) -> Any: + """Recursively replace NaN/Inf and numpy types with JSON-safe values.""" + if isinstance(obj, dict): + return {k: self._sanitize_for_json(v) for k, v in obj.items()} + if isinstance(obj, list): + return [self._sanitize_for_json(v) for v in obj] + if isinstance(obj, tuple): + return [self._sanitize_for_json(v) for v in obj] + + if isinstance(obj, np.generic): + return self._sanitize_for_json(obj.item()) + + if isinstance(obj, float): + return obj if math.isfinite(obj) else None + + return obj + + def _pick_single_territory_id(self, parents: pd.Series) -> int | None: + """Pick a single territory_id from assigned parents; prefer mode if multiple.""" + s = pd.to_numeric(parents, errors="coerce").dropna().astype("int64") + if s.empty: + return None + uniq = s.unique() + if len(uniq) == 1: + return int(uniq[0]) + + mode = int(s.mode().iat[0]) + logger.warning( + f"Multiple territory_ids detected for scenario project blocks {sorted(map(int, uniq))}. 
Using mode={mode}", + ) + return mode + + def _urbanomy_se_result_to_indicator_values(self, result: Any) -> pd.DataFrame: + """Normalize SEREstimator output to dataframe with columns: indicator, value.""" + if isinstance(result, pd.DataFrame): + df = result.copy() + if "delta_total" in df.columns and "value" not in df.columns: + df = df.rename(columns={"delta_total": "value"}) + if {"indicator", "value"}.issubset(df.columns): + return df[["indicator", "value"]].copy() + + raise ValueError(f"Unsupported Urbanomy result dataframe columns: {list(df.columns)}") + + if isinstance(result, dict): + return pd.DataFrame([{"indicator": str(k), "value": v} for k, v in result.items()]) + + if isinstance(result, pd.Series): + out = result.reset_index() + out.columns = ["indicator", "value"] + return out + + raise TypeError(f"Unsupported SEREstimator result type: {type(result)!r}") + + async def _compute_urbanomy_for_single_scenario( + self, + scenario_id: int, + scenario_blocks: gpd.GeoDataFrame, + context_blocks: gpd.GeoDataFrame, + context_territories_gdf: gpd.GeoDataFrame, + token: str, + only_parent_ids: set[int] | None = None, + territory_id_hint: int | None = None, + ) -> list[dict]: + """Compute Urbanomy metrics for one scenario and return records: + [{territory_id, indicator_id, indicator_name, value}, ...] + """ + s_cols = [c for c in URBANOMY_BLOCK_COLS if c in scenario_blocks.columns] + c_cols = [c for c in URBANOMY_BLOCK_COLS if c in context_blocks.columns] + if "geometry" not in s_cols or "geometry" not in c_cols: + logger.warning("Urbanomy skipped: geometry column missing") + return [] + + scenario_blocks_cut = scenario_blocks[s_cols].copy() + context_blocks_cut = context_blocks[c_cols].copy() + + preparator = LandDataPreparator( + scenario_blocks_source=scenario_blocks_cut, + context_blocks_source=context_blocks_cut, + ) + prepared = await asyncio.to_thread(preparator.prepare) + + model = await self._get_land_price_model() + estimator = LandPriceEstimator(model=model, blocks=prepared) + blocks_with_land_value = await asyncio.to_thread(estimator.predict) + + if "is_project" in blocks_with_land_value.columns: + project_blocks = blocks_with_land_value.loc[blocks_with_land_value["is_project"] == True].copy() + else: + logger.warning("Urbanomy: 'is_project' column not found; using all blocks") + project_blocks = blocks_with_land_value.copy() + + if project_blocks.empty: + logger.info(f"Urbanomy: no project blocks for scenario={scenario_id}") + return [] + + territory_id: int | None = None + + if territory_id_hint is not None: + territory_id = int(territory_id_hint) + if only_parent_ids and territory_id not in only_parent_ids: + logger.info( + f"Urbanomy: territory_id={territory_id} not in only_parent_ids, skipping scenario={scenario_id}") + return [] + else: + territories = context_territories_gdf.to_crs(project_blocks.crs) + assigned = assign_objects(project_blocks, territories.rename(columns={"parent": "name"})) + project_blocks["parent"] = pd.to_numeric(assigned["name"], errors="coerce") + + if only_parent_ids: + project_blocks = project_blocks[project_blocks["parent"].isin(only_parent_ids)].copy() + + territory_id = self._pick_single_territory_id(project_blocks["parent"]) + if territory_id is None: + logger.warning(f"Urbanomy: failed to detect territory_id for scenario={scenario_id}") + return [] + + project_blocks = project_blocks[project_blocks["parent"] == territory_id].copy() + if project_blocks.empty: + return [] + + potential_df = await 
self._fetch_land_use_potentials(scenario_id=scenario_id, token=token) + + investment_input = prepare_investment_input(gdf=project_blocks, project_potential=potential_df) + + analyzer = InvestmentAttractivenessAnalyzer(benchmarks=benchmarks_demo) + summary = analyzer.calculate_investment_metrics(investment_input, discount_rate=0.18) + scn = project_blocks[["geometry"]].join(summary) + + total_pop = 0 + if "population" in project_blocks.columns: + total_pop = int(pd.to_numeric(project_blocks["population"], errors="coerce").fillna(0).sum()) + + est = SEREstimator({"population": max(total_pop, 0) or 300_000}) + result = est.compute(scn, pretty=True) + + df = self._urbanomy_se_result_to_indicator_values(result) + df["indicator_id"] = df["indicator"].map(URBANOMY_INDICATORS_MAPPING) + df = df[df["indicator_id"].notna()].copy() + df["indicator_id"] = df["indicator_id"].astype("int64") + + df = df[df["value"].notna()].copy() + + df["territory_id"] = int(territory_id) + df = df.rename(columns={"indicator": "indicator_name"}) + + df = await self._attach_urbanomy_indicator_names(df) + + return df[["territory_id", "indicator_id", "indicator_name", "value"]].to_dict(orient="records") + + def _pivot_urbanomy_by_territory_and_indicator( + self, + results: dict[int, list[dict]], + ) -> dict[int, dict[int, dict[int, float | None]]]: + """Pivot scenario-first records to territory->indicator_id->scenario_id.""" + pivot: dict[int, dict[int, dict[int, float | None]]] = {} + + for scenario_id, records in results.items(): + for rec in records: + try: + t_id = int(rec["territory_id"]) + ind_id = int(rec["indicator_id"]) + except Exception: + continue + + + terr = pivot.setdefault(t_id, {}) + ind = terr.setdefault(ind_id, {}) + ind[int(scenario_id)] = rec.get("value") + + sids = [int(s) for s in results.keys()] + for t_id, terr in pivot.items(): + for ind_id, scn_map in terr.items(): + for sid in sids: + scn_map.setdefault(sid, None) + + return pivot + + def _filter_by_territories(self, results: dict, territory_ids: set[int] | None) -> dict: + """Filter cached results by territory ids if provided.""" + if not territory_ids: + return results + return {tid: results[tid] for tid in territory_ids if tid in results} + + async def evaluate_social_economical_metrics(self, token: str, params: SocioEconomicByProjectDTO): + """ + Project-level multi-scenario calculation with a shared context. 
+ Return: {territory_id: {indicator_name: {scenario_id: value}}} + """ + EFFECTS_SOCIO_ECONOMICAL_METRICS_TOTAL.inc() + start_time = time.perf_counter() + try: + project_id = params.project_id + parent_id = params.regional_scenario_id + method_name = "social_economical_metrics" + + requested_ids = {int(x) for x in getattr(params, "territory_ids", [])} or None + + params_for_hash = { + "project_id": project_id, + "regional_scenario_id": parent_id, + } + + if not params.force: + phash = self.cache.params_hash(params_for_hash) + cached = self.cache.load(method_name, project_id, phash) + if cached: + logger.info(f"[Effects] cache hit for project {project_id}, parent={parent_id}") + data = cached.get("data", cached) + results_all = self._sanitize_for_json(data["results"]) + return self._filter_by_territories(results_all, requested_ids) + else: + logger.info(f"[Effects] force=True, recalculating metrics for project {project_id}, parent={parent_id}") + + context_blocks, context_territories_gdf, service_types = await self.context.get_shared_context(project_id, + token) + + scenarios = await self.urban_api_client.get_project_scenarios(project_id, token) + target = [s for s in scenarios if (s.get("parent_scenario") or {}).get("id") == parent_id] + logger.info(f"[Effects] matched {len(target)} scenarios in project {project_id} (parent={parent_id})") + + results: dict[int, list[dict]] = {} + + only_parent_ids = None + + for s in target: + sid = int(s["scenario_id"]) + try: + proj_src, proj_year = await self.urban_api_client.get_optimal_func_zone_request_data( + token=token, + data_id=sid, + source=None, + year=None, + project=True, + ) + + records = await self._compute_for_single_scenario( + sid, + context_blocks=context_blocks, + context_territories_gdf=context_territories_gdf, + service_types_df=service_types, + proj_src=proj_src, + proj_year=proj_year, + token=token, + only_parent_ids=only_parent_ids, + ) + results[sid] = records + except Exception: + logger.error(f"[Effects] Scenario {sid} failed during socio-economic computation") + results[sid] = [] + + results_all = await self._pivot_results_by_territory(results) + results_all = self._sanitize_for_json(results_all) + + project_info = await self.urban_api_client.get_project(project_id, token) + updated_at = project_info.get("updated_at") + + self.cache.save( + method_name, + project_id, + params_for_hash, + {"results": results_all}, + scenario_updated_at=updated_at, + ) + + logger.success(f"[Effects] socio-economic metrics cached for project_id={project_id}, parent={parent_id}") + return self._filter_by_territories(results_all, requested_ids) + except Exception: + EFFECTS_SOCIO_ECONOMICAL_METRICS_ERROR_TOTAL.inc() + raise + finally: + EFFECTS_SOCIO_ECONOMICAL_METRICS_DURATION_SECONDS.observe(time.perf_counter() - start_time) + diff --git a/app/effects_api/modules/__init__.py b/app/effects_api/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/effects_api/modules/buildings_service.py b/app/effects_api/modules/buildings_service.py new file mode 100644 index 0000000..4ff5c44 --- /dev/null +++ b/app/effects_api/modules/buildings_service.py @@ -0,0 +1,36 @@ +import geopandas as gpd +import pandas as pd + +from app.effects_api.constants.const import BUILDINGS_RULES + + +def _parse(data: dict | None, *args): + key = args[0] + args = args[1:] + if data is not None and key in data and data[key] is not None: + if len(args) == 0: + value = data[key] + if isinstance(value, str): + value = value.replace(",", ".") + return value 
+ return _parse(data[key], *args) + return None + + +def _adapt(data: dict, rules: list): + for rule in rules: + value = _parse(data, *rule) + if value is not None: + return value + return None + + +def adapt_buildings(buildings_gdf: gpd.GeoDataFrame): + gdf = buildings_gdf[["geometry"]].copy() + gdf["is_living"] = buildings_gdf["physical_object_type"].apply( + lambda pot: pot["physical_object_type_id"] == 4 + ) + for column, rules in BUILDINGS_RULES.items(): + series = buildings_gdf["building"].apply(lambda b: _adapt(b, rules)) + gdf[column] = pd.to_numeric(series, errors="coerce") + return gdf diff --git a/app/effects_api/modules/context_service.py b/app/effects_api/modules/context_service.py new file mode 100644 index 0000000..10d2922 --- /dev/null +++ b/app/effects_api/modules/context_service.py @@ -0,0 +1,428 @@ +import asyncio +from pathlib import Path +from typing import Dict, Tuple + +import geopandas as gpd +import pandas as pd +from blocksnet.blocks.aggregation import aggregate_objects +from blocksnet.blocks.assignment import assign_land_use +from blocksnet.blocks.cutting import cut_urban_blocks, preprocess_urban_objects +from blocksnet.preprocessing.imputing import impute_buildings, impute_services +from blocksnet.relations import get_accessibility_context +from loguru import logger + +from app.clients.urban_api_client import UrbanAPIClient +from app.common.caching.caching_service import FileCache +from app.common.utils.geodata import get_best_functional_zones_source +from app.effects_api.constants.const import ( + LAND_USE_RULES, + LIVING_BUILDINGS_ID, + ROADS_ID, + SOCIAL_INDICATORS_MAPPING, + WATER_ID, +) +from app.effects_api.modules.buildings_service import adapt_buildings +from app.effects_api.modules.functional_sources_service import adapt_functional_zones +from app.effects_api.modules.scenario_service import close_gaps +from app.effects_api.modules.service_type_service import ( + adapt_service_types, + adapt_social_service_types_df, +) +from app.effects_api.modules.services_service import adapt_services + + +class ContextService: + """Context layer orchestration (blocks, buildings, services, fzones).""" + + def __init__(self, urban_api_client: UrbanAPIClient, cache: FileCache): + self.client = urban_api_client + self.cache = cache + + async def _get_project_boundaries( + self, project_id: int, token: str + ) -> gpd.GeoDataFrame: + """Return project boundary polygon as GeoDataFrame (EPSG:4326).""" + geom = await self.client.get_project_geometry(project_id, token=token) + return gpd.GeoDataFrame(geometry=[geom], crs=4326) + + async def _get_context_boundaries( + self, project_id: int, token: str + ) -> gpd.GeoDataFrame: + """Return union of context territories as GeoDataFrame (EPSG:4326).""" + project = await self.client.get_project(project_id, token) + context_ids = project["properties"]["context"] + geometries = [ + await self.client.get_territory_geometry(tid) for tid in context_ids + ] + return gpd.GeoDataFrame(geometry=geometries, crs=4326) + + async def _get_context_roads( + self, scenario_id: int, token: str + ) -> gpd.GeoDataFrame | None: + """Return roads geometry for context cut (only geometry column).""" + gdf = await self.client.get_physical_objects( + scenario_id, token, physical_object_function_id=ROADS_ID + ) + if gdf is None: + return None + return gdf[["geometry"]].reset_index(drop=True) + + async def _get_context_water( + self, scenario_id: int, token: str + ) -> gpd.GeoDataFrame | None: + """Return water geometry for context cut (only geometry 
column).""" + gdf = await self.client.get_physical_objects( + scenario_id, token=token, physical_object_function_id=WATER_ID + ) + if gdf is None: + return None + return gdf[["geometry"]].reset_index(drop=True) + + async def _get_context_blocks( + self, + scenario_id: int, + boundaries: gpd.GeoDataFrame, + token: str, + ) -> gpd.GeoDataFrame: + """Construct context blocks by cutting boundaries with roads/water.""" + crs = boundaries.crs + boundaries.geometry = boundaries.buffer(-1) + + water, roads = await asyncio.gather( + self._get_context_water(scenario_id, token), + self._get_context_roads(scenario_id, token), + ) + + if water is not None and not water.empty: + water = water.to_crs(crs).explode().reset_index(drop=True) + water_geoms = ['Polygon', 'MultiPolygon', 'LineString', 'MultiLineString'] + water = water[water.geom_type.isin(water_geoms)].reset_index(drop=True) + + if roads is not None and not roads.empty: + roads = roads.to_crs(crs).explode().reset_index(drop=True) + roads.geometry = close_gaps(roads, 1) + roads = roads.explode(column="geometry") + roads_geoms = ['LineString', 'MultiLineString'] + + roads = roads[roads.geom_type.isin(roads_geoms)].reset_index(drop=True) + else: + roads = gpd.GeoDataFrame(geometry=[], crs=boundaries.crs) + water = None + + lines, polygons = preprocess_urban_objects(roads, None, water.reset_index(drop=True)) + blocks = cut_urban_blocks(boundaries, lines, polygons) + return blocks + + async def get_context_blocks( + self, project_id: int, scenario_id: int, token: str + ) -> gpd.GeoDataFrame: + """ + Build context blocks (outside project boundary but inside context territories). + """ + project_boundaries, context_boundaries = await asyncio.gather( + self._get_project_boundaries(project_id, token), + self._get_context_boundaries(project_id, token), + ) + + crs = context_boundaries.estimate_utm_crs() + context_boundaries = context_boundaries.to_crs(crs) + project_boundaries = project_boundaries.to_crs(crs) + + context_boundaries = context_boundaries.overlay( + project_boundaries, how="difference" + ) + return await self._get_context_blocks(scenario_id, context_boundaries, token) + + async def get_context_functional_zones( + self, + scenario_id: int, + source: str | None, + year: int | None, + token: str, + ) -> gpd.GeoDataFrame: + """ + Fetch + adapt functional zones for context by best source/year if not given. + """ + sources_df = await self.client.get_functional_zones_sources(scenario_id, token) + year, source = await get_best_functional_zones_source(sources_df, source, year) + functional_zones = await self.client.get_functional_zones( + scenario_id, year, source, token + ) + functional_zones = functional_zones.loc[ + functional_zones.geometry.geom_type.isin({"Polygon", "MultiPolygon"}) + ].reset_index(drop=True) + return adapt_functional_zones(functional_zones) + + async def get_context_buildings( + self, scenario_id: int, token: str + ) -> gpd.GeoDataFrame | None: + """ + Fetch, adapt and impute living buildings for context. + Returns EPSG:4326 GeoDataFrame or None if not found. 
+ """ + gdf = await self.client.get_physical_objects( + scenario_id, + token, + physical_object_type_id=LIVING_BUILDINGS_ID, + centers_only=True, + ) + if gdf is None or gdf.empty: + return None + + gdf = adapt_buildings(gdf.reset_index(drop=True)) + crs = gdf.estimate_utm_crs() + return impute_buildings(gdf.to_crs(crs)).to_crs(4326) + + async def get_context_services( + self, scenario_id: int, service_types: pd.DataFrame, token: str + ) -> Dict[str, gpd.GeoDataFrame]: + """ + Fetch and adapt services by service type (dict of GeoDataFrames). + """ + gdf = await self.client.get_services(scenario_id, token, centers_only=True) + gdf = gdf.to_crs(gdf.estimate_utm_crs()) + gdfs = adapt_services(gdf.reset_index(drop=True), service_types) + return {st: impute_services(gdf, st) for st, gdf in gdfs.items()} + + async def get_context_territories( + self, project_id: int, token: str + ) -> gpd.GeoDataFrame: + """ + Return context territories as polygons with column 'parent' = territory_id (EPSG:4326). + """ + project = await self.client.get_all_project_info(project_id, token) + context_ids = project["properties"]["context"] + data = [ + { + "parent": territory_id, + "geometry": await self.client.get_territory_geometry(territory_id), + } + for territory_id in context_ids + ] + return gpd.GeoDataFrame(data=data, crs=4326) + + async def load_context_blocks( + self, scenario_id: int, token: str + ) -> Tuple[gpd.GeoDataFrame, int]: + """ + Load raw context blocks and compute site_area. + """ + project_id = await self.client.get_project_id(scenario_id, token) + blocks = await self.get_context_blocks(project_id, scenario_id, token) + blocks["site_area"] = blocks.area + return blocks, project_id + + async def assign_land_use_context( + self, + blocks: gpd.GeoDataFrame, + scenario_id: int, + source: str | None, + year: int | None, + token: str, + ) -> gpd.GeoDataFrame: + """ + Assign land use to blocks via functional zones and LAND_USE_RULES. + """ + fzones = await self.get_context_functional_zones( + scenario_id, source, year, token + ) + fzones = fzones.to_crs(blocks.crs) + lu = assign_land_use(blocks, fzones, LAND_USE_RULES) + return blocks.join(lu.drop(columns=["geometry"])) + + async def enrich_with_context_buildings( + self, blocks: gpd.GeoDataFrame, scenario_id: int, token: str + ) -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame | None]: + """ + Aggregate living buildings on blocks (count_buildings), keep 'is_living' column. + """ + buildings = await self.get_context_buildings(scenario_id, token) + if buildings is None: + blocks["count_buildings"] = 0 + blocks["is_living"] = None + return blocks, None + + buildings = buildings.to_crs(blocks.crs) + agg, _ = aggregate_objects(blocks, buildings) + + blocks = blocks.join( + agg.drop(columns=["geometry"]).rename(columns={"count": "count_buildings"}) + ) + blocks["count_buildings"] = blocks["count_buildings"].fillna(0).astype(int) + if "is_living" not in blocks.columns: + blocks["is_living"] = None + + return blocks, buildings + + async def enrich_with_context_services( + self, blocks: gpd.GeoDataFrame, scenario_id: int, token: str + ) -> gpd.GeoDataFrame: + """ + Aggregate services on blocks: add capacity_{st} / count_{st} columns. 
+ """ + stypes = await self.client.get_service_types() + stypes = await adapt_service_types(stypes, self.client) + + sdict = await self.get_context_services(scenario_id, stypes, token) + if not sdict: + logger.info( + f"No context services to aggregate for scenario_id={scenario_id}" + ) + return blocks + + for stype, services in sdict.items(): + services = services.to_crs(blocks.crs) + b_srv, _ = aggregate_objects(blocks, services) + b_srv[["capacity", "count"]] = ( + b_srv[["capacity", "count"]].fillna(0).astype(int) + ) + + blocks = blocks.join( + b_srv.drop(columns=["geometry"]).rename( + columns={"capacity": f"capacity_{stype}", "count": f"count_{stype}"} + ) + ) + return blocks + + async def aggregate_blocks_layer_context( + self, + scenario_id: int, + source: str | None = None, + year: int | None = None, + token: str | None = None, + ) -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame | None]: + """ + Build full context blocks layer: + 1) load blocks + 2) assign land use + 3) enrich with buildings + 4) enrich with services + """ + logger.info(f"[Context {scenario_id}] load blocks") + blocks, _project_id = await self.load_context_blocks(scenario_id, token) + + logger.info("Assigning land-use for context") + blocks = await self.assign_land_use_context( + blocks, scenario_id, source, year, token + ) + + logger.info("Aggregating buildings for context") + blocks, buildings = await self.enrich_with_context_buildings( + blocks, scenario_id, token + ) + + logger.info("Aggregating services for context") + blocks = await self.enrich_with_context_services(blocks, scenario_id, token) + + logger.success(f"[Context {scenario_id}] blocks layer ready", scenario_id) + return blocks, buildings + + async def get_accessibility_context( + self, blocks: pd.DataFrame, acc_mx: pd.DataFrame, accessibility: float + ) -> list[int]: + blocks["population"] = blocks["population"].fillna(0) + project_blocks = blocks.copy() + context_blocks = get_accessibility_context( + acc_mx, project_blocks, accessibility, out=False, keep=True + ) + return list(context_blocks.index) + + async def get_shared_context( + self, + project_id: int, + token: str, + ) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame, pd.DataFrame]: + """ + Get cached context (blocks, territories, service_types: artifacts) by project_id, + or build and cache if missing/corrupted. + + JSON cache stores only paths to artifacts. If any artifact file is missing, + the cache is treated as stale and context is recomputed. + """ + method = "shared_context" + params = {"project_id": int(project_id)} + phash = self.cache.params_hash(params) + + cached = self.cache.load(method, project_id, phash) + if cached: + logger.info(f"Shared context cache hit for project_id={project_id}") + data = cached["data"] + + try: + ctx_blocks = self.cache.load_gdf_artifact(Path(data["context_blocks_path"])) + ctx_territories = self.cache.load_gdf_artifact(Path(data["context_territories_path"])) + service_types = self.cache.load_df_artifact(Path(data["service_types_path"])) + return ctx_blocks, ctx_territories, service_types + + except (FileNotFoundError, OSError, KeyError) as exc: + # KeyError — если в JSON вдруг нет нужного ключа + logger.warning( + f"Shared context cache is corrupted/stale for project_id={project_id}. " + f"Rebuilding. 
Reason: {exc}" + ) + # optional: если у тебя есть метод точечной инвалидции: + # self.cache.invalidate(method, project_id, phash) + + logger.info(f"Shared context cache miss for project_id={project_id} — is building") + + territory_id = (await self.client.get_all_project_info(project_id, token))["territory"]["id"] + base_sid = await self.client.get_base_scenario_id(project_id, token) + ctx_src, ctx_year = await self.client.get_optimal_func_zone_request_data( + token=token, data_id=base_sid, source=None, year=None, project=False + ) + + normatives = (await self.client.get_territory_normatives(territory_id))[ + [ + "radius_availability_meters", + "time_availability_minutes", + "services_per_1000_normative", + "services_capacity_per_1000_normative", + ] + ].copy() + + service_types = await self.client.get_service_types() + service_types = await adapt_service_types(service_types, self.client) + service_types = service_types[service_types["infrastructure_type"].notna()].copy() + service_types = adapt_social_service_types_df( + service_types, SOCIAL_INDICATORS_MAPPING + ).join(normatives) + + ctx_blocks, _ = await self.aggregate_blocks_layer_context(base_sid, ctx_src, ctx_year, token) + ctx_territories = await self.get_context_territories(project_id, token) + + ctx_blocks_path = self.cache.save_gdf_artifact( + ctx_blocks, + method=method, + owner_id=project_id, + params=params, + name="context_blocks", + fmt="pkl", + ) + ctx_territories_path = self.cache.save_gdf_artifact( + ctx_territories, + method=method, + owner_id=project_id, + params=params, + name="context_territories", + fmt="parquet", + ) + service_types_path = self.cache.save_df_artifact( + service_types, + method=method, + owner_id=project_id, + params=params, + name="service_types", + fmt="parquet", + ) + + self.cache.save( + method, + project_id, + params, + { + "context_blocks_path": str(ctx_blocks_path), + "context_territories_path": str(ctx_territories_path), + "service_types_path": str(service_types_path), + }, + ) + return ctx_blocks, ctx_territories, service_types diff --git a/app/effects_api/modules/functional_sources_service.py b/app/effects_api/modules/functional_sources_service.py new file mode 100644 index 0000000..6b96a81 --- /dev/null +++ b/app/effects_api/modules/functional_sources_service.py @@ -0,0 +1,14 @@ +import geopandas as gpd + + +def _adapt_functional_zone(data: dict): + functional_zone_type_id = data["name"] + return functional_zone_type_id + + +def adapt_functional_zones(functional_zones_gdf: gpd.GeoDataFrame): + gdf = functional_zones_gdf[["geometry"]].copy() + gdf["functional_zone"] = functional_zones_gdf["functional_zone_type"].apply( + _adapt_functional_zone + ) + return gdf diff --git a/app/effects_api/modules/scenario_service.py b/app/effects_api/modules/scenario_service.py new file mode 100644 index 0000000..f982163 --- /dev/null +++ b/app/effects_api/modules/scenario_service.py @@ -0,0 +1,302 @@ +import asyncio + +import geopandas as gpd +import numpy as np +import pandas as pd +import shapely +from blocksnet.blocks.aggregation import aggregate_objects +from blocksnet.blocks.assignment import assign_land_use +from blocksnet.blocks.cutting import cut_urban_blocks, preprocess_urban_objects +from blocksnet.preprocessing.imputing import impute_buildings, impute_services +from loguru import logger + +from app.clients.urban_api_client import UrbanAPIClient +from app.common.exceptions.http_exception_wrapper import http_exception +from app.effects_api.constants.const import ( + LAND_USE_RULES, + 
LIVING_BUILDINGS_ID, + ROADS_ID, + WATER_ID, +) +from app.effects_api.modules.buildings_service import adapt_buildings +from app.effects_api.modules.functional_sources_service import adapt_functional_zones +from app.effects_api.modules.service_type_service import adapt_service_types +from app.effects_api.modules.services_service import adapt_services + + + +def close_gaps(gdf, tolerance): # taken from momepy + geom = gdf.geometry.array + coords = shapely.get_coordinates(geom) + indices = shapely.get_num_coordinates(geom) + + edges = [0] + i = 0 + for ind in indices: + ix = i + ind + edges.append(ix - 1) + edges.append(ix) + i = ix + edges = edges[:-1] + points = shapely.points(np.unique(coords[edges], axis=0)) + + buffered = shapely.buffer(points, tolerance / 2) + dissolved = shapely.union_all(buffered) + exploded = [ + shapely.get_geometry(dissolved, i) + for i in range(shapely.get_num_geometries(dissolved)) + ] + centroids = shapely.centroid(exploded) + snapped = shapely.snap(geom, shapely.union_all(centroids), tolerance) + return gpd.GeoSeries(snapped, crs=gdf.crs) + + +class ScenarioService: + + def __init__(self, urban_api_client: UrbanAPIClient): + self.client = urban_api_client + + async def _get_project_boundaries( + self, project_id: int, token: str + ) -> gpd.GeoDataFrame: + geom = await self.client.get_project_geometry(project_id, token) + return gpd.GeoDataFrame(geometry=[geom], crs=4326) + + async def _get_scenario_roads(self, scenario_id: int, token: str): + gdf = await self.client.get_physical_objects_scenario( + scenario_id, token, physical_object_function_id=ROADS_ID + ) + if gdf is None: + return None + return gdf[["geometry"]].reset_index(drop=True) + + async def _get_scenario_water(self, scenario_id: int, token: str): + gdf = await self.client.get_physical_objects_scenario( + scenario_id, token, physical_object_function_id=WATER_ID + ) + if gdf is None: + return None + return gdf[["geometry"]].reset_index(drop=True) + + async def _get_scenario_blocks( + self, + user_scenario_id: int, + boundaries: gpd.GeoDataFrame, + token: str, + ) -> gpd.GeoDataFrame: + crs = boundaries.crs + boundaries.geometry = boundaries.buffer(-1) + + ( + water, + user_roads, + ) = await asyncio.gather( + self._get_scenario_water(user_scenario_id, token), + self._get_scenario_roads(user_scenario_id, token), + ) + + if water is not None and not water.empty: + water = water.to_crs(crs).explode().reset_index(drop=True) + water_geoms = ['Polygon', 'MultiPolygon', 'LineString', 'MultiLineString'] + water = water[water.geom_type.isin(water_geoms)].reset_index(drop=True) + + if user_roads is not None and not user_roads.empty: + user_roads = user_roads.to_crs(crs).explode().reset_index(drop=True) + + if user_roads is not None and not user_roads.empty: + user_roads.geometry = close_gaps(user_roads, 1) + roads = user_roads.explode(column="geometry") + roads_geoms = ['LineString', 'MultiLineString'] + + roads = roads[roads.geom_type.isin(roads_geoms)].reset_index(drop=True) + + else: + roads = gpd.GeoDataFrame(geometry=[], crs=boundaries.crs) + water = None + + lines, polygons = preprocess_urban_objects(roads, None, water) + blocks = cut_urban_blocks(boundaries, lines, polygons) + return blocks + + async def _get_scenario_info(self, scenario_id: int, token: str) -> tuple[int, int]: + scenario = await self.client.get_scenario(scenario_id, token) + project_id = scenario["project"]["project_id"] + project = await self.client.get_project(project_id, token) + base_scenario_id = project["base_scenario"]["id"] + 
return project_id, base_scenario_id + + async def get_scenario_blocks( + self, user_scenario_id: int, token: str + ) -> gpd.GeoDataFrame: + project_id, base_scenario_id = await self._get_scenario_info( + user_scenario_id, token + ) + project_boundaries = await self._get_project_boundaries(project_id, token) + crs = project_boundaries.estimate_utm_crs() + project_boundaries = project_boundaries.to_crs(crs) + return await self._get_scenario_blocks( + user_scenario_id, project_boundaries, token + ) + + async def get_scenario_functional_zones( + self, + scenario_id: int, + token: str, + source: str | None = None, + year: int | None = None, + ) -> gpd.GeoDataFrame: + functional_zones = await self.client.get_functional_zones_scenario( + scenario_id, token, year, source + ) + functional_zones = functional_zones.loc[ + functional_zones.geometry.geom_type.isin({"Polygon", "MultiPolygon"}) + ].reset_index(drop=True) + return adapt_functional_zones(functional_zones) + + async def get_scenario_buildings(self, scenario_id: int, token: str): + try: + gdf = await self.client.get_physical_objects_scenario( + scenario_id, + token, + physical_object_type_id=LIVING_BUILDINGS_ID, + centers_only=False, + ) + if gdf is None: + return None + gdf = adapt_buildings(gdf.reset_index(drop=True)) + crs = gdf.estimate_utm_crs() + return impute_buildings(gdf.to_crs(crs)).to_crs(4326) + except Exception as e: + logger.exception(e) + raise http_exception( + 404, + f"No buildings found for scenario {scenario_id}", + _input={"scenario_id": scenario_id}, + _detail={"error": repr(e)}, + ) from e + + async def get_scenario_services( + self, scenario_id: int, service_types: pd.DataFrame, token: str + ): + try: + res = await self.client.get_services_scenario( + scenario_id, centers_only=True, token=token + ) + features = res.get("features") or [] + + if not features: + logger.info( + f"Scenario {scenario_id}: no services (features=[]) -> returning empty dict" + ) + return {} + + gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326").set_index( + "service_id", drop=False + ) + gdf = gdf.to_crs(gdf.estimate_utm_crs()) + + gdfs = adapt_services(gdf.reset_index(drop=True), service_types) + return {st: impute_services(g, st) for st, g in gdfs.items()} + + except Exception as e: + logger.exception( + f"Failed to fetch/process services for scenario {scenario_id}: {str(e)}" + ) + raise http_exception( + 404, + f"No services found for scenario {scenario_id}", + _input={"scenario_id": scenario_id}, + _detail={"error": repr(e)}, + ) from e + + async def load_blocks_scenario( + self, scenario_id: int, token: str + ) -> gpd.GeoDataFrame: + gdf = await self.get_scenario_blocks(scenario_id, token) + gdf["site_area"] = gdf.area + return gdf + + async def assign_land_use_to_blocks_scenario( + self, + blocks: gpd.GeoDataFrame, + scenario_id: int, + source: str | None, + year: int | None, + token: str, + ) -> gpd.GeoDataFrame: + fzones = await self.get_scenario_functional_zones( + scenario_id, token, source, year + ) + fzones = fzones.to_crs(blocks.crs) + lu = assign_land_use(blocks, fzones, LAND_USE_RULES) + return blocks.join(lu.drop(columns=["geometry"])) + + async def enrich_with_buildings_scenario( + self, blocks: gpd.GeoDataFrame, scenario_id: int, token: str + ) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame | None]: + buildings = await self.get_scenario_buildings(scenario_id, token) + if buildings is None: + blocks["count_buildings"] = 0 + return blocks, None + + buildings = buildings.to_crs(blocks.crs) + blocks_bld, _ = 
aggregate_objects(blocks, buildings) + + blocks = blocks.join( + blocks_bld.drop(columns=["geometry"]).rename( + columns={"count": "count_buildings"} + ) + ) + blocks["count_buildings"] = blocks["count_buildings"].fillna(0).astype(int) + if "is_living" not in blocks.columns: + blocks["is_living"] = None + return blocks, buildings + + async def enrich_with_services_scenario( + self, blocks: gpd.GeoDataFrame, scenario_id: int, token: str + ) -> gpd.GeoDataFrame: + stypes = await self.client.get_service_types() + stypes = await adapt_service_types(stypes, self.client) + sdict = await self.get_scenario_services(scenario_id, stypes, token) + + for stype, services in sdict.items(): + services = services.to_crs(blocks.crs) + b_srv, _ = aggregate_objects(blocks, services) + b_srv[["capacity", "count"]] = ( + b_srv[["capacity", "count"]].fillna(0).astype(int) + ) + blocks = blocks.join( + b_srv.drop(columns=["geometry"]).rename( + columns={"capacity": f"capacity_{stype}", "count": f"count_{stype}"} + ) + ) + return blocks + + async def aggregate_blocks_layer_scenario( + self, + scenario_id: int, + source: str | None = None, + year: int | None = None, + token: str | None = None, + ) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame | None]: + + logger.info(f"[Scenario {scenario_id}] load blocks") + blocks = await self.load_blocks_scenario(scenario_id, token) + + logger.info("Assigning land-use for scenario") + blocks = await self.assign_land_use_to_blocks_scenario( + blocks, scenario_id, source, year, token + ) + + logger.info("Aggregating buildings for scenario") + blocks, buildings = await self.enrich_with_buildings_scenario( + blocks, scenario_id, token + ) + + logger.info("Aggregating services for scenario") + blocks = await self.enrich_with_services_scenario(blocks, scenario_id, token) + + blocks["is_project"] = True + logger.success(f"[scenario {scenario_id}] blocks layer ready") + + return blocks, buildings diff --git a/app/effects_api/modules/service_type_service.py b/app/effects_api/modules/service_type_service.py new file mode 100644 index 0000000..8cec4b7 --- /dev/null +++ b/app/effects_api/modules/service_type_service.py @@ -0,0 +1,409 @@ +import asyncio +import re +from typing import Dict, Iterable, List, Mapping, Optional, cast + +import geopandas as gpd +import pandas as pd +from blocksnet.analysis.indicators.socio_economic import SocialIndicator +from blocksnet.config import service_types_config +from loguru import logger + +from app.clients.urban_api_client import UrbanAPIClient +from app.common.caching.caching_service import FileCache +from app.common.utils.effects_utils import EffectsUtils +from app.effects_api.constants.const import SERVICE_TYPES_MAPPING + +_SOCIAL_VALUES_BY_ST: Dict[int, Optional[List[int]]] = {} +_SOCIAL_VALUES_LOCK = asyncio.Lock() +_SERVICE_NAME_TO_ID: dict[str, int] = { + name: sid for sid, name in SERVICE_TYPES_MAPPING.items() +} +_VALID_SERVICE_NAMES: set[str] = set(_SERVICE_NAME_TO_ID.keys()) +_NUM_SUFFIX_RE = re.compile(r"^\d+$") + +for st_id, st_name in SERVICE_TYPES_MAPPING.items(): + if st_name is None: + continue + assert st_name in service_types_config, f"{st_id}:{st_name} not in config" + + +async def _adapt_name(service_type_id: int) -> Optional[str]: + return SERVICE_TYPES_MAPPING.get(service_type_id) + + +async def _warmup_social_values( + service_type_ids: List[int], client: UrbanAPIClient +) -> None: + missing = [sid for sid in service_type_ids if sid not in _SOCIAL_VALUES_BY_ST] + if not missing: + return + async with _SOCIAL_VALUES_LOCK: + missing 
= [sid for sid in service_type_ids if sid not in _SOCIAL_VALUES_BY_ST] + if not missing: + return + results = await asyncio.gather( + *(client.get_service_type_social_values(sid) for sid in missing) + ) + for sid, df in zip(missing, results): + _SOCIAL_VALUES_BY_ST[sid] = None if df is None else list(df.index) + + +async def _adapt_social_values( + service_type_id: int, client: UrbanAPIClient +) -> Optional[List[int]]: + await _warmup_social_values([service_type_id], client) + return _SOCIAL_VALUES_BY_ST.get(service_type_id) + + +async def adapt_service_types( + service_types_df: pd.DataFrame, client: UrbanAPIClient +) -> pd.DataFrame: + df = service_types_df[["infrastructure_type"]].copy() + df["infrastructure_weight"] = service_types_df["weight_value"] + + service_type_ids: List[int] = df.index.tolist() + + names = await asyncio.gather(*(_adapt_name(st_id) for st_id in service_type_ids)) + df["name"] = names + df = df.dropna(subset=["name"]).copy() + + await _warmup_social_values(list(df.index), client) + df["social_values"] = [_SOCIAL_VALUES_BY_ST.get(st_id) for st_id in df.index] + df["blocksnet"] = df.apply(lambda s: SERVICE_TYPES_MAPPING.get(s.name), axis=1) + + # return df[["name", "infrastructure_type", "infrastructure_weight", "social_values"]] + return df + + +def _map_services(names: list[str]) -> list[dict]: + out = [] + get_id = _SERVICE_NAME_TO_ID.get + for n in names: + sid = get_id(n) + out.append({"id": sid, "name": n}) + return out + + +def _filter_service_keys(d: dict | None) -> list[str]: + if not isinstance(d, dict): + return [] + return [k for k in d.keys() if k in _VALID_SERVICE_NAMES] + + +async def get_services_with_ids_from_layer( + scenario_id: int, + method: str, + cache: FileCache, + utils: EffectsUtils, + token: str | None = None, +) -> dict: + if method == "values_oriented_requirements": + scenario_id = await utils.resolve_base_id(token, scenario_id) + + cached: dict | None = cache.load_latest(method, scenario_id) + if not cached or "data" not in cached: + return {"before": [], "after": []} + + data: dict = cached["data"] + + if "before" in data or "after" in data: + before_names = _filter_service_keys(data.get("before")) + after_names = _filter_service_keys(data.get("after")) + return { + "before": _map_services(before_names), + "after": _map_services(after_names), + } + + if "provision" in data: + prov_names = _filter_service_keys(data["provision"]) + return {"services": _map_services(prov_names)} + + return {"before": [], "after": []} + + +async def build_en_to_ru_map(service_types_df: pd.DataFrame) -> dict[str, str]: + russian_names_dict = {} + for st_id, en_key in SERVICE_TYPES_MAPPING.items(): + if not en_key: + continue + if st_id in service_types_df.index: + ru_name = service_types_df.loc[st_id, "name"] + if isinstance(ru_name, pd.Series): + ru_name = ru_name.iloc[0] + if isinstance(ru_name, str) and ru_name.strip(): + russian_names_dict[en_key] = ru_name + return russian_names_dict + + +async def remap_properties_keys_in_geojson( + geojson: dict, en2ru: dict[str, str] +) -> dict: + feats = geojson.get("features", []) + for f in feats: + props = f.get("properties", {}) + to_rename = [(k, en2ru[k]) for k in props.keys() if k in en2ru] + for old_k, new_k in to_rename: + if ( + new_k in props + and isinstance(props[new_k], dict) + and isinstance(props[old_k], dict) + ): + merged = {**props[old_k], **props[new_k]} + props[new_k] = merged + else: + props[new_k] = props[old_k] + del props[old_k] + return geojson + + +def adapt_social_service_types_df( + 
service_types_df: pd.DataFrame, + mapping: Mapping["SocialIndicator", Iterable[int]], +) -> pd.DataFrame: + """ + Attach 'indicator' column to social service types using SOCIAL_INDICATORS_MAPPING + and normalize naming convention. + + Parameters + ---------- + service_types_df : pd.DataFrame + DataFrame where index represents service_type_id. + mapping : Mapping[SocialIndicator, list[int]] + Mapping from SocialIndicator enum to service_type IDs. + + Returns + ------- + pd.DataFrame + Adapted DataFrame with added 'indicator' column and renamed columns. + """ + df = service_types_df.copy() + + id_to_indicator = { + st_id: indicator for indicator, ids in mapping.items() for st_id in ids + } + + df["indicator"] = df.index.map(id_to_indicator) + + df = df.rename( + columns={ + "radius_availability_meters": "meters", + "time_availability_minutes": "minutes", + "services_per_1000_normative": "count", + "services_capacity_per_1000_normative": "capacity", + } + ) + + return df + + +def _build_name_maps( + service_types_df: pd.DataFrame, +) -> tuple[dict[str, int], dict[str, int]]: + """Build lookups from service type names to ids.""" + if service_types_df.index.name is None: + service_types_df = service_types_df.copy() + service_types_df.index.name = "service_type_id" + + id_series = ( + pd.to_numeric(service_types_df.index.to_series(), errors="coerce") + .dropna() + .astype("int64") + ) + + name_to_id: dict[str, int] = {} + blocksnet_to_id: dict[str, int] = {} + + for sid, row in service_types_df.loc[id_series.index].iterrows(): + try: + sid_int = int(sid) + except Exception: + continue + + nm = row.get("name") + if isinstance(nm, str) and nm: + prev = name_to_id.get(nm) + if prev is not None and prev != sid_int: + logger.warning( + f"Duplicate mapping for name {nm}: {prev} -> {sid_int} (last wins)" + ) + name_to_id[nm] = sid_int + + bn = row.get("blocksnet") + if isinstance(bn, str) and bn: + prev = blocksnet_to_id.get(bn) + if prev is not None and prev != sid_int: + logger.warning( + f"Duplicate mapping for blocksnet {bn}: {prev} -> {sid_int} (last wins)" + ) + blocksnet_to_id[bn] = sid_int + + return name_to_id, blocksnet_to_id + + +def _rename_non_id_columns_to_ids( + df: pd.DataFrame, + name_to_id: dict[str, int], + blocksnet_to_id: dict[str, int], + prefixes: Iterable[str], +) -> pd.DataFrame: + """ + Rename columns like 'count_kindergarten' -> 'count_21' using provided lookups. + Leaves 'count_21' (already id) as-is. Unknown names are kept and warned. + """ + rename_map: dict[str, str] = {} + + for col in df.columns: + for prefix in prefixes: + pref = f"{prefix}_" + if not col.startswith(pref): + continue + + suffix = col[len(pref) :] + if suffix.isnumeric(): + break + + sid = blocksnet_to_id.get(suffix) or name_to_id.get(suffix) + if sid is not None: + rename_map[col] = f"{pref}{sid}" + else: + logger.warning(f"No service_id mapping found for column '{col}'") + break + + if rename_map: + df = df.rename(columns=rename_map) + return df + + +def ensure_missing_id_and_name_columns( + blocks_gdf: gpd.GeoDataFrame, + count_prefix: str = "count", + capacity_prefix: str = "capacity", +) -> gpd.GeoDataFrame: + """ + Ensure per-block columns exist for every service in SERVICE_TYPES_MAPPING: + - {count_prefix}_{id} + - {capacity_prefix}_{id} + - {count_prefix}_{name} + - {capacity_prefix}_{name} + Missing columns are created and filled with zeros. + + Parameters + ---------- + blocks_gdf : GeoDataFrame + Blocks data. 
+ service_type_mapping : dict[int -> str or None] + Mapping of service_type_id to service_name. + count_prefix, capacity_prefix : str + Column prefixes. + + Returns + ------- + GeoDataFrame + Updated dataframe containing all required columns. + """ + ids = sorted(int(sid) for sid in SERVICE_TYPES_MAPPING.keys()) + names = [ + name + for name in SERVICE_TYPES_MAPPING.values() + if isinstance(name, str) and name.strip() + ] + + required_cols = [] + for sid in ids: + required_cols.append(f"{count_prefix}_{sid}") + required_cols.append(f"{capacity_prefix}_{sid}") + for name in names: + required_cols.append(f"{count_prefix}_{name}") + required_cols.append(f"{capacity_prefix}_{name}") + + missing = [c for c in required_cols if c not in blocks_gdf.columns] + if missing: + logger.info(f"Creating missing service columns (zeros): {missing}") + add = {} + for col in missing: + if col.startswith(f"{count_prefix}_"): + add[col] = pd.Series(0, index=blocks_gdf.index, dtype="int64") + else: + add[col] = pd.Series(0.0, index=blocks_gdf.index, dtype="float64") + + blocks_gdf = blocks_gdf.join(pd.DataFrame(add, index=blocks_gdf.index)) + + return blocks_gdf + + +def generate_blocksnet_columns( + blocks_gdf: gpd.GeoDataFrame, + service_types_df: pd.DataFrame, + count_prefix: str = "count", + capacity_prefix: str = "capacity", + strict: bool = False, +) -> gpd.GeoDataFrame: + """ + Build notebook-like aggregated columns by 'blocksnet' groups: + + 1) Normalize per-service columns in `blocks_gdf`: + - Accept either '_' OR '_'. + - Non-numeric suffixes are renamed to numeric ids using `service_types_df`. + + 2) Aggregate by `blocksnet`: + - sum of count_ -> count_ + - sum of capacity_ -> capacity_ + + Robust behavior: + - service_type_id taken from index or 'service_type_id' column + - ids cast to int + - sum only existing columns; missing -> warning (or raise if strict=True) + """ + st_df = service_types_df[service_types_df["blocksnet"].notna()].copy() + + if "service_type_id" in st_df.columns: + ids_series = st_df["service_type_id"] + else: + ids_series = st_df.index.to_series(name="service_type_id") + + ids_series = pd.to_numeric(ids_series, errors="coerce").astype("Int64") + st_df = st_df.assign(service_type_id=ids_series).dropna(subset=["service_type_id"]) + st_df["service_type_id"] = st_df["service_type_id"].astype("int64") + + name_to_id, blocksnet_to_id = _build_name_maps(st_df) + blocks_gdf = _rename_non_id_columns_to_ids( + blocks_gdf, + name_to_id=name_to_id, + blocksnet_to_id=blocksnet_to_id, + prefixes=(count_prefix, capacity_prefix), + ) + + grouped = ( + st_df.groupby("blocksnet")["service_type_id"] + .apply(lambda s: sorted(set(int(x) for x in s))) + .to_dict() + ) + + new_columns: dict[str, pd.Series] = {} + for bnet, st_ids in grouped.items(): + for prefix in (count_prefix, capacity_prefix): + expected = [f"{prefix}_{sid}" for sid in st_ids] + existing = [c for c in expected if c in blocks_gdf.columns] + missing = [c for c in expected if c not in blocks_gdf.columns] + + if missing: + msg = f"Missing columns for '{bnet}' [{prefix}]: {missing}" + if strict: + raise KeyError(msg) + logger.warning(msg) + + series = ( + blocks_gdf[existing].fillna(0).sum(axis=1) + if existing + else pd.Series(0, index=blocks_gdf.index, dtype="float64") + ) + new_columns[f"{prefix}_{bnet}"] = series + + out = pd.concat( + [blocks_gdf, pd.DataFrame(new_columns, index=blocks_gdf.index)], axis=1 + ) + return cast(gpd.GeoDataFrame, out) diff --git a/app/effects_api/modules/services_service.py 
b/app/effects_api/modules/services_service.py new file mode 100644 index 0000000..ff7c672 --- /dev/null +++ b/app/effects_api/modules/services_service.py @@ -0,0 +1,48 @@ +import geopandas as gpd +import pandas as pd +from blocksnet.blocks.aggregation import aggregate_objects +from loguru import logger + + +def _adapt_service_type(data: dict, service_types: pd.DataFrame) -> int: + service_type_id = int(data["service_type_id"]) + if service_type_id in service_types.index: + service_type_name = service_types.loc[service_type_id, "name"] + return service_type_name + return None + + +def adapt_services( + buildings_gdf: gpd.GeoDataFrame, service_types: pd.DataFrame +) -> dict[int, gpd.GeoDataFrame]: + """ + Convert the raw building GeoDataFrame into a dictionary where each key is a + canonical service-type ID and the value is a GeoDataFrame of buildings of + that service type. + + Parameters: + buildings_gdf : gpd.GeoDataFrame + Required columns: + • geometry – building footprint or centroid + • capacity – numeric design capacity + • service_type – raw service-type ID + service_types : pd.DataFrame + Lookup table used by the helper _adapt_service_type to map raw + service_type IDs onto canonical IDs. + + Returns: + dict[int, gpd.GeoDataFrame] + Keys are canonical service-type IDs (int). + Each value contains only geometry and capacity columns; the temporary + service_type column is removed. + Buildings whose service_type cannot be mapped are discarded. + """ + gdf = buildings_gdf[["geometry", "capacity"]].copy() + gdf["service_type"] = buildings_gdf["service_type"].apply( + lambda st: _adapt_service_type(st, service_types) + ) + gdf = gdf[~gdf["service_type"].isna()].copy() + return { + st: gdf[gdf["service_type"] == st].drop(columns=["service_type"]) + for st in sorted(gdf["service_type"].unique()) + } diff --git a/app/effects_api/modules/task_service.py b/app/effects_api/modules/task_service.py new file mode 100644 index 0000000..f6d7061 --- /dev/null +++ b/app/effects_api/modules/task_service.py @@ -0,0 +1,285 @@ +import asyncio +import contextlib +import json +from contextlib import asynccontextmanager +from typing import Any, Callable, Literal, Coroutine + +import geopandas as gpd +from fastapi import FastAPI +from loguru import logger + +from app.common.exceptions.http_exception_wrapper import http_exception +from app.dependencies import effects_service, file_cache, effects_utils, consumer, producer, config +from app.prometheus.server import start_metrics_server, stop_metrics_server +import time + +from app.prometheus.metrics import ( + bind_queue_metrics, get_task_metrics) + +MethodFunc = Callable[[str, Any], Coroutine[Any, Any, Any]] +TASK_METRICS = get_task_metrics() + +TASK_METHODS: dict[str, MethodFunc] = { + "territory_transformation": effects_service.territory_transformation, + "values_transformation": effects_service.values_transformation, + "values_oriented_requirements": effects_service.values_oriented_requirements, + "social_economical_metrics": effects_service.evaluate_social_economical_metrics +} + + +def _cache_complete(method: str, cached_obj: dict | None) -> bool: + if not cached_obj: + return False + data = cached_obj.get("data") or {} + if method == "territory_transformation": + if data.get("after"): + return True + return bool(data.get("before")) + return True + +_task_queue: asyncio.Queue["AnyTask"] = asyncio.Queue() +_task_map: dict[str, "AnyTask"] = {} + + +class AnyTask: + def __init__( + self, + method: str, + scenario_id: int, + token: str, + params: Any, + 
params_hash: str, + cache: file_cache, + task_id: str, + ): + self.method = method + self.scenario_id = scenario_id + self.token = token + self.params = params + self.param_hash = params_hash + + self.status: Literal["queued", "running", "done", "failed"] = "queued" + self.result: dict | None = None + self.error: str | None = None + self.cache = cache + self.task_id = task_id + + async def to_response(self) -> dict: + if self.status in {"queued", "running"}: + return {"status": self.status} + if self.status == "done": + return {"status": "done", "result": self.result} + return {"status": "failed", "error": self.error} + + async def run(self) -> None: + """ + Run task asynchronously inside event loop. + """ + start_time = time.perf_counter() + TASK_METRICS.on_started(self.method) + + try: + logger.info(f"[{self.task_id}] started") + self.status = "running" + + force = getattr(self.params, "force", False) + cached = None if force else self.cache.load(self.method, self.scenario_id, self.param_hash) + + if not force and _cache_complete(self.method, cached): + logger.info(f"[{self.task_id}] loaded from cache") + TASK_METRICS.on_cache_hit(self.method) + self.result = cached["data"] + self.status = "done" + return + + func = TASK_METHODS[self.method] + raw_data = await func(self.token, self.params) + + self.result = self._serialize_result(raw_data) + self.status = "done" + TASK_METRICS.on_finished_success(self.method) + + + except Exception as exc: + logger.exception(f"[{self.task_id}] failed") + self.status = "failed" + self.error = str(exc) + TASK_METRICS.on_finished_failed(self.method) + + finally: + TASK_METRICS.observe_duration(self.method, time.perf_counter() - start_time) + + def _serialize_result(self, raw_data): + """Serialize GeoDataFrame or dict to json-compatible structure.""" + if isinstance(raw_data, gpd.GeoDataFrame): + return json.loads(raw_data.to_json(drop_id=True)) + + if isinstance(raw_data, dict): + return { + k: json.loads(v.to_json(drop_id=True)) + if isinstance(v, gpd.GeoDataFrame) + else v + for k, v in raw_data.items() + } + + return raw_data + + +async def create_task( + method: str, + token: str, + params, +) -> dict: + """ + Create (or reuse) an async Effects task. 
+ + Returns: + dict: { "task_id": str, "status": "queued" | "running" | "done" } + """ + TASK_METRICS.on_created(method) + + project_based_methods = {"social_economical_metrics", "urbanomy_metrics"} + + if method in project_based_methods: + owner_id = getattr(params, "project_id", None) + + params_for_hash = { + "project_id": getattr(params, "project_id", None), + "regional_scenario_id": getattr(params, "regional_scenario_id", None), + } + + force = bool(getattr(params, "force", False)) + + try: + phash = file_cache.params_hash(params_for_hash) + except Exception as e: + logger.exception("Failed to hash params (project)") + raise http_exception(500, "Failed to hash task parameters", + _input=params_for_hash, _detail=str(e)) + + task_id = f"{method}_{owner_id}_{phash}" + + try: + cached = None if force else file_cache.load(method, owner_id, phash) + except Exception as e: + logger.exception("Cache load failed (project)") + raise http_exception(500, "Cache load failed", + _input={"method": method, "owner_id": owner_id}, _detail=str(e)) + + if not force and _cache_complete(method, cached): + TASK_METRICS.on_cache_hit(method) + return {"task_id": task_id, "status": "done"} + + existing = None if force else _task_map.get(task_id) + if not force and existing and existing.status in {"queued", "running"}: + return {"task_id": task_id, "status": existing.status} + + task = AnyTask(method, owner_id, token, params, phash, file_cache, task_id) + _task_map[task_id] = task + TASK_METRICS.on_enqueued(method) + await _task_queue.put(task) + return {"task_id": task_id, "status": "queued"} + + if method == "values_oriented_requirements": + base_id = await effects_utils._resolve_base_id(token, getattr(params, "scenario_id")) + logger.info( + "[Tasks] values_oriented_requirements base_id=%s (requested=%s)", + base_id, getattr(params, "scenario_id") + ) + + base_params = params.model_copy(update={ + "scenario_id": base_id, + "proj_func_zone_source": None, + "proj_func_source_year": None, + "context_func_zone_source": None, + "context_func_source_year": None, + }) + norm_params = await effects_service.get_optimal_func_zone_data(base_params, token) + + params_for_hash = await effects_service.build_hash_params(norm_params, token) + phash = file_cache.params_hash(params_for_hash) + owner_id = base_id + task_id = f"{method}_{owner_id}_{phash}" + + cached = file_cache.load(method, owner_id, phash) + if cached and "data" in cached and "result" in cached["data"]: + logger.info("[Tasks] Cache hit for values_oriented_requirements -> DONE") + TASK_METRICS.on_cache_hit(method) + return {"task_id": task_id, "status": "done"} + + task = AnyTask(method, owner_id, token, norm_params, phash, file_cache, task_id) + if task.task_id in _task_map: + return {"task_id": task.task_id, "status": "running"} + _task_map[task.task_id] = task + TASK_METRICS.on_enqueued(method) + await _task_queue.put(task) + return {"task_id": task.task_id, "status": "queued"} + + norm_params = await effects_service.get_optimal_func_zone_data(params, token) + params_for_hash = await effects_service.build_hash_params(norm_params, token) + phash = file_cache.params_hash(params_for_hash) + owner_id = norm_params.scenario_id + task_id = f"{method}_{owner_id}_{phash}" + + cached = file_cache.load(method, owner_id, phash) + if cached and "data" in cached: + TASK_METRICS.on_cache_hit(method) + return {"task_id": task_id, "status": "done"} + + task = AnyTask(method, owner_id, token, norm_params, phash, file_cache, task_id) + if task.task_id in _task_map: + return 
{"task_id": task.task_id, "status": "running"} + _task_map[task.task_id] = task + TASK_METRICS.on_enqueued(method) + await _task_queue.put(task) + return {"task_id": task.task_id, "status": "queued"} + + +async def _worker(): + while True: + task: AnyTask = await _task_queue.get() + await asyncio.to_thread(task.run_sync) + _task_queue.task_done() + + +worker_task: asyncio.Task | None = None + + +class Worker: + def __init__(self): + self.is_alive = True + self.task: asyncio.Task | None = None + + async def run(self): + while self.is_alive: + task: AnyTask = await _task_queue.get() + await task.run() + _task_queue.task_done() + + def start(self): + self.task = asyncio.create_task(self.run(), name="any_task_worker") + + async def stop(self): + self.is_alive = False + if self.task: + self.task.cancel() + with contextlib.suppress(asyncio.CancelledError): + await self.task + + +worker = Worker() + +@asynccontextmanager +async def lifespan(app: FastAPI): + start_metrics_server(int(config.get("PROMETHEUS_PORT"))) + bind_queue_metrics(_task_queue) + worker.start() + await producer.start() + await consumer.start(["scenario.events"]) + try: + yield + finally: + await consumer.stop() + await producer.stop() + await worker.stop() + stop_metrics_server() diff --git a/app/effects_api/schemas/__init__.py b/app/effects_api/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/effects_api/schemas/output_maps/__init__.py b/app/effects_api/schemas/output_maps/__init__.py new file mode 100644 index 0000000..608e1a2 --- /dev/null +++ b/app/effects_api/schemas/output_maps/__init__.py @@ -0,0 +1,13 @@ +import json +from pathlib import Path + +abs_path = Path(__file__).parent + + +with open( + abs_path / "soc_economy_pred_name_map.json", "r", encoding="utf-8" +) as sepnm_file: + soc_economy_pred_name_map = json.load(sepnm_file) + +with open(abs_path / "pred_columns_names_map.json", "r", encoding="utf-8") as pcnp_file: + pred_columns_names_map = json.load(pcnp_file) diff --git a/app/effects_api/schemas/output_maps/pred_columns_names_map.json b/app/effects_api/schemas/output_maps/pred_columns_names_map.json new file mode 100644 index 0000000..df7059a --- /dev/null +++ b/app/effects_api/schemas/output_maps/pred_columns_names_map.json @@ -0,0 +1,6 @@ +{ + "pred": "Предсказанное количество сервисов", + "lower": "Нижняя граница доверительного интервала", + "upper": "Верхняя граница доверительного интервала", + "is_interval": "Попадание предсказания в доверительный интервал" +} \ No newline at end of file diff --git a/app/effects_api/schemas/output_maps/soc_economy_pred_name_map.json b/app/effects_api/schemas/output_maps/soc_economy_pred_name_map.json new file mode 100644 index 0000000..6485df2 --- /dev/null +++ b/app/effects_api/schemas/output_maps/soc_economy_pred_name_map.json @@ -0,0 +1,30 @@ +{ + "nursing_home_count": "Количество домов престарелых", + "hotel_count": "Количество гостиниц", + "theatre_count": "Количество театров", + "cinema_count": "Количество кинотеатров", + "secondary_vocational_education_institutions_count": "Количество образовательных учреждений СПО", + "university_count": "Количество высших учебных заведений", + "stadium_count": "Количество стадионов", + "emergency_medical_service_stations_count": "Количество станций скорой медицинской помощи", + "kindergarten_count": "Количество дошкольных образовательных учреждений", + "hostel_count": "Количество хостелов", + "park_count": "Количество парков", + "multifunctional_center_count": "Количество центров предоставления 
государственных и муниципальных услуг", + "pharmacy_count": "Количество аптек", + "sports_halls_count": "Количество спортивных залов", + "hospital_count": "Количество больничных учреждений (стационаров)", + "school_count": "Количество общеобразовательных учреждений", + "mall_count": "Количество торгово-развлекательных центров", + "polyclinic_count": "Количество амбулаторно-поликлинических учреждений", + "post_count": "Количество почтовых отделений", + "swimming_pool_count": "Количество плавательных бассейнов", + "library_count": "Количество библиотек", + "guest_house_count": "Количество туристических баз", + "fire_safety_facilities_count": "Количество объектов обеспечения пожарной безопасности", + "restaurant_count": "Количество объектов общественного питания", + "police_count": "Количество полицейских участков", + "museum_count": "Количество музеев", + "bank_count": "Количество отделений банков", + "pitch_count": "Количество плоскостных спортивных сооружений" +} diff --git a/app/effects_api/schemas/service_types_response_schema.py b/app/effects_api/schemas/service_types_response_schema.py new file mode 100644 index 0000000..2dca0a3 --- /dev/null +++ b/app/effects_api/schemas/service_types_response_schema.py @@ -0,0 +1,30 @@ +from typing import List, Optional + +from pydantic import BaseModel, Field + +from app.common.dto.models import ServiceType + + +class ServiceTypesResponse(BaseModel): + """ + List of service types available before and/or after scenario transformation. + Both lists may be present, and `after` may be empty or populated depending on scenario changes. + """ + + before: List[ServiceType] = Field( + ..., description="Service types in the base (before) scenario" + ) + after: Optional[List[ServiceType]] = Field( + None, + description="Service types in the transformed (after) scenario; may be empty or identical to 'before'", + ) + + +class ValuesServiceTypesResponse(BaseModel): + """ + List of service types available for values oriented requirements. 
+ """ + + services: List[ServiceType] = Field( + ..., description="Service types in the base scenario" + ) diff --git a/app/effects_api/schemas/socio_economic_metrics_response_schema.py b/app/effects_api/schemas/socio_economic_metrics_response_schema.py new file mode 100644 index 0000000..f973b97 --- /dev/null +++ b/app/effects_api/schemas/socio_economic_metrics_response_schema.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel, Field + + +class SocioEconomicMetricsResponseSchema(BaseModel): + results: dict[str, dict[str, dict[str, int | float | None]]] = Field( + ..., description="Results of socio economic metrics" + ) diff --git a/app/effects_api/schemas/territory_transformation_response_schema.py b/app/effects_api/schemas/territory_transformation_response_schema.py new file mode 100644 index 0000000..8a06d18 --- /dev/null +++ b/app/effects_api/schemas/territory_transformation_response_schema.py @@ -0,0 +1,31 @@ +from typing import Dict, Optional + +from pydantic import BaseModel, Field + +from app.common.dto.models import FeatureCollectionModel + + +class TerritoryTransformationResponseTablesSchema(BaseModel): + provision_total_before: Dict[str, float] = Field( + None, description="Provision values for the base scenario, by service name" + ) + provision_total_after: Optional[Dict[str, float]] = Field( + None, + description="Provision values for the transformed scenario, by service name", + ) + + +class TerritoryTransformationLayerResponse(TerritoryTransformationResponseTablesSchema): + """ + API response for a single service's territory transformation layer. + Either 'before', 'after', or both can be present. + Provision totals are optional numeric aggregates. + """ + + before: Optional[FeatureCollectionModel] = Field( + None, description="GeoJSON FeatureCollection for the base (before) scenario" + ) + after: Optional[FeatureCollectionModel] = Field( + None, + description="GeoJSON FeatureCollection for the transformed (after) scenario", + ) diff --git a/app/effects_api/schemas/values_oriented_response_schema.py b/app/effects_api/schemas/values_oriented_response_schema.py new file mode 100644 index 0000000..9d0e448 --- /dev/null +++ b/app/effects_api/schemas/values_oriented_response_schema.py @@ -0,0 +1,18 @@ +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + +from app.common.dto.models import FeatureCollectionModel + + +class ValuesOrientedResponseSchema(BaseModel): + base_scenario_id: int = Field(..., description="Id of the base scenario") + geojson: FeatureCollectionModel = Field( + ..., description="GeoJSON FeatureCollection for the base scenario" + ) + values_table: Dict[str, Dict[str, str | float]] = Field( + ..., description="Values table for the base scenario" + ) + services_type_deficit: List[Dict[str, str | float | List[str]]] = Field( + ..., description="Services type deficit for the base scenario" + ) diff --git a/app/effects_api/schemas/values_tables_response_schema.py b/app/effects_api/schemas/values_tables_response_schema.py new file mode 100644 index 0000000..95fee31 --- /dev/null +++ b/app/effects_api/schemas/values_tables_response_schema.py @@ -0,0 +1,13 @@ +from typing import Dict, List + +from pydantic import BaseModel, Field + + +class ValuesOrientedResponseTablesSchema(BaseModel): + base_scenario_id: int = Field(..., description="Id of the base scenario") + values_table: Dict[str, Dict[str, str | float]] = Field( + ..., description="Values table for the base scenario" + ) + services_type_deficit: List[Dict[str, str | float | 
List[str]]] = Field( + ..., description="Services type deficit for the base scenario" + ) diff --git a/app/effects_api/schemas/values_transformation_response_schema.py b/app/effects_api/schemas/values_transformation_response_schema.py new file mode 100644 index 0000000..ea42979 --- /dev/null +++ b/app/effects_api/schemas/values_transformation_response_schema.py @@ -0,0 +1,11 @@ +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + +from app.common.dto.models import FeatureCollectionModel + + +class ValuesTransformationSchema(BaseModel): + geojson: FeatureCollectionModel = Field( + ..., description="GeoJSON FeatureCollection for the scenario" + ) diff --git a/app/effects_api/tasks_controller.py b/app/effects_api/tasks_controller.py new file mode 100644 index 0000000..30f2e9e --- /dev/null +++ b/app/effects_api/tasks_controller.py @@ -0,0 +1,511 @@ +import asyncio +from typing import Annotated, Union, Literal + +from fastapi import APIRouter, Query +from fastapi.params import Depends +from loguru import logger +from starlette.responses import JSONResponse + +from app.common.auth.auth import verify_token +from app.effects_api.modules.task_service import ( + TASK_METHODS, + AnyTask, + _task_map, + _task_queue, create_task, +) +from .dto.socio_economic_project_dto import SocioEconomicByProjectDTO +from .schemas.service_types_response_schema import ServiceTypesResponse, ValuesServiceTypesResponse +from .schemas.socio_economic_metrics_response_schema import SocioEconomicMetricsResponseSchema +from .schemas.territory_transformation_response_schema import TerritoryTransformationLayerResponse, \ + TerritoryTransformationResponseTablesSchema +from .schemas.values_oriented_response_schema import ValuesOrientedResponseSchema +from .schemas.values_tables_response_schema import ValuesOrientedResponseTablesSchema +from .schemas.values_transformation_response_schema import ValuesTransformationSchema +from ..common.dto.models import FeatureCollectionModel + +from ..common.exceptions.http_exception_wrapper import http_exception +from ..dependencies import effects_service, effects_utils, file_cache, urban_api_client +from .dto.development_dto import ContextDevelopmentDTO +from .modules.service_type_service import get_services_with_ids_from_layer +from ..prometheus.metrics import get_task_metrics + +TASK_METRICS = get_task_metrics() + +router = APIRouter(prefix="/tasks", tags=["tasks"]) + +_locks: dict[str, asyncio.Lock] = {} + + +#TODO continue response schemas + +def _get_lock(key: str) -> asyncio.Lock: + lock = _locks.get(key) + if lock is None: + lock = asyncio.Lock() + _locks[key] = lock + return lock + + +async def _with_defaults( + dto: ContextDevelopmentDTO, token: str +) -> ContextDevelopmentDTO: + return await effects_service.get_optimal_func_zone_data(dto, token) + + +def _is_fc(x: dict) -> bool: + return ( + isinstance(x, dict) + and x.get("type") == "FeatureCollection" + and isinstance(x.get("features"), list) + ) + + +def _section_ready(sec: dict | None) -> bool: + return isinstance(sec, dict) and any(_is_fc(v) for v in sec.values()) + + +def _cache_complete(method: str, cached: dict | None) -> bool: + if not cached: + return False + data = cached.get("data") or {} + if method == "territory_transformation": + before_ok = _section_ready(data.get("before")) + after_sec = data.get("after") + if after_sec: + return before_ok and _section_ready(after_sec) + return before_ok + return True + + +@router.get("/methods", summary="List available task methods", + description=( + 
"Returns the current list of task method names that can be scheduled via this API.\n\n" + "- `territory_transformation` — F 35 scenario-based, create with `POST /tasks/{method}`\n" + "- `values_transformation` — F 26 scenario-based, create with `POST /tasks/{method}`\n" + "- `values_oriented_requirements` — F 36 scenario-based, create with `POST /tasks/{method}`\n" + "- `social_economical_metrics` — F22 project-based, create with `POST /tasks/project/{method}`" + )) +async def get_methods(): + return list(TASK_METHODS.keys()) + + +@router.post("/{method}", status_code=202, + summary="Create scenario-based task", + description=( + "Queues an asynchronous **scenario-based** task.\n\n" + "Currently supported:" + "`territory_transformation`, `values_transformation`, `values_oriented_requirements` \n\n" + "**Caching behavior**: if `force=false` and a complete cached result exists " + "for the computed parameter hash, the endpoint returns `status=done` immediately. " + "Otherwise a task is queued and `status=queued` is returned.\n\n" + "**Response statuses**:\n" + "- `queued`: task was enqueued successfully\n" + "- `running`: a task with the same id is already being processed\n" + "- `done`: cached result is available\n" + "- `failed`: check `GET /tasks/status/{task_id}` for error details\n\n" + "**Task id format**: `{method}_{scenario_id}_{phash}`" + )) +async def create_scenario_task( + method: str, + params: Annotated[ContextDevelopmentDTO, Depends()], + token: str = Depends(verify_token), +): + """Roter for task creation""" + if method not in TASK_METHODS: + raise http_exception(404, f"method '{method}' is not registered", method) + + coarse_key = f"{method}:{params.scenario_id}" + lock = _get_lock(coarse_key) + + async with lock: + params_filled = await effects_service.get_optimal_func_zone_data(params, token) + params_for_hash = await effects_service.build_hash_params(params_filled, token) + phash = file_cache.params_hash(params_for_hash) + + task_id = f"{method}_{params_filled.scenario_id}_{phash}" + + force = getattr(params, "force", False) + + cached = ( + None if force else file_cache.load(method, params_filled.scenario_id, phash) + ) + if not force and _cache_complete(method, cached): + TASK_METRICS.on_cache_hit(method) + return {"task_id": task_id, "status": "done"} + + existing = None if force else _task_map.get(task_id) + if not force and existing and existing.status in {"queued", "running"}: + return {"task_id": task_id, "status": existing.status} + + task = AnyTask( + method, + params_filled.scenario_id, + token, + params_filled, + phash, + file_cache, + task_id, + ) + _task_map[task_id] = task + await _task_queue.put(task) + TASK_METRICS.on_enqueued(method) + + return {"task_id": task_id, "status": "queued"} + +@router.post("/project/{method}", status_code=202, + summary="Create project-based task", + description=( + "Queues an asynchronous **project-level** task. Currently supported: " + "`social_economical_metrics`.\n\n" + "**Hash parameters**: `{project_id, regional_scenario_id}` (territory_ids are not part of cache key).\n" + "**Caching behavior**: if `force=false` and a complete cached result exists, " + "for the computed parameter hash, the endpoint returns `status=done` immediately. 
" + "Otherwise a task is queued and `status=queued` is returned.\n\n" + "**Response statuses**:\n" + "- `queued`: task was enqueued successfully\n" + "- `running`: a task with the same id is already being processed\n" + "- `done`: cached result is available\n" + "- `failed`: check `GET /tasks/status/{task_id}` for error details\n\n" + "**Task id format**: `{method}_{project_id}_{phash}`" + )) +async def create_project_task( + method: Literal["social_economical_metrics"], + params: Annotated[SocioEconomicByProjectDTO, Depends()], + token: Annotated[str, Depends(verify_token)], +): + """ + separate endpoint for project-based tasks (e.g., socio_economics). + """ + if method not in ["social_economical_metrics"]: + raise http_exception(400, f"method '{method}' is not project-based", method) + + try: + result = await create_task(method, token, params) + except Exception as e: + logger.exception("Failed to enqueue project task") + raise http_exception( + 500, + "Failed to enqueue project task", + _input={"method": method, "project_id": params.project_id}, + _detail=str(e), + ) + + status = result.get("status") + http_code = 200 if status == "done" else 202 + return JSONResponse(result, status_code=http_code) + + +@router.get("/status/{task_id}", + summary="Get task status", + description=( + "Returns current status for a task id.\n\n" + "**Statuses**:\n" + "- `queued`: waiting in queue\n" + "- `running`: being processed\n" + "- `done`: cached (final) result exists\n" + "- `failed`: task failed, `error` field may be present\n" + "- `unknown`: task is tracked but status cannot be resolved\n\n" + "If the cache already contains a complete result for the `task_id`, " + "the endpoint responds with `status=done`." + )) +async def task_status(task_id: str): + method, scenario_id, phash = file_cache.parse_task_id(task_id) + if method and scenario_id is not None and phash: + try: + cached = file_cache.load(method, scenario_id, phash) + if _cache_complete(method, cached): + return {"task_id": task_id, "status": "done"} + if cached: + return {"task_id": task_id, "status": "running"} + except Exception: + pass + + task = _task_map.get(task_id) + if task: + payload = { + "task_id": task_id, + "status": getattr(task, "status", "unknown"), + } + if getattr(task, "status", None) == "failed" and getattr(task, "error", None): + payload["error"] = str(task.error) + return payload + + raise http_exception(404, "task not found", task_id) + + +@router.get( + "/get_service_types", + summary="List service types", + response_model=Union[ServiceTypesResponse, ValuesServiceTypesResponse], + description=( + "Returns service type identifiers available for a given `scenario_id` and `method` " + "from the cached layer. Intended to help clients discover which services can be requested." 
+ "For 'territory_transformation' method 'before' and 'after' keys with services are returned" + "For 'values_oriented_requirements' only 'services' key with services is returned" + ), + response_model_exclude_none=True, +) +async def get_service_types( + scenario_id: int, + method: str = "territory_transformation", + token: str = Depends(verify_token), +): + """Return service types depending on the method.""" + if method == "territory_transformation": + data = await get_services_with_ids_from_layer( + scenario_id, method, file_cache, effects_utils, token=token + ) + return ServiceTypesResponse(before=data["before"], after=data.get("after", [])) + + if method == "values_oriented_requirements": + services = await get_services_with_ids_from_layer( + scenario_id, method, file_cache, effects_utils, token=token + ) + return ValuesServiceTypesResponse( + services=services.get("services", []) + ) + raise http_exception(400, f"Unsupported method", f"{method}") + + +@router.get("/territory_transformation/{scenario_id}/{service_name}", + summary="Get territory transformation layer by service", + description=( + "Fetches a GeoJSON layer for a specific `service_name` from the cached " + "`territory_transformation` result.\n\n" + "**Responses**:\n" + "- When both versions exist: returns `{ before, after, provision_total_before, provision_total_after }`\n" + "- When only `before` exists: returns `{ before, provision_total_before }`\n" + "- When only `after` exists: returns `{ after, provision_total_after }`" + ), + response_model=TerritoryTransformationLayerResponse, + ) +async def get_territory_transformation_layer(scenario_id: int, service_name: str): + cached = file_cache.load_latest("territory_transformation", scenario_id) + if not cached: + raise http_exception(404, "no saved result for this scenario", scenario_id) + + data: dict = cached["data"] + + if "after" not in data or not data.get("after"): + fc = data.get("before", {}).get(service_name) + if not fc: + raise http_exception(404, f"service '{service_name}' not found") + return TerritoryTransformationLayerResponse(before= fc) + + before_dict = data.get("before", {}) or {} + after_dict = data.get("after", {}) or {} + + fc_before = before_dict.get(service_name) + fc_after = after_dict.get(service_name) + + provision_before = { + k: 0.0 if v is None else float(v) + for k, v in (before_dict.get("provision_total_before") or {}).items() + } + + provision_after = { + k: 0.0 if v is None else float(v) + for k, v in (after_dict.get("provision_total_after") or {}).items() + } + if fc_before and fc_after: + return TerritoryTransformationLayerResponse( + before = fc_before, + after = fc_after, + provision_total_before = provision_before, + provision_total_after = provision_after, + ) + + if fc_before and not fc_after: + return TerritoryTransformationLayerResponse( + before = fc_before, provision_total_before = provision_before) + + if fc_after and not fc_before: + return TerritoryTransformationLayerResponse( + after= fc_after, provision_total_after = provision_after + ) + + raise http_exception(404, f"service '{service_name}' not found") + + +@router.get("/values_oriented_requirements/{scenario_id}/{service_name}", + summary="Get Values-Oriented Requirements layer", + description=( + "Returns the GeoJSON layer and values table for a `service_name`, computed for the " + "**base scenario** of the provided `scenario_id`.\n\n" + "Rejects the request if the cached base result is stale compared to the base scenario metadata." 
+ ), + response_model=ValuesOrientedResponseSchema) +async def get_values_oriented_requirements_layer( + scenario_id: int, + service_name: str, + token: str = Depends(verify_token), +): + base_id = await effects_utils.resolve_base_id(token, scenario_id) + + cached = file_cache.load_latest("values_oriented_requirements", base_id) + if not cached: + raise http_exception( + 404, f"no saved result for base scenario {base_id}", base_id + ) + + info_base = await urban_api_client.get_scenario_info(base_id, token) + if cached.get("meta", {}).get("scenario_updated_at") != info_base.get("updated_at"): + raise http_exception( + 404, f"stale cache for base scenario {base_id}, recompute required", base_id + ) + + data: dict = cached.get("data", {}) + prov = (data.get("provision") or {}).get(service_name) + values_dict = data.get("result") + values_table = data.get("social_values_table") + + if not prov: + raise http_exception( + 404, f"service '{service_name}' not found in base scenario {base_id}" + ) + + return ValuesOrientedResponseSchema( + base_scenario_id= base_id, + geojson= prov, + values_table= values_dict, + services_type_deficit= values_table, + ) + + +@router.get("/values_oriented_requirements_table/{scenario_id}", + summary="Get Values-Oriented Requirements tables", + description=( + "Returns the values table and service-type deficit table for the **base scenario** " + "of the provided `scenario_id`." + ), + response_model=ValuesOrientedResponseTablesSchema + ) +async def get_values_oriented_requirements_table( + scenario_id: int, + token: str = Depends(verify_token), +): + base_id = await effects_utils.resolve_base_id(token, scenario_id) + + cached = file_cache.load_latest("values_oriented_requirements", base_id) + if not cached: + raise http_exception( + 404, f"no saved result for base scenario {base_id}", base_id + ) + + info_base = await urban_api_client.get_scenario_info(base_id, token) + if cached.get("meta", {}).get("scenario_updated_at") != info_base.get("updated_at"): + raise http_exception( + 404, f"stale cache for base scenario {base_id}, recompute required", base_id + ) + + data: dict = cached.get("data", {}) + values_dict = data.get("result") + values_table = data.get("social_values_table") + + return ValuesOrientedResponseTablesSchema( + base_scenario_id = base_id, + values_table = values_dict, + services_type_deficit = values_table, + ) + + +@router.get("/get_from_cache/{method_name}/{project_scenario_id}", + summary="Get raw cached data by method and owner id", + description=( + "Reads the latest cached JSON payload for a given `method_name` and owner id. " + "For scenario-based methods the owner is a **scenario id**; for project-based " + "methods the owner is a **project id**." 
+ ), + response_model=Union[FeatureCollectionModel, SocioEconomicMetricsResponseSchema]) +async def get_layer( + project_scenario_id: int, + method_name: str, + regional_scenario_id: int | None = Query( + default=None, + description="Required for social_economical_metrics (project-based).", + ), +): + if method_name == "social_economical_metrics": + if regional_scenario_id is None: + raise http_exception( + 400, + "regional_scenario_id is required for social_economical_metrics", + {"project_id": project_scenario_id}, + ) + + params_for_hash = { + "project_id": project_scenario_id, + "regional_scenario_id": regional_scenario_id, + } + phash = file_cache.params_hash(params_for_hash) + + cached = file_cache.load(method_name, project_scenario_id, phash) + if not cached: + raise http_exception( + 404, + "no saved result for this project + regional_scenario_id", + {"project_id": project_scenario_id, "regional_scenario_id": regional_scenario_id}, + ) + + results = cached["data"]["results"] + + return SocioEconomicMetricsResponseSchema(results=results) + + cached = file_cache.load_latest(method_name, project_scenario_id) + if not cached: + raise http_exception(404, "no saved result for this scenario", project_scenario_id) + + data = cached["data"] + + if method_name == "values_transformation": + return FeatureCollectionModel.model_validate(data) + + raise http_exception( + 400, + "Method not implemented", + method_name, + "Allowed methods: values_transformation, social_economical_metrics", + ) + +@router.get("/get_provisions/{scenario_id}", + summary="Get total provision values", + description=( + "Returns total provision values from the cached `territory_transformation` result for " + "the specified `scenario_id`. Depending on availability, the response contains:\n" + "- `provision_total_before` and `provision_total_after`, or\n" + "- only one of them if the other is not present." 
+ ), + response_model=TerritoryTransformationResponseTablesSchema) +async def get_total_provisions(scenario_id: int): + cached = file_cache.load_latest("territory_transformation", scenario_id) + if not cached: + raise http_exception(404, "no saved result for this scenario", scenario_id) + + data: dict = cached["data"] + + before_dict = data.get("before", {}) or {} + after_dict = data.get("after", {}) or {} + + provision_before = { + k: 0.0 if v is None else float(v) + for k, v in (before_dict.get("provision_total_before") or {}).items() + } + + provision_after = { + k: 0.0 if v is None else float(v) + for k, v in (after_dict.get("provision_total_after") or {}).items() + } + + if provision_before and provision_after: + return TerritoryTransformationResponseTablesSchema( + provision_total_before = provision_before, + provision_total_after= provision_after, + ) + + if provision_before and not provision_after: + return TerritoryTransformationResponseTablesSchema(provision_total_before = provision_before) + + if provision_after and not provision_before: + return TerritoryTransformationResponseTablesSchema(provision_total_after= provision_after) + + raise http_exception(404, f"Result for scenario ID{scenario_id} not found") diff --git a/app/main.py b/app/main.py index 8ad1a16..e2374cb 100644 --- a/app/main.py +++ b/app/main.py @@ -1,55 +1,37 @@ -from contextlib import asynccontextmanager - -from app.api.routers.effects import effects_controller -from app.api.routers.effects.task_schema import TaskSchema -from app.api.utils.const import API_DESCRIPTION, API_TITLE from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.gzip import GZipMiddleware from fastapi.responses import RedirectResponse -controllers = [effects_controller] - -async def on_startup(): - for c in controllers: - c.on_startup() - -async def on_shutdown(): - ... 
- -@asynccontextmanager -async def lifespan(router : FastAPI): - await on_startup() - yield - await on_shutdown() +from app.common.exceptions.exception_handler import ExceptionHandlerMiddleware +from app.effects_api.modules.task_service import lifespan +from app.effects_api.tasks_controller import router as tasks_router +from app.system_router.system_controller import system_router +# TODO add app version app = FastAPI( - title=API_TITLE, - description=API_DESCRIPTION, - lifespan=lifespan + title="Effects API", + description="API for calculating effects of territory transformation with BlocksNet library", + lifespan=lifespan, ) -# disable cors +origins = ["*"] + app.add_middleware( CORSMiddleware, - allow_origin_regex='http://.*', + allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) app.add_middleware(GZipMiddleware, minimum_size=100) +app.add_middleware(ExceptionHandlerMiddleware) + @app.get("/", include_in_schema=False) async def read_root(): - return RedirectResponse('/docs') - -@app.get('/tasks', tags=['Tasks']) -def get_tasks() -> dict[int, TaskSchema]: - return effects_controller.tasks + return RedirectResponse("/docs") -@app.get('/task_status', tags=['Tasks']) -def get_task_status(task_id : int) -> TaskSchema: - return effects_controller.tasks[task_id] -for controller in controllers: - app.include_router(controller.router) \ No newline at end of file +app.include_router(tasks_router) +app.include_router(system_router) diff --git a/app/prometheus/metrics.py b/app/prometheus/metrics.py new file mode 100644 index 0000000..75b2085 --- /dev/null +++ b/app/prometheus/metrics.py @@ -0,0 +1,154 @@ +from prometheus_client import Counter, Histogram, Gauge + +from app.prometheus.task_metrics import TaskMetrics + +CACHE_INVALIDATION_EVENTS_TOTAL = Counter( + "effects_cache_invalidation_events_total", + "Total number of cache invalidation events received", +) + +CACHE_INVALIDATION_SUCCESS_TOTAL = Counter( + "effects_cache_invalidation_success_total", + "Total number of cache invalidation events successfully processed", +) + +CACHE_INVALIDATION_ERROR_TOTAL = Counter( + "effects_cache_invalidation_error_total", + "Total number of cache invalidation events failed during processing", +) + +CACHE_INVALIDATION_DURATION_SECONDS = Histogram( + "effects_cache_invalidation_duration_seconds", + "Duration of cache invalidation processing", + buckets=(0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10), +) + +EFFECTS_TERRITORY_TRANSFORMATION_TOTAL = Counter( + "effects_territory_transformation_total", + "Total number of territory_transformation calls", +) + +EFFECTS_TERRITORY_TRANSFORMATION_ERROR_TOTAL = Counter( + "effects_territory_transformation_error_total", + "Total number of failed territory_transformation calls", +) + +EFFECTS_TERRITORY_TRANSFORMATION_DURATION_SECONDS = Histogram( + "effects_territory_transformation_duration_seconds", + "Duration of territory_transformation execution", + buckets=(1, 2, 5, 10, 30, 60, 120, 300, 600, 900, 1200), +) + +EFFECTS_TASKS_CREATED_TOTAL = Counter( + "effects_tasks_created_total", + "Total number of tasks created", + labelnames=("method",), +) + +EFFECTS_TASKS_CACHE_HIT_TOTAL = Counter( + "effects_tasks_cache_hit_total", + "Total number of tasks served from cache (no execution needed)", + labelnames=("method",), +) + +EFFECTS_TASKS_ENQUEUED_TOTAL = Counter( + "effects_tasks_enqueued_total", + "Total number of tasks enqueued for execution", + labelnames=("method",), +) + +EFFECTS_TASKS_STARTED_TOTAL = Counter( + 
"effects_tasks_started_total", + "Total number of tasks started execution", + labelnames=("method",), +) + +EFFECTS_TASKS_FINISHED_TOTAL = Counter( + "effects_tasks_finished_total", + "Total number of tasks finished execution", + labelnames=("method", "status"), +) + +EFFECTS_TASK_DURATION_SECONDS = Histogram( + "effects_task_duration_seconds", + "Task execution duration in seconds", + labelnames=("method",), + buckets=(0.1, 0.2, 0.5, 1, 2, 5, 10, 30, 60, 120, 300, 600), +) + +EFFECTS_TASKS_QUEUE_SIZE = Gauge( + "effects_tasks_queue_size", + "Current number of tasks waiting in queue", +) + +def bind_queue_metrics(queue) -> None: + """Bind runtime queue instance to observable metrics.""" + EFFECTS_TASKS_QUEUE_SIZE.set_function(queue.qsize) + +EFFECTS_TASKS_RUNNING = Gauge( + "effects_tasks_running", + "Current number of tasks running", +) + +# --- Service entrypoints metrics --- + +EFFECTS_VALUES_TRANSFORMATION_TOTAL = Counter( + "effects_values_transformation_total", + "Total number of values_transformation calls", +) + +EFFECTS_VALUES_TRANSFORMATION_ERROR_TOTAL = Counter( + "effects_values_transformation_error_total", + "Total number of failed values_transformation calls", +) + +EFFECTS_VALUES_TRANSFORMATION_DURATION_SECONDS = Histogram( + "effects_values_transformation_duration_seconds", + "Duration of values_transformation execution", + buckets=(1, 2, 5, 10, 30, 60, 120, 300, 600), +) + +EFFECTS_VALUES_ORIENTED_REQUIREMENTS_TOTAL = Counter( + "effects_values_oriented_requirements_total", + "Total number of values_oriented_requirements calls", +) + +EFFECTS_VALUES_ORIENTED_REQUIREMENTS_ERROR_TOTAL = Counter( + "effects_values_oriented_requirements_error_total", + "Total number of failed values_oriented_requirements calls", +) + +EFFECTS_VALUES_ORIENTED_REQUIREMENTS_DURATION_SECONDS = Histogram( + "effects_values_oriented_requirements_duration_seconds", + "Duration of values_oriented_requirements execution", + buckets=(1, 2, 5, 10, 30, 60, 120, 300, 600), +) + +EFFECTS_SOCIO_ECONOMICAL_METRICS_TOTAL = Counter( + "effects_social_economical_metrics_total", + "Total number of evaluate_social_economical_metrics calls", +) + +EFFECTS_SOCIO_ECONOMICAL_METRICS_ERROR_TOTAL = Counter( + "effects_social_economical_metrics_error_total", + "Total number of failed evaluate_social_economical_metrics calls", +) + +EFFECTS_SOCIO_ECONOMICAL_METRICS_DURATION_SECONDS = Histogram( + "effects_social_economical_metrics_duration_seconds", + "Duration of evaluate_social_economical_metrics execution", + buckets=(1, 2, 5, 10, 30, 60, 120, 300, 600), +) + +def get_task_metrics() -> TaskMetrics: + """Create TaskMetrics facade.""" + return TaskMetrics( + created_total=EFFECTS_TASKS_CREATED_TOTAL, + cache_hit_total=EFFECTS_TASKS_CACHE_HIT_TOTAL, + enqueued_total=EFFECTS_TASKS_ENQUEUED_TOTAL, + started_total=EFFECTS_TASKS_STARTED_TOTAL, + finished_total=EFFECTS_TASKS_FINISHED_TOTAL, + duration_seconds=EFFECTS_TASK_DURATION_SECONDS, + running=EFFECTS_TASKS_RUNNING, + queue_size=EFFECTS_TASKS_QUEUE_SIZE, + ) diff --git a/app/prometheus/server.py b/app/prometheus/server.py new file mode 100644 index 0000000..9db4761 --- /dev/null +++ b/app/prometheus/server.py @@ -0,0 +1,17 @@ +from prometheus_client import start_http_server + +_server = None + + +def start_metrics_server(port: int = 8000) -> None: + """Start Prometheus metrics HTTP server.""" + global _server + _server = start_http_server(port) + + +def stop_metrics_server() -> None: + """Stop Prometheus metrics HTTP server.""" + global _server + if _server is not None: 
+ _server.shutdown() + _server = None diff --git a/app/prometheus/task_metrics.py b/app/prometheus/task_metrics.py new file mode 100644 index 0000000..e5d45f5 --- /dev/null +++ b/app/prometheus/task_metrics.py @@ -0,0 +1,60 @@ +"""Task metrics reporting utilities.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Callable + +from prometheus_client import Counter, Gauge, Histogram + + +@dataclass(frozen=True) +class TaskMetrics: + """ + Aggregates Prometheus metrics related to tasks and provides + convenient reporting methods. + + This class hides Prometheus primitives from business logic. + """ + + created_total: Counter + cache_hit_total: Counter + enqueued_total: Counter + started_total: Counter + finished_total: Counter + duration_seconds: Histogram + running: Gauge + queue_size: Gauge + + def bind_queue_size(self, qsize_getter: Callable[[], int]) -> None: + """ + Bind queue size metric to a callable that returns current size. + + Args: + qsize_getter: Callable returning current queue size. + """ + self.queue_size.set_function(qsize_getter) + + def on_created(self, method: str) -> None: + self.created_total.labels(method=method).inc() + + def on_cache_hit(self, method: str) -> None: + self.cache_hit_total.labels(method=method).inc() + + def on_enqueued(self, method: str) -> None: + self.enqueued_total.labels(method=method).inc() + + def on_started(self, method: str) -> None: + self.started_total.labels(method=method).inc() + self.running.inc() + + def on_finished_success(self, method: str) -> None: + self.finished_total.labels(method=method, status="success").inc() + self.running.dec() + + def on_finished_failed(self, method: str) -> None: + self.finished_total.labels(method=method, status="failed").inc() + self.running.dec() + + def observe_duration(self, method: str, seconds: float) -> None: + self.duration_seconds.labels(method=method).observe(seconds) diff --git a/app/system_router/__init__.py b/app/system_router/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/system_router/system_controller.py b/app/system_router/system_controller.py new file mode 100644 index 0000000..0177892 --- /dev/null +++ b/app/system_router/system_controller.py @@ -0,0 +1,37 @@ +from fastapi import APIRouter +from fastapi.responses import FileResponse + +from app.common.exceptions.http_exception_wrapper import http_exception +from app.dependencies import absolute_app_path, config + +LOGS_PATH = absolute_app_path / f"{config.get('LOG_NAME')}" +system_router = APIRouter(prefix="/system", tags=["System"]) + + +# TODO use structlog instead of loguru +@system_router.get("/logs") +async def get_logs(): + """ + Get logs file from app + """ + + try: + return FileResponse( + LOGS_PATH, + media_type="application/octet-stream", + filename=f"effects.log", + ) + except FileNotFoundError as e: + raise http_exception( + status_code=404, + msg="Log file not found", + _input={"log_path": LOGS_PATH, "log_file_name": config.get("LOG_NAME")}, + _detail={"error": repr(e)}, + ) from e + except Exception as e: + raise http_exception( + status_code=500, + msg="Internal server error during reading logs", + _input={"log_path": LOGS_PATH, "log_file_name": config.get("LOG_NAME")}, + _detail={"error": repr(e)}, + ) from e diff --git a/catboost_model.cbm b/catboost_model.cbm new file mode 100644 index 0000000..ef6bcec Binary files /dev/null and b/catboost_model.cbm differ diff --git a/docker-compose.actions.yml b/docker-compose.actions.yml new file mode 100644 index 
0000000..67e35da --- /dev/null +++ b/docker-compose.actions.yml @@ -0,0 +1,12 @@ +services: + object_effects: + image: ${IMAGE} + container_name: ${CONTAINER_NAME} + ports: + - 5100:80 + volumes: + - /var/essdata/effects/__effects_cache__:/app/__effects_cache__ + env_file: + - .env.development + restart: always + diff --git a/docker-compose.yml b/docker-compose.yml index bfeefdb..6db91d2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,9 +2,11 @@ version: '3.4' services: EffectsAPI: - image: effectsapi:dev + image: effectsapi build: context: . dockerfile: ./Dockerfile ports: - - 80:80 \ No newline at end of file + - "80:80" + volumes: + - ./__effects_cache__:/app/__effects_cache__ \ No newline at end of file diff --git a/pip.conf b/pip.conf new file mode 100644 index 0000000..d16d3ff --- /dev/null +++ b/pip.conf @@ -0,0 +1,4 @@ +[global] +index-url=http://10.32.11.13:3141/root/pypi/+simple/ +trusted-host=10.32.11.13 +timeout=120 diff --git a/requirements.txt b/requirements.txt index 77ca512..5ec80ef 100644 Binary files a/requirements.txt and b/requirements.txt differ
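Reviewer note: below is a minimal client sketch of the task flow added in app/effects_api/tasks_controller.py. It assumes the service is reachable through the 5100:80 port mapping from docker-compose.actions.yml, that verify_token accepts a bearer token in the Authorization header, and that scenario_id is the only required query parameter of ContextDevelopmentDTO; none of these details are fixed by this patch, so treat the snippet as illustrative only, not as part of the change.

# Hypothetical usage sketch (not part of the patch).
import time

import requests

BASE_URL = "http://localhost:5100/tasks"        # assumption: compose maps host 5100 to container 80
HEADERS = {"Authorization": "Bearer <token>"}   # assumption: verify_token expects a bearer token


def run_territory_transformation(scenario_id: int, service_name: str) -> dict:
    # 1. Queue (or reuse a cached) scenario-based task: POST /tasks/{method}.
    created = requests.post(
        f"{BASE_URL}/territory_transformation",
        params={"scenario_id": scenario_id},
        headers=HEADERS,
        timeout=60,
    ).json()
    task_id, status = created["task_id"], created["status"]

    # 2. Poll GET /tasks/status/{task_id} until the cached result is complete or the task fails.
    for _ in range(360):  # roughly 30 minutes at a 5 s interval
        if status in {"done", "failed"}:
            break
        time.sleep(5)
        status = requests.get(f"{BASE_URL}/status/{task_id}", timeout=60).json()["status"]
    if status != "done":
        raise RuntimeError(f"task {task_id} finished with status {status}")

    # 3. Read the per-service GeoJSON layer from the cached result (this endpoint takes no token).
    layer = requests.get(
        f"{BASE_URL}/territory_transformation/{scenario_id}/{service_name}",
        timeout=60,
    )
    layer.raise_for_status()
    return layer.json()

The polling loop maps directly onto the statuses documented on the create endpoints: anything other than done or failed means the task is still queued or running, and failure details can be read from GET /tasks/status/{task_id}.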