From 8143ed2306450464f2962f0b6966617daa86a626 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Tue, 27 May 2025 16:30:07 +0200 Subject: [PATCH 01/38] first tries for geonetwork datadir checker useless ressources --- geordash/checks/gn_datadir.py | 159 ++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 geordash/checks/gn_datadir.py diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py new file mode 100644 index 0000000..9c730b1 --- /dev/null +++ b/geordash/checks/gn_datadir.py @@ -0,0 +1,159 @@ +#!/bin/env python3 +# -*- coding: utf-8 -*- +# vim: ts=4 sw=4 et + +from sqlalchemy import create_engine, MetaData, select, Column, String, Integer, Text +from sqlalchemy.dialects.postgresql import array +from sqlalchemy.engine import URL +from sqlalchemy.ext.automap import automap_base +from sqlalchemy.exc import NoResultFound, OperationalError +from sqlalchemy.orm import sessionmaker +from sqlalchemy.ext.declarative import declarative_base +import glob +from pathlib import Path +Base = declarative_base() + +# Define the Metadata model (example schema of a GeoNetwork metadata table) +class Metadata(Base): + __tablename__ = "metadata" + __table_args__ = {"schema": "geonetwork"} + id = Column(Integer, primary_key=True) + uuid = Column(String, unique=True) + data = Column(Text) # Metadata content (e.g., XML or JSON) + schemaid = Column(String) # Metadata schema (e.g., ISO 19115) + isharvested = Column(Integer) + +def get_folder_size(folder): + return ByteSize(sum(file.stat().st_size for file in Path(folder).rglob('*'))) + + +class ByteSize(int): + _KB = 1024 + _suffixes = 'B', 'KB', 'MB', 'GB', 'PB' + + def __new__(cls, *args, **kwargs): + return super().__new__(cls, *args, **kwargs) + + def __init__(self, *args, **kwargs): + self.bytes = self.B = int(self) + self.kilobytes = self.KB = self / self._KB ** 1 + self.megabytes = self.MB = self / self._KB ** 2 + self.gigabytes = self.GB = self / self._KB ** 3 + self.petabytes = self.PB = self / self._KB ** 4 + *suffixes, last = self._suffixes + suffix = next(( + suffix + for suffix in suffixes + if 1 < getattr(self, suffix) < self._KB + ), last) + self.readable = suffix, getattr(self, suffix) + + super().__init__() + + def __str__(self): + return self.__format__('.2f') + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, super().__repr__()) + + def __format__(self, format_spec): + suffix, val = self.readable + return '{val:{fmt}} {suf}'.format(val=val, fmt=format_spec, suf=suffix) + + def __sub__(self, other): + return self.__class__(super().__sub__(other)) + + def __add__(self, other): + return self.__class__(super().__add__(other)) + + def __mul__(self, other): + return self.__class__(super().__mul__(other)) + + def __rsub__(self, other): + return self.__class__(super().__sub__(other)) + + def __radd__(self, other): + return self.__class__(super().__add__(other)) + + def __rmul__(self, other): + return self.__class__(super().__rmul__(other)) +conf = { + 'pgsqlUser': 'georchestra', + 'pgsqlHost': '127.0.0.1', + 'pgsqlPort': '5432', + 'pgsqlPassword': 'georchestra', + 'pgsqlDatabase': 'georchestra', + 'geonetworkSchema': 'geonetwork' +} + +# solves conflicts in relationship naming ? +def name_for_collection_relationship(base, local_cls, referred_cls, constraint): + name = referred_cls.__name__.lower() + local_table = local_cls.__table__ + # print("local_cls={}, local_table={}, referred_cls={}, will return name={}, constraint={}".format(local_cls, local_table, referred_cls, name, constraint)) + if name in local_table.columns: + newname = name + "_" + print("Already detected name %s present. using %s" % (name, newname)) + return newname + return name + +class GeonetworkDatadirChecker: + def __init__(self, conf): + url = URL.create( + drivername="postgresql", + username=conf.get("pgsqlUser"), + host=conf.get("pgsqlHost"), + port=conf.get("pgsqlPort"), + password=conf.get("pgsqlPassword"), + database=conf.get("pgsqlDatabase"), + ) + + engine = create_engine(url) + + # Perform database reflection to analyze tables and relationships + m = MetaData(schema=conf.get("geonetworkSchema")) + Base = automap_base(metadata=m) + Base.prepare( + autoload_with=engine, + name_for_collection_relationship=name_for_collection_relationship, + ) + + self.sessionm = sessionmaker(bind=engine) + self.sessiono = self.sessionm() + + self.gnmetadatas = self.session().query(Metadata).all() + # self.gnmetadatas.sort(key=lambda x: x.id) + self.meta = [] + for foldermeta in glob.glob("/mnt/geonetwork_datadir/data/metadata_data/*/*"): + idmeta = foldermeta.split("/")[-1] + existing_index = 0 + for (index, item) in enumerate(self.gnmetadatas): + if item.id == int(idmeta): + existing_index =index + break + if existing_index: + continue + else: + # append useless folder + self.meta.append(foldermeta) + print(self.meta) + total_could_be_deleted = 0 + for path in self.meta: + total_could_be_deleted += get_folder_size(path) + + print("In total " + str(total_could_be_deleted) + " on "+ str(get_folder_size("/mnt/geonetwork_datdadir")) +" bytes could be deleted") + + def session(self): + try: + self.sessiono.execute(select(1)) + except OperationalError: + print("Reconnecting to the database...") + self.sessiono = self.sessionm() + return self.sessiono + + +def check_configs(): + """Check geonetwork datadirs.""" + return False + +GeonetworkDatadirChecker(conf) \ No newline at end of file From 4118c63e4369aed545af7c21b1044caa879d71e3 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 25 Jun 2025 17:42:39 +0200 Subject: [PATCH 02/38] continue work for adding geonetwork datadir check inside gaia --- docker/docker-compose.yml | 2 + geordash/__init__.py | 2 + geordash/admin.py | 15 +++++++ geordash/checks/gn_datadir.py | 15 +++---- .../templates/admin/geonetwork_datadir.html | 40 +++++++++++++++++++ geordash/templates/dashboard/header.tmpl | 1 + geordash/templates/dashboard/home.html | 9 +++++ 7 files changed, 77 insertions(+), 7 deletions(-) create mode 100644 geordash/templates/admin/geonetwork_datadir.html diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index efe279d..f4f55db 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -21,6 +21,7 @@ services: - ../config.py.example:/geordash/config.py:ro - ../geordash/celeryconfig.py.example:/geordash/geordash/celeryconfig.py:ro - /etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt:ro + - ../:/geordash #- ../:/geordash # need to add path of geonetwork and geoserver datadir in here @@ -74,6 +75,7 @@ services: - ../config.py.example:/geordash/config.py:ro - ../geordash/celeryconfig.py.example:/geordash/geordash/celeryconfig.py:ro - /etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt:ro + - ../:/geordash #- ../:/geordash # need to add path of geonetwork and geoserver datadir in here diff --git a/geordash/__init__.py b/geordash/__init__.py index aabae75..692b35e 100644 --- a/geordash/__init__.py +++ b/geordash/__init__.py @@ -11,6 +11,7 @@ from geordash.georchestraconfig import GeorchestraConfig from geordash.result_backend.redisbackend import RedisClient from geordash.checks.mapstore import MapstoreChecker +from geordash.checks.gn_datadir import GeonetworkDatadirChecker from geordash.decorators import is_superuser from config import url as redisurl import threading @@ -85,6 +86,7 @@ def inject_globals(): app.extensions["conf"] = conf app.extensions["owscache"] = OwsCapCache(conf, app) app.extensions["msc"] = MapstoreChecker(conf) + app.extensions["gndc"] = GeonetworkDatadirChecker(conf) app.extensions["rcli"] = RedisClient(redisurl) from . import views, api, admin, dashboard diff --git a/geordash/admin.py b/geordash/admin.py index 3d3c5ee..6c2ea31 100644 --- a/geordash/admin.py +++ b/geordash/admin.py @@ -51,6 +51,21 @@ def geonetwork(): p["xurl"] = url_for("dashboard.csw", portal=p["uuid"]) return render_template("admin/geonetwork.html", portals=portals) +@admin_bp.route("/geonetwork/datadir") +@check_role(role="GN_ADMIN") +def geonetwork_datadir(): + localgn = app.extensions["conf"].get("localgn", "urls") + useless_ressource = app.extensions["gndc"].get_metauseless_list() + + if type(useless_ressource) != list: + return make_response( + jsonify( + {"error": f"an error occured when fetching subportals: got {useless_ressource}"}, + 404, + ) + ) + + return render_template("admin/geonetwork_datadir.html", useless_ressource=useless_ressource) @admin_bp.route("/geoserver") @check_role(role="ADMINISTRATOR") diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 9c730b1..b7c41c0 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -11,6 +11,8 @@ from sqlalchemy.ext.declarative import declarative_base import glob from pathlib import Path +from flask import current_app as app + Base = declarative_base() # Define the Metadata model (example schema of a GeoNetwork metadata table) @@ -136,11 +138,14 @@ def __init__(self, conf): else: # append useless folder self.meta.append(foldermeta) - print(self.meta) + + def get_metauseless_list(self): + return self.meta + + def process_size(self): total_could_be_deleted = 0 for path in self.meta: total_could_be_deleted += get_folder_size(path) - print("In total " + str(total_could_be_deleted) + " on "+ str(get_folder_size("/mnt/geonetwork_datdadir")) +" bytes could be deleted") def session(self): @@ -152,8 +157,4 @@ def session(self): return self.sessiono -def check_configs(): - """Check geonetwork datadirs.""" - return False - -GeonetworkDatadirChecker(conf) \ No newline at end of file +# GeonetworkDatadirChecker(conf) \ No newline at end of file diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html new file mode 100644 index 0000000..a1e620f --- /dev/null +++ b/geordash/templates/admin/geonetwork_datadir.html @@ -0,0 +1,40 @@ +{% set title = 'admin geonetwork' %} +{% include 'header.tmpl' %} + + {% block scripts %} + + {% endblock %} + + {% block content %} +
+

Geonetwork datadir Checker

+
+List of the useless ressources in geonetwork datadir: +
+ +
+
+ {% endblock %} + + diff --git a/geordash/templates/dashboard/header.tmpl b/geordash/templates/dashboard/header.tmpl index b0596e5..4283997 100644 --- a/geordash/templates/dashboard/header.tmpl +++ b/geordash/templates/dashboard/header.tmpl @@ -91,6 +91,7 @@ Non connecté
  • {% if superuser %} {{ render_dropdown_item('dashboard.admin.geonetwork', 'Liste des sous-portails') }} + {{ render_dropdown_item('dashboard.admin.geonetwork_datadir', 'Datadir checker') }} {% endif %} diff --git a/geordash/templates/dashboard/home.html b/geordash/templates/dashboard/home.html index aaa6f27..262d72f 100644 --- a/geordash/templates/dashboard/home.html +++ b/geordash/templates/dashboard/home.html @@ -103,6 +103,15 @@
    Mapstore contexts
    +
    +
    +
    +
    Geonetwork Datadir checker
    +

    Placeholder

    +

    Job never ran

    +
    +
    +
    {% endblock %} From 711c282a78533d9921210529bd559b97290f2892 Mon Sep 17 00:00:00 2001 From: mmohad Date: Thu, 26 Jun 2025 16:12:06 +0200 Subject: [PATCH 03/38] feat(datadir): some desgin to show result of datadir check --- docker/docker-compose.yml | 10 ++++------ geordash/admin.py | 5 +++-- geordash/checks/gn_datadir.py | 8 ++++++-- geordash/templates/admin/geonetwork_datadir.html | 13 +++++++------ 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index f4f55db..c1fad47 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -12,9 +12,8 @@ services: environment: - REDIS_REPLICATION_MODE=master gaia-back: - build: - context: ../ - dockerfile: ./docker/Dockerfile_flask + image: "docker-gaia-back" + volumes: - ../../datadir:/etc/georchestra - ../gunicorn.conf.py.example:/geordash/gunicorn.conf.py:ro @@ -66,9 +65,8 @@ services: gaia-celery: - build: - context: ../ - dockerfile: ./docker/Dockerfile_celery + image: "docker-gaia-celery:latest" + volumes: - ../../datadir:/etc/georchestra - ../gunicorn.conf.py.example:/geordash/gunicorn.conf.py:ro diff --git a/geordash/admin.py b/geordash/admin.py index 6c2ea31..e59dd46 100644 --- a/geordash/admin.py +++ b/geordash/admin.py @@ -55,7 +55,8 @@ def geonetwork(): @check_role(role="GN_ADMIN") def geonetwork_datadir(): localgn = app.extensions["conf"].get("localgn", "urls") - useless_ressource = app.extensions["gndc"].get_metauseless_list() + useless_ressource = [{"paht":f,"size":app.extensions["gndc"].process_size(f)} for f in app.extensions["gndc"].get_metauseless_list()] + total_process_size = app.extensions["gndc"].all_process_size() if type(useless_ressource) != list: return make_response( @@ -65,7 +66,7 @@ def geonetwork_datadir(): ) ) - return render_template("admin/geonetwork_datadir.html", useless_ressource=useless_ressource) + return render_template("admin/geonetwork_datadir.html", useless_ressource=useless_ressource, total_process_size=total_process_size) @admin_bp.route("/geoserver") @check_role(role="ADMINISTRATOR") diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index b7c41c0..004c579 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -142,11 +142,15 @@ def __init__(self, conf): def get_metauseless_list(self): return self.meta - def process_size(self): + def all_process_size(self): total_could_be_deleted = 0 for path in self.meta: total_could_be_deleted += get_folder_size(path) - print("In total " + str(total_could_be_deleted) + " on "+ str(get_folder_size("/mnt/geonetwork_datdadir")) +" bytes could be deleted") + return "In total " + str(total_could_be_deleted) + " on "+ str(get_folder_size("/mnt/geonetwork_datdadir")) +" bytes could be deleted" + + + def process_size(self, path): + return get_folder_size(path) def session(self): try: diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html index a1e620f..82c9e19 100644 --- a/geordash/templates/admin/geonetwork_datadir.html +++ b/geordash/templates/admin/geonetwork_datadir.html @@ -4,16 +4,16 @@ {% block scripts %} {% endblock %} - + {% block content %}

    Geonetwork datadir Checker

    +

    List of the useless ressources in geonetwork datadir:
    From 09587e1ea17a17fec19437ddca552c6890154891 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Thu, 26 Jun 2025 16:42:03 +0200 Subject: [PATCH 04/38] add debug function entry point --- geordash/dashboard.py | 4 ++++ geordash/georchestraconfig.py | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/geordash/dashboard.py b/geordash/dashboard.py index 54a3145..03b6db1 100644 --- a/geordash/dashboard.py +++ b/geordash/dashboard.py @@ -66,6 +66,10 @@ def get_rescontent_from_resid(restype, resid): def home(): return render_template("home.html") +@dash_bp.route("/debug") +def debug(): + # app.logger.error(app.extensions["conf"]) + return app.extensions["conf"].tostr() @dash_bp.route("/my-metadata") @check_role(role="GN_EDITOR") diff --git a/geordash/georchestraconfig.py b/geordash/georchestraconfig.py index b131ec3..1e7ff71 100644 --- a/geordash/georchestraconfig.py +++ b/geordash/georchestraconfig.py @@ -7,7 +7,7 @@ from os import getenv import json import re - +import pprint class GeorchestraConfig: def __init__(self): @@ -51,6 +51,20 @@ def __init__(self): # safe default value self.sections["urls"]["localgs"] = "geoserver" + def tostr(self): + # pp = pprint.PrettyPrinter(indent=4) + # return pp.pprint(self.sections) + + str = "" + for key in self.sections: + str += key + ":\r\n
    " + for key2 in self.sections[key]: + str += " \t " + key2 + " : " + str+= " \t " + self.sections[key][key2] + " = " + self.get(key2, section=key) + "\r\n
    " + print(type(str)) + print(f"Keys in string: {str}") + return str + def get(self, key, section="default"): if section not in self.sections: return None From ee47cdd367d4e443cedaab06d2ea1f22fc5b9132 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Thu, 26 Jun 2025 17:17:22 +0200 Subject: [PATCH 05/38] add back the build in docker compose --- docker/docker-compose.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index c1fad47..f4d0b68 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -12,8 +12,10 @@ services: environment: - REDIS_REPLICATION_MODE=master gaia-back: - image: "docker-gaia-back" - + # image: "docker-gaia-back" + build: + context: ../ + dockerfile: ./docker/Dockerfile_flask volumes: - ../../datadir:/etc/georchestra - ../gunicorn.conf.py.example:/geordash/gunicorn.conf.py:ro @@ -65,7 +67,10 @@ services: gaia-celery: - image: "docker-gaia-celery:latest" + # image: "docker-gaia-celery:latest" + build: + context: ../ + dockerfile: ./docker/Dockerfile_celery volumes: - ../../datadir:/etc/georchestra From 1fa5c5d543f4ea7cae29475b0e9f2d65d09b7def Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 6 Aug 2025 15:31:19 +0200 Subject: [PATCH 06/38] rework WIP as a task the geonetwork datadir checker --- geordash/admin.py | 18 +-- geordash/celeryconfig.py.example | 5 + geordash/checks/gn_datadir.py | 109 ++++++++++++------ .../templates/admin/geonetwork_datadir.html | 34 +++--- geordash/views.py | 7 ++ 5 files changed, 110 insertions(+), 63 deletions(-) diff --git a/geordash/admin.py b/geordash/admin.py index e59dd46..46697e2 100644 --- a/geordash/admin.py +++ b/geordash/admin.py @@ -54,19 +54,11 @@ def geonetwork(): @admin_bp.route("/geonetwork/datadir") @check_role(role="GN_ADMIN") def geonetwork_datadir(): - localgn = app.extensions["conf"].get("localgn", "urls") - useless_ressource = [{"paht":f,"size":app.extensions["gndc"].process_size(f)} for f in app.extensions["gndc"].get_metauseless_list()] - total_process_size = app.extensions["gndc"].all_process_size() - - if type(useless_ressource) != list: - return make_response( - jsonify( - {"error": f"an error occured when fetching subportals: got {useless_ressource}"}, - 404, - ) - ) - - return render_template("admin/geonetwork_datadir.html", useless_ressource=useless_ressource, total_process_size=total_process_size) + all_jobs_for_gnconfigs = app.extensions["rcli"].get_taskids_by_taskname_and_args( + "geordash.checks.gn_datadir.check_gn_meta", [] + ) + #if type(all_jobs_for_gnconfigs) != NoneType: + return render_template("admin/geonetwork_datadir.html", previous_configs_jobs=all_jobs_for_gnconfigs) @admin_bp.route("/geoserver") @check_role(role="ADMINISTRATOR") diff --git a/geordash/celeryconfig.py.example b/geordash/celeryconfig.py.example index 3749cba..5c4e62c 100644 --- a/geordash/celeryconfig.py.example +++ b/geordash/celeryconfig.py.example @@ -68,6 +68,11 @@ beat_schedule = { # "args": [], # "schedule": crontab(minute=0, hour=1), # }, + "check-gn-metadatadir-every-night": { + "task": "geordash.checks.gn_datadir.check_gn_meta", + "args": [""], + "schedule": crontab(minute=55, hour=0), + }, } # otherwise scheduled hours is taken as UTC timezone = "Europe/Paris" diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 004c579..92baa87 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -2,6 +2,17 @@ # -*- coding: utf-8 -*- # vim: ts=4 sw=4 et +import requests +from requests.exceptions import ReadTimeout + +from celery import shared_task +from celery import Task +from celery import group + +from flask import current_app as app +from geordash.utils import find_localmduuid, unmunge, objtype +from geordash.logwrap import get_logger + from sqlalchemy import create_engine, MetaData, select, Column, String, Integer, Text from sqlalchemy.dialects.postgresql import array from sqlalchemy.engine import URL @@ -11,7 +22,6 @@ from sqlalchemy.ext.declarative import declarative_base import glob from pathlib import Path -from flask import current_app as app Base = declarative_base() @@ -101,7 +111,7 @@ def name_for_collection_relationship(base, local_cls, referred_cls, constraint): class GeonetworkDatadirChecker: def __init__(self, conf): - url = URL.create( + self.url = URL.create( drivername="postgresql", username=conf.get("pgsqlUser"), host=conf.get("pgsqlHost"), @@ -109,8 +119,10 @@ def __init__(self, conf): password=conf.get("pgsqlPassword"), database=conf.get("pgsqlDatabase"), ) - - engine = create_engine(url) + def connectdb(self): + engine = create_engine(self.url) + self.sessionm = sessionmaker(bind=engine) + self.sessiono = self.sessionm() # Perform database reflection to analyze tables and relationships m = MetaData(schema=conf.get("geonetworkSchema")) @@ -119,38 +131,8 @@ def __init__(self, conf): autoload_with=engine, name_for_collection_relationship=name_for_collection_relationship, ) - - self.sessionm = sessionmaker(bind=engine) - self.sessiono = self.sessionm() - - self.gnmetadatas = self.session().query(Metadata).all() - # self.gnmetadatas.sort(key=lambda x: x.id) - self.meta = [] - for foldermeta in glob.glob("/mnt/geonetwork_datadir/data/metadata_data/*/*"): - idmeta = foldermeta.split("/")[-1] - existing_index = 0 - for (index, item) in enumerate(self.gnmetadatas): - if item.id == int(idmeta): - existing_index =index - break - if existing_index: - continue - else: - # append useless folder - self.meta.append(foldermeta) - - def get_metauseless_list(self): - return self.meta - - def all_process_size(self): - total_could_be_deleted = 0 - for path in self.meta: - total_could_be_deleted += get_folder_size(path) - return "In total " + str(total_could_be_deleted) + " on "+ str(get_folder_size("/mnt/geonetwork_datdadir")) +" bytes could be deleted" - - - def process_size(self, path): - return get_folder_size(path) + def request_metadata(self): + return self.session().query(Metadata).all() def session(self): try: @@ -159,6 +141,61 @@ def session(self): print("Reconnecting to the database...") self.sessiono = self.sessionm() return self.sessiono + def closedb(self): + self.sessiono.close() + self.sessionm.close_all() + +def all_process_size(meta): + total_could_be_deleted = 0 + for path in meta: + total_could_be_deleted += get_folder_size(path) + return "In total " + str(total_could_be_deleted) + " on "+ str(get_folder_size("/mnt/geonetwork_datdadir")) +" bytes could be deleted" + +def process_size(path): + return get_folder_size(path) + +def session(sessiono, sessionm): + try: + sessiono.execute(select(1)) + except OperationalError: + print("Reconnecting to the database...") + sessiono = sessionm() + return sessiono + +@shared_task() +def check_gn_meta(): + get_logger("CheckGNDatadir").debug("Start gn datadir checker") + geonetworkdatadirchecker = app.extensions["gndc"] + geonetworkdatadirchecker.connectdb() + + gnmetadatas = geonetworkdatadirchecker.request_metadata() + # self.gnmetadatas.sort(key=lambda x: x.id) + get_logger("CheckGNDatadir").debug("pouet1") + meta = [] + total_could_be_deleted = 0 + for foldermeta in glob.glob("/mnt/geonetwork_datadir/data/metadata_data/*/*"): + idmeta = foldermeta.split("/")[-1] + get_logger("CheckGNDatadir").debug("pouet "+foldermeta) + existing_index = 0 + + for (index, item) in enumerate(gnmetadatas): + if item.id == int(idmeta): + existing_index = index + break + if existing_index: + continue + else: + # append useless folder + meta.append([foldermeta, str(get_folder_size(foldermeta))]) + total_could_be_deleted+=get_folder_size(foldermeta) + get_logger("CheckGNDatadir").debug("pouet aie aie aie") + geonetworkdatadirchecker.closedb() + get_logger("CheckGNDatadir").debug("finish gn datadir checker") + if not len(meta): + meta.append("No result") + else: + meta.append(["Total",str(total_could_be_deleted)]) + return meta # GeonetworkDatadirChecker(conf) \ No newline at end of file diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html index 82c9e19..d370151 100644 --- a/geordash/templates/admin/geonetwork_datadir.html +++ b/geordash/templates/admin/geonetwork_datadir.html @@ -3,28 +3,29 @@ {% block scripts %} {% endblock %} + {% block content %}

    Geonetwork datadir Checker

    + +
    + + +

    List of the useless ressources in geonetwork datadir: @@ -36,6 +37,11 @@

    Geonetwork datadir Checker

    data-search="true">
    +

    Problems

    +
    +
    +

    Previous

    +

    {% endblock %} diff --git a/geordash/views.py b/geordash/views.py index fdf5d59..0c3c7c9 100644 --- a/geordash/views.py +++ b/geordash/views.py @@ -320,3 +320,10 @@ def check_cswservice(url): "geordash.checks.csw.check_catalog", [url], groupresult.id ) return {"result_id": groupresult.id} + +@tasks_bp.route("/check/gndatadir/result.json") +def check_gndatadir(): + # metalist = geordash.checks.gn_datadir.check_gn_meta() + # return {"result_id": metalist} + result = geordash.checks.gn_datadir.check_gn_meta.delay() + return {"result_id": result.id} From a726891b619fef79dbf5b8b63a73071b49b91e5d Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 6 Aug 2025 20:00:26 +0200 Subject: [PATCH 07/38] continue but still not working --- geordash/celeryconfig.py.example | 18 ++------- geordash/checks/gn_datadir.py | 63 +++++++++++++++++--------------- geordash/views.py | 7 +++- 3 files changed, 43 insertions(+), 45 deletions(-) diff --git a/geordash/celeryconfig.py.example b/geordash/celeryconfig.py.example index 5c4e62c..1802f84 100644 --- a/geordash/celeryconfig.py.example +++ b/geordash/celeryconfig.py.example @@ -53,26 +53,16 @@ beat_schedule = { "args": ("wfs", "/geoserver/ows"), "schedule": crontab(minute=30, hour=0), }, - "check-gs-datadir-every-sunday": { - "task": "geordash.checks.gsd.gsdatadir", - "args": [], - "schedule": crontab(day_of_week='sunday',minute=30, hour=1), - }, "check-gn-metadatas-every-night": { "task": "geordash.checks.csw.check_catalog", "args": ["/geonetwork/srv/fre/csw"], "schedule": crontab(minute=45, hour=0), }, - # "check-all-mviewer": { - # "task": "geordash.checks.mviewer.check_all", - # "args": [], - # "schedule": crontab(minute=0, hour=1), - # }, "check-gn-metadatadir-every-night": { - "task": "geordash.checks.gn_datadir.check_gn_meta", - "args": [""], - "schedule": crontab(minute=55, hour=0), - }, + "task": "geordash.checks.gn_datadir.check_gn_meta", + "args": [""], + "schedule": crontab(minute=55, hour=0), +}, } # otherwise scheduled hours is taken as UTC timezone = "Europe/Paris" diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 92baa87..10a16ae 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -1,6 +1,7 @@ #!/bin/env python3 # -*- coding: utf-8 -*- # vim: ts=4 sw=4 et +import json import requests from requests.exceptions import ReadTimeout @@ -9,9 +10,14 @@ from celery import Task from celery import group +from geordash.logwrap import get_logger +from geordash.owscapcache import OwsCapCache + +from owslib.fes import PropertyIsEqualTo, And + from flask import current_app as app from geordash.utils import find_localmduuid, unmunge, objtype -from geordash.logwrap import get_logger + from sqlalchemy import create_engine, MetaData, select, Column, String, Integer, Text from sqlalchemy.dialects.postgresql import array @@ -20,9 +26,11 @@ from sqlalchemy.exc import NoResultFound, OperationalError from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.ext.declarative import DeclarativeMeta import glob from pathlib import Path + Base = declarative_base() # Define the Metadata model (example schema of a GeoNetwork metadata table) @@ -38,7 +46,6 @@ class Metadata(Base): def get_folder_size(folder): return ByteSize(sum(file.stat().st_size for file in Path(folder).rglob('*'))) - class ByteSize(int): _KB = 1024 _suffixes = 'B', 'KB', 'MB', 'GB', 'PB' @@ -111,7 +118,7 @@ def name_for_collection_relationship(base, local_cls, referred_cls, constraint): class GeonetworkDatadirChecker: def __init__(self, conf): - self.url = URL.create( + url = URL.create( drivername="postgresql", username=conf.get("pgsqlUser"), host=conf.get("pgsqlHost"), @@ -119,8 +126,8 @@ def __init__(self, conf): password=conf.get("pgsqlPassword"), database=conf.get("pgsqlDatabase"), ) - def connectdb(self): - engine = create_engine(self.url) + + engine = create_engine(url) self.sessionm = sessionmaker(bind=engine) self.sessiono = self.sessionm() @@ -131,8 +138,9 @@ def connectdb(self): autoload_with=engine, name_for_collection_relationship=name_for_collection_relationship, ) - def request_metadata(self): - return self.session().query(Metadata).all() + self.allmetadatas = self.session().query(Metadata).all() + # for (index, item) in enumerate(self.allmetadatas): + # get_logger("CheckGNDatadir").debug("test1") def session(self): try: @@ -141,9 +149,13 @@ def session(self): print("Reconnecting to the database...") self.sessiono = self.sessionm() return self.sessiono - def closedb(self): - self.sessiono.close() - self.sessionm.close_all() + + def refresh_meta_list(self): + self.allmetadatas = self.session().query(Metadata).all() + + def get_meta_list(self): + return self.allmetadatas + def all_process_size(meta): total_could_be_deleted = 0 @@ -154,28 +166,18 @@ def all_process_size(meta): def process_size(path): return get_folder_size(path) -def session(sessiono, sessionm): - try: - sessiono.execute(select(1)) - except OperationalError: - print("Reconnecting to the database...") - sessiono = sessionm() - return sessiono - -@shared_task() -def check_gn_meta(): +@shared_task(bind=True) +def check_gn_meta(self): get_logger("CheckGNDatadir").debug("Start gn datadir checker") - geonetworkdatadirchecker = app.extensions["gndc"] - geonetworkdatadirchecker.connectdb() - - gnmetadatas = geonetworkdatadirchecker.request_metadata() + gnmetadatas = app.extensions["gndc"].get_meta_list() # self.gnmetadatas.sort(key=lambda x: x.id) get_logger("CheckGNDatadir").debug("pouet1") - meta = [] + print(gnmetadatas) + meta = list() total_could_be_deleted = 0 for foldermeta in glob.glob("/mnt/geonetwork_datadir/data/metadata_data/*/*"): idmeta = foldermeta.split("/")[-1] - get_logger("CheckGNDatadir").debug("pouet "+foldermeta) + get_logger("CheckGNDatadir").debug("pouet " + foldermeta) existing_index = 0 for (index, item) in enumerate(gnmetadatas): @@ -187,15 +189,16 @@ def check_gn_meta(): else: # append useless folder meta.append([foldermeta, str(get_folder_size(foldermeta))]) - total_could_be_deleted+=get_folder_size(foldermeta) + total_could_be_deleted += get_folder_size(foldermeta) get_logger("CheckGNDatadir").debug("pouet aie aie aie") - geonetworkdatadirchecker.closedb() get_logger("CheckGNDatadir").debug("finish gn datadir checker") if not len(meta): - meta.append("No result") + meta.append("result") else: - meta.append(["Total",str(total_could_be_deleted)]) + meta.append(["Total", str(total_could_be_deleted)]) + return meta + # GeonetworkDatadirChecker(conf) \ No newline at end of file diff --git a/geordash/views.py b/geordash/views.py index 0c3c7c9..1e79b17 100644 --- a/geordash/views.py +++ b/geordash/views.py @@ -12,6 +12,7 @@ from geordash.utils import unmunge from geordash.checks.mapstore import check_res, check_configs, check_resources +from geordash.checks.gn_datadir import check_gn_meta from geordash.tasks.fetch_csw import get_records from geordash.tasks.gsdatadir import parse_gsdatadir import geordash.checks.ows @@ -325,5 +326,9 @@ def check_cswservice(url): def check_gndatadir(): # metalist = geordash.checks.gn_datadir.check_gn_meta() # return {"result_id": metalist} - result = geordash.checks.gn_datadir.check_gn_meta.delay() + + # app.extensions["gndc"].refresh_meta_list() + + result = check_gn_meta.delay() + return {"result_id": result.id} From 20971c2453d29e83d2e1dda986bd6617dff027a3 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 3 Sep 2025 09:33:08 +0200 Subject: [PATCH 08/38] continue testing --- geordash/checks/gn_datadir.py | 5 +++-- geordash/templates/admin/geonetwork_datadir.html | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 10a16ae..54e278a 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -169,7 +169,8 @@ def process_size(path): @shared_task(bind=True) def check_gn_meta(self): get_logger("CheckGNDatadir").debug("Start gn datadir checker") - gnmetadatas = app.extensions["gndc"].get_meta_list() + testeddd = app.extensions["gndc"] + gnmetadatas = testeddd.get_meta_list() # self.gnmetadatas.sort(key=lambda x: x.id) get_logger("CheckGNDatadir").debug("pouet1") print(gnmetadatas) @@ -197,7 +198,7 @@ def check_gn_meta(self): else: meta.append(["Total", str(total_could_be_deleted)]) - return meta + return ["1", "2", "3"] diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html index d370151..6b01d73 100644 --- a/geordash/templates/admin/geonetwork_datadir.html +++ b/geordash/templates/admin/geonetwork_datadir.html @@ -22,7 +22,7 @@

    Geonetwork datadir Checker

    -
    +
    From 7902e434de3535cbcd04568c324e3e9ff8bbe6f5 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 3 Sep 2025 11:42:59 +0200 Subject: [PATCH 09/38] finaly something working --- geordash/checks/gn_datadir.py | 35 +++++++++++++------ geordash/static/js/script.js | 6 ++++ .../templates/admin/geonetwork_datadir.html | 16 +++------ geordash/views.py | 3 +- 4 files changed, 37 insertions(+), 23 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 54e278a..b946f1a 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -172,13 +172,12 @@ def check_gn_meta(self): testeddd = app.extensions["gndc"] gnmetadatas = testeddd.get_meta_list() # self.gnmetadatas.sort(key=lambda x: x.id) - get_logger("CheckGNDatadir").debug("pouet1") - print(gnmetadatas) - meta = list() + meta = dict() + meta["problems"] = list() total_could_be_deleted = 0 for foldermeta in glob.glob("/mnt/geonetwork_datadir/data/metadata_data/*/*"): idmeta = foldermeta.split("/")[-1] - get_logger("CheckGNDatadir").debug("pouet " + foldermeta) + get_logger("CheckGNDatadir").debug(foldermeta) existing_index = 0 for (index, item) in enumerate(gnmetadatas): @@ -189,16 +188,32 @@ def check_gn_meta(self): continue else: # append useless folder - meta.append([foldermeta, str(get_folder_size(foldermeta))]) + meta["problems"].append( + { + "type": "UnusedFileRes", + "path": foldermeta, + "size" : str(get_folder_size(foldermeta)) + + } + ) total_could_be_deleted += get_folder_size(foldermeta) - get_logger("CheckGNDatadir").debug("pouet aie aie aie") get_logger("CheckGNDatadir").debug("finish gn datadir checker") if not len(meta): - meta.append("result") + meta["problems"].append( + { + "type": "UnusedFileResNone", + "path": "None", + "size": "0 KB", + }) else: - meta.append(["Total", str(total_could_be_deleted)]) - - return ["1", "2", "3"] + meta["problems"].append( + { + "type": "UnusedFileResTotal", + "path": "Total", + "size": str(total_could_be_deleted), + }) + + return meta diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index 498b441..a57887f 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -290,6 +290,12 @@ const GetPbStr = (p) => { return `RasterData '${p.skey.replaceAll('~','/')}' is unused` case 'UnusedVectorData': return `VectorData '${p.skey.replaceAll('~','/')}' is unused` + case 'UnusedFileRes': + return `Folder is useless '${p.path}' with size '${p.size}'` + case 'UnusedFileResTotal': + return `In total '${p.size}' could be saved` + case 'UnusedFileResNone': + return `No file are useless` default: return `Unhandled error code ${p.type} for problem ${p}` } diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html index 6b01d73..e4b4444 100644 --- a/geordash/templates/admin/geonetwork_datadir.html +++ b/geordash/templates/admin/geonetwork_datadir.html @@ -8,10 +8,10 @@ const last = prev.reduce((a, b) => { return new Date(a['finished']) > new Date(b['finished']) ? a : b; }); - PollTaskRes('gn_datadir', 'result', last['id'], {{ superuser|int }}, '#pbtitle'); + PollTaskRes('gndatadir', 'result', last['id'], {{ superuser|int }}, '#pbtitle'); } $(document).ready(function(){ - DisplayPrev('gn_datadir', 'result', prev, {{ superuser|int }}, '#previouslist'); + DisplayPrev('gndatadir', 'result', prev, {{ superuser|int }}, '#previouslist'); }); @@ -21,8 +21,7 @@ {% block content %}

    Geonetwork datadir Checker

    - -
    +
    @@ -30,18 +29,13 @@

    Geonetwork datadir Checker

    List of the useless ressources in geonetwork datadir:
    - -
    -
    +

    Problems

    Previous

    +
    {% endblock %} diff --git a/geordash/views.py b/geordash/views.py index 1e79b17..5682ec7 100644 --- a/geordash/views.py +++ b/geordash/views.py @@ -41,6 +41,7 @@ def result(id: str) -> dict[str, object]: "geordash.checks.ows.owsservice", "geordash.checks.csw.check_catalog", "geordash.checks.gsd.gsdatadir", + "geordash.checks.gn_datadir", ): # print(f"real taskset id is {result.result[0][0]}") result = GroupResult.restore(result.result[0][0]) @@ -326,9 +327,7 @@ def check_cswservice(url): def check_gndatadir(): # metalist = geordash.checks.gn_datadir.check_gn_meta() # return {"result_id": metalist} - # app.extensions["gndc"].refresh_meta_list() - result = check_gn_meta.delay() return {"result_id": result.id} From 038ab38a45c4c1de787e33268d8e5430809cca46 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 3 Sep 2025 12:43:40 +0200 Subject: [PATCH 10/38] make dynamic folder search and change home dashboard --- geordash/checks/gn_datadir.py | 10 +++++++--- geordash/georchestraconfig.py | 5 +++++ geordash/templates/dashboard/home.html | 14 +++++++++----- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index b946f1a..3e55ec5 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -169,19 +169,23 @@ def process_size(path): @shared_task(bind=True) def check_gn_meta(self): get_logger("CheckGNDatadir").debug("Start gn datadir checker") - testeddd = app.extensions["gndc"] - gnmetadatas = testeddd.get_meta_list() + metadatabase = app.extensions["gndc"] + gnmetadatas = metadatabase.get_meta_list() # self.gnmetadatas.sort(key=lambda x: x.id) meta = dict() meta["problems"] = list() total_could_be_deleted = 0 - for foldermeta in glob.glob("/mnt/geonetwork_datadir/data/metadata_data/*/*"): + for foldermeta in glob.glob(app.extensions['conf'].get("geonetwork.dir", "geonetwork")+"/data/metadata_data/*/*"): idmeta = foldermeta.split("/")[-1] + get_logger("CheckGNDatadir").info(idmeta + " "+ foldermeta) get_logger("CheckGNDatadir").debug(foldermeta) existing_index = 0 for (index, item) in enumerate(gnmetadatas): + if item.id == int(idmeta): + get_logger("CheckGNDatadir").info(index) + get_logger("CheckGNDatadir").info(item.id) existing_index = index break if existing_index: diff --git a/geordash/georchestraconfig.py b/geordash/georchestraconfig.py index 1e7ff71..95b6413 100644 --- a/geordash/georchestraconfig.py +++ b/geordash/georchestraconfig.py @@ -50,6 +50,11 @@ def __init__(self): except: # safe default value self.sections["urls"]["localgs"] = "geoserver" + with open(f"{self.datadirpath}/geonetwork/geonetwork.properties") as lines: + lines = chain(("[section]",), lines) # This line does the trick. + parser.read_file(lines) + self.sections["geonetwork"] = parser["section"] + def tostr(self): # pp = pprint.PrettyPrinter(indent=4) diff --git a/geordash/templates/dashboard/home.html b/geordash/templates/dashboard/home.html index 262d72f..4e1887b 100644 --- a/geordash/templates/dashboard/home.html +++ b/geordash/templates/dashboard/home.html @@ -13,6 +13,7 @@ {% endif %} {% if superuser %} {"prefix": "#gs-datadir", "taskname": "geordash.checks.gsd.gsdatadir","taskargs": []}, + {"prefix": "#gndc", "taskname": "geordash.checks.gn_datadir.check_gn_meta","taskargs": []}, {% endif %} ]); {% if 'sec-roles' not in request.headers or ( 'sec-roles' in request.headers and 'MAPSTORE_ADMIN' not in request.headers['sec-roles'] ) %} @@ -104,15 +105,18 @@
    Mapstore contexts
    -
    {% endblock %} From c9848a07c756b4b814dc64f10d5e6444a807287f Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 3 Sep 2025 13:02:02 +0200 Subject: [PATCH 11/38] add total count --- geordash/checks/gn_datadir.py | 17 +++-------------- geordash/static/js/script.js | 2 +- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 3e55ec5..ec2105a 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -156,36 +156,24 @@ def refresh_meta_list(self): def get_meta_list(self): return self.allmetadatas - -def all_process_size(meta): - total_could_be_deleted = 0 - for path in meta: - total_could_be_deleted += get_folder_size(path) - return "In total " + str(total_could_be_deleted) + " on "+ str(get_folder_size("/mnt/geonetwork_datdadir")) +" bytes could be deleted" - -def process_size(path): - return get_folder_size(path) - @shared_task(bind=True) def check_gn_meta(self): get_logger("CheckGNDatadir").debug("Start gn datadir checker") metadatabase = app.extensions["gndc"] gnmetadatas = metadatabase.get_meta_list() + geonetwork_datadir_path = app.extensions['conf'].get("geonetwork.dir", "geonetwork") # self.gnmetadatas.sort(key=lambda x: x.id) meta = dict() meta["problems"] = list() total_could_be_deleted = 0 - for foldermeta in glob.glob(app.extensions['conf'].get("geonetwork.dir", "geonetwork")+"/data/metadata_data/*/*"): + for foldermeta in glob.glob(geonetwork_datadir_path+"/data/metadata_data/*/*"): idmeta = foldermeta.split("/")[-1] - get_logger("CheckGNDatadir").info(idmeta + " "+ foldermeta) get_logger("CheckGNDatadir").debug(foldermeta) existing_index = 0 for (index, item) in enumerate(gnmetadatas): if item.id == int(idmeta): - get_logger("CheckGNDatadir").info(index) - get_logger("CheckGNDatadir").info(item.id) existing_index = index break if existing_index: @@ -215,6 +203,7 @@ def check_gn_meta(self): "type": "UnusedFileResTotal", "path": "Total", "size": str(total_could_be_deleted), + "total": str(get_folder_size(geonetwork_datadir_path)) }) return meta diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index a57887f..bb01eb8 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -293,7 +293,7 @@ const GetPbStr = (p) => { case 'UnusedFileRes': return `Folder is useless '${p.path}' with size '${p.size}'` case 'UnusedFileResTotal': - return `In total '${p.size}' could be saved` + return `In total '${p.size}' could be saved on '${p.total}'` case 'UnusedFileResNone': return `No file are useless` default: From a471ce28f8639bcdf7bbc90874af926ffbf6c4e0 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 3 Sep 2025 14:51:08 +0200 Subject: [PATCH 12/38] revert wrong merge --- geordash/celeryconfig.py.example | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/geordash/celeryconfig.py.example b/geordash/celeryconfig.py.example index 1802f84..e48ac6c 100644 --- a/geordash/celeryconfig.py.example +++ b/geordash/celeryconfig.py.example @@ -53,16 +53,26 @@ beat_schedule = { "args": ("wfs", "/geoserver/ows"), "schedule": crontab(minute=30, hour=0), }, + "check-gs-datadir-every-sunday": { + "task": "geordash.checks.gsd.gsdatadir", + "args": [], + "schedule": crontab(day_of_week='sunday',minute=30, hour=1), + }, "check-gn-metadatas-every-night": { "task": "geordash.checks.csw.check_catalog", "args": ["/geonetwork/srv/fre/csw"], "schedule": crontab(minute=45, hour=0), }, + # "check-all-mviewer": { + # "task": "geordash.checks.mviewer.check_all", + # "args": [], + # "schedule": crontab(minute=0, hour=1), + # }, "check-gn-metadatadir-every-night": { - "task": "geordash.checks.gn_datadir.check_gn_meta", - "args": [""], - "schedule": crontab(minute=55, hour=0), -}, + "task": "geordash.checks.gn_datadir.check_gn_meta", + "args": [""], + "schedule": crontab(minute=55, hour=0), + }, } # otherwise scheduled hours is taken as UTC timezone = "Europe/Paris" From ea4ef117f9f71d93ce9bbd3118833b95de6113c1 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Thu, 11 Sep 2025 17:31:58 +0200 Subject: [PATCH 13/38] continue test and dev, take care about reviews --- geordash/checks/gn_datadir.py | 75 ++------------------------ geordash/dashboard.py | 1 + geordash/static/js/script.js | 39 ++++++++++++-- geordash/templates/dashboard/home.html | 6 ++- 4 files changed, 45 insertions(+), 76 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index ec2105a..f79b212 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -44,66 +44,7 @@ class Metadata(Base): isharvested = Column(Integer) def get_folder_size(folder): - return ByteSize(sum(file.stat().st_size for file in Path(folder).rglob('*'))) - -class ByteSize(int): - _KB = 1024 - _suffixes = 'B', 'KB', 'MB', 'GB', 'PB' - - def __new__(cls, *args, **kwargs): - return super().__new__(cls, *args, **kwargs) - - def __init__(self, *args, **kwargs): - self.bytes = self.B = int(self) - self.kilobytes = self.KB = self / self._KB ** 1 - self.megabytes = self.MB = self / self._KB ** 2 - self.gigabytes = self.GB = self / self._KB ** 3 - self.petabytes = self.PB = self / self._KB ** 4 - *suffixes, last = self._suffixes - suffix = next(( - suffix - for suffix in suffixes - if 1 < getattr(self, suffix) < self._KB - ), last) - self.readable = suffix, getattr(self, suffix) - - super().__init__() - - def __str__(self): - return self.__format__('.2f') - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, super().__repr__()) - - def __format__(self, format_spec): - suffix, val = self.readable - return '{val:{fmt}} {suf}'.format(val=val, fmt=format_spec, suf=suffix) - - def __sub__(self, other): - return self.__class__(super().__sub__(other)) - - def __add__(self, other): - return self.__class__(super().__add__(other)) - - def __mul__(self, other): - return self.__class__(super().__mul__(other)) - - def __rsub__(self, other): - return self.__class__(super().__sub__(other)) - - def __radd__(self, other): - return self.__class__(super().__add__(other)) - - def __rmul__(self, other): - return self.__class__(super().__rmul__(other)) -conf = { - 'pgsqlUser': 'georchestra', - 'pgsqlHost': '127.0.0.1', - 'pgsqlPort': '5432', - 'pgsqlPassword': 'georchestra', - 'pgsqlDatabase': 'georchestra', - 'geonetworkSchema': 'geonetwork' -} + return sum(file.stat().st_size for file in Path(folder).rglob('*')) # solves conflicts in relationship naming ? def name_for_collection_relationship(base, local_cls, referred_cls, constraint): @@ -190,14 +131,8 @@ def check_gn_meta(self): ) total_could_be_deleted += get_folder_size(foldermeta) get_logger("CheckGNDatadir").debug("finish gn datadir checker") - if not len(meta): - meta["problems"].append( - { - "type": "UnusedFileResNone", - "path": "None", - "size": "0 KB", - }) - else: + + if len(meta["problems"]) > 0: meta["problems"].append( { "type": "UnusedFileResTotal", @@ -207,7 +142,3 @@ def check_gn_meta(self): }) return meta - - - -# GeonetworkDatadirChecker(conf) \ No newline at end of file diff --git a/geordash/dashboard.py b/geordash/dashboard.py index 03b6db1..c5a172c 100644 --- a/geordash/dashboard.py +++ b/geordash/dashboard.py @@ -243,6 +243,7 @@ def mviewer(url): if r.status_code == 200: if r.headers["content-type"] == "text/xml": r.encoding = "utf-8" + mviewer_configs = app.extensions["owscache"].get_mviewer_configs() if not mviewer_configs: app.extensions["owscache"].set_mviewer_configs( diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index bb01eb8..1d9a5a5 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -40,6 +40,40 @@ const fetchForHome = (widgets) => { }) } +const fetchForHomeSingleTask = (widgets) => { + widgets.forEach(function(o) { + fetch(baseurl + '/tasks/lastresultbytask/' + o["taskname"] + "?taskargs=" + o["taskargs"].join(",")) + .then(response => response.json()) + .then(mydata => { + if (parseInt(mydata["finished"])) { + const d = new Date(mydata["finished"] * 1000); + $(o["prefix"] + '-lastupdated').html("Information valid as of "+ d.toLocaleString("fr-FR") + '
    (taskid: '+ mydata['taskid'] + ')') + } + if (mydata === "notask") { + $(o["prefix"] + '-abstract').html("no " + o["taskname"] + " job found with args " + o["taskargs"].join(",") + ", something went wrong ?") + return; + } + if (mydata['value'] === null && mydata['ready'] === false) { + $(o["prefix"] + '-abstract').html("job is currently running, " + mydata['completed'] + " objects checked") + return; + } + let str = "" + mydata['value']['problems'].length + ' entries
    '; + + const nerrors = mydata['value']['problems'].length; + + if (nerrors > 0) { + str += "" + nerrors + " errors found !"; + } else { + str += " no errors !"; + } + $(o["prefix"] + '-abstract').html(str); + }) + .catch(function(err) { + $(o["prefix"] + '-abstract').html("something went wrong") + }); + }) +} + const fetchMyMd = (localgnbaseurl) => { fetch(baseurl + '/api/geonetwork/metadatas.json') .then(response => response.json()) @@ -291,11 +325,10 @@ const GetPbStr = (p) => { case 'UnusedVectorData': return `VectorData '${p.skey.replaceAll('~','/')}' is unused` case 'UnusedFileRes': - return `Folder is useless '${p.path}' with size '${p.size}'` + {{ p.size |filesizeformat}} + return `Folder is useless '${p.path}' with size ' ${ p.size } '` case 'UnusedFileResTotal': return `In total '${p.size}' could be saved on '${p.total}'` - case 'UnusedFileResNone': - return `No file are useless` default: return `Unhandled error code ${p.type} for problem ${p}` } diff --git a/geordash/templates/dashboard/home.html b/geordash/templates/dashboard/home.html index 4e1887b..9b00f30 100644 --- a/geordash/templates/dashboard/home.html +++ b/geordash/templates/dashboard/home.html @@ -13,9 +13,13 @@ {% endif %} {% if superuser %} {"prefix": "#gs-datadir", "taskname": "geordash.checks.gsd.gsdatadir","taskargs": []}, - {"prefix": "#gndc", "taskname": "geordash.checks.gn_datadir.check_gn_meta","taskargs": []}, {% endif %} ]); +{% if superuser %} + fetchForHomeSingleTask([ + {"prefix": "#gndc", "taskname": "geordash.checks.gn_datadir.check_gn_meta","taskargs": []}, + ]); +{% endif %} {% if 'sec-roles' not in request.headers or ( 'sec-roles' in request.headers and 'MAPSTORE_ADMIN' not in request.headers['sec-roles'] ) %} fetchMapsAndCtxCount('{{ url_for('dashboard.my_maps_and_apps') }}'); {% endif %} From 57c1f49da3c4b4bc4fddb5a05df63480dd15dbc3 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Fri, 12 Sep 2025 09:50:27 +0200 Subject: [PATCH 14/38] make geonetwork datadir dynamic + filesizeformat using jinja2 --- geordash/checks/gn_datadir.py | 15 ++++++++------- geordash/dashboard.py | 1 - geordash/static/js/script.js | 1 - 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index f79b212..253c328 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -29,6 +29,7 @@ from sqlalchemy.ext.declarative import DeclarativeMeta import glob from pathlib import Path +import jinja2 Base = declarative_base() @@ -102,16 +103,17 @@ def check_gn_meta(self): get_logger("CheckGNDatadir").debug("Start gn datadir checker") metadatabase = app.extensions["gndc"] gnmetadatas = metadatabase.get_meta_list() - geonetwork_datadir_path = app.extensions['conf'].get("geonetwork.dir", "geonetwork") + geonetwork_dir_path = app.extensions['conf'].get("geonetwork.dir", "geonetwork") + geonetwork_datadir_path = app.extensions['conf'].get("geonetwork.data.dir", "geonetwork").replace("${geonetwork.dir}", geonetwork_dir_path) # self.gnmetadatas.sort(key=lambda x: x.id) meta = dict() meta["problems"] = list() total_could_be_deleted = 0 - for foldermeta in glob.glob(geonetwork_datadir_path+"/data/metadata_data/*/*"): + for foldermeta in glob.glob(geonetwork_datadir_path+"*/*"): idmeta = foldermeta.split("/")[-1] get_logger("CheckGNDatadir").debug(foldermeta) existing_index = 0 - + # maybe i could count the number of folders for (index, item) in enumerate(gnmetadatas): if item.id == int(idmeta): @@ -125,8 +127,7 @@ def check_gn_meta(self): { "type": "UnusedFileRes", "path": foldermeta, - "size" : str(get_folder_size(foldermeta)) - + "size" : jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(foldermeta)) } ) total_could_be_deleted += get_folder_size(foldermeta) @@ -137,8 +138,8 @@ def check_gn_meta(self): { "type": "UnusedFileResTotal", "path": "Total", - "size": str(total_could_be_deleted), - "total": str(get_folder_size(geonetwork_datadir_path)) + "size": jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=total_could_be_deleted), + "total": jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(geonetwork_dir_path)) }) return meta diff --git a/geordash/dashboard.py b/geordash/dashboard.py index c5a172c..03b6db1 100644 --- a/geordash/dashboard.py +++ b/geordash/dashboard.py @@ -243,7 +243,6 @@ def mviewer(url): if r.status_code == 200: if r.headers["content-type"] == "text/xml": r.encoding = "utf-8" - mviewer_configs = app.extensions["owscache"].get_mviewer_configs() if not mviewer_configs: app.extensions["owscache"].set_mviewer_configs( diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index 1d9a5a5..d376c11 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -325,7 +325,6 @@ const GetPbStr = (p) => { case 'UnusedVectorData': return `VectorData '${p.skey.replaceAll('~','/')}' is unused` case 'UnusedFileRes': - {{ p.size |filesizeformat}} return `Folder is useless '${p.path}' with size ' ${ p.size } '` case 'UnusedFileResTotal': return `In total '${p.size}' could be saved on '${p.total}'` From 4b716489757eed25c089a22894a96fbb6886be48 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Fri, 12 Sep 2025 09:59:55 +0200 Subject: [PATCH 15/38] activate debug route only if debug is ON --- docker/Dockerfile_flask | 1 + geordash/dashboard.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile_flask b/docker/Dockerfile_flask index c56ade2..b095863 100644 --- a/docker/Dockerfile_flask +++ b/docker/Dockerfile_flask @@ -5,6 +5,7 @@ ENV FLASK_APP=geordash ENV FLASK_OPTS="-h 0.0.0.0 -p 5002" ENV georchestradatadir=/etc/georchestra ENV REDISURL="" +#ENV FLASK_DEBUG=1 # set fixed UID and GID - see github.com/hexops/dockerfile ARG UID=10000 diff --git a/geordash/dashboard.py b/geordash/dashboard.py index 03b6db1..9bf61ad 100644 --- a/geordash/dashboard.py +++ b/geordash/dashboard.py @@ -6,6 +6,7 @@ from flask import request, render_template, abort, url_for from flask import current_app as app import requests +from functools import wraps from geordash.decorators import is_superuser, check_role from geordash.checks.mapstore import get_resources_using_ows, get_res @@ -22,6 +23,13 @@ "dashboard", __name__, url_prefix="/gaia", template_folder="templates/dashboard" ) +def debug_only(f): + @wraps(f) + def wrapped(**kwargs): + if not app.debug: + abort(404) + return f(**kwargs) + return wrapped def get_rescontent_from_resid(restype, resid): r = mapstore_get(request, f"rest/geostore/data/{resid}", False) @@ -67,8 +75,9 @@ def home(): return render_template("home.html") @dash_bp.route("/debug") +@debug_only def debug(): - # app.logger.error(app.extensions["conf"]) + return app.extensions["conf"].tostr() @dash_bp.route("/my-metadata") From 2fc440f0a3002a343d4d60d21c7526f26c2d4666 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Fri, 12 Sep 2025 10:08:51 +0200 Subject: [PATCH 16/38] cleaning before ready for reviews --- docker/docker-compose.yml | 2 -- geordash/checks/gn_datadir.py | 4 +--- geordash/georchestraconfig.py | 1 - geordash/static/js/script.js | 2 +- geordash/templates/dashboard/home.html | 3 ++- 5 files changed, 4 insertions(+), 8 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index f4d0b68..bad351d 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -22,7 +22,6 @@ services: - ../config.py.example:/geordash/config.py:ro - ../geordash/celeryconfig.py.example:/geordash/geordash/celeryconfig.py:ro - /etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt:ro - - ../:/geordash #- ../:/geordash # need to add path of geonetwork and geoserver datadir in here @@ -78,7 +77,6 @@ services: - ../config.py.example:/geordash/config.py:ro - ../geordash/celeryconfig.py.example:/geordash/geordash/celeryconfig.py:ro - /etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt:ro - - ../:/geordash #- ../:/geordash # need to add path of geonetwork and geoserver datadir in here diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 253c328..47296ef 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -31,7 +31,6 @@ from pathlib import Path import jinja2 - Base = declarative_base() # Define the Metadata model (example schema of a GeoNetwork metadata table) @@ -113,9 +112,8 @@ def check_gn_meta(self): idmeta = foldermeta.split("/")[-1] get_logger("CheckGNDatadir").debug(foldermeta) existing_index = 0 - # maybe i could count the number of folders - for (index, item) in enumerate(gnmetadatas): + for (index, item) in enumerate(gnmetadatas): if item.id == int(idmeta): existing_index = index break diff --git a/geordash/georchestraconfig.py b/geordash/georchestraconfig.py index 95b6413..59bc390 100644 --- a/geordash/georchestraconfig.py +++ b/geordash/georchestraconfig.py @@ -55,7 +55,6 @@ def __init__(self): parser.read_file(lines) self.sections["geonetwork"] = parser["section"] - def tostr(self): # pp = pprint.PrettyPrinter(indent=4) # return pp.pprint(self.sections) diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index d376c11..6ef3f44 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -57,7 +57,7 @@ const fetchForHomeSingleTask = (widgets) => { $(o["prefix"] + '-abstract').html("job is currently running, " + mydata['completed'] + " objects checked") return; } - let str = "" + mydata['value']['problems'].length + ' entries
    '; + let str = "
    "; const nerrors = mydata['value']['problems'].length; diff --git a/geordash/templates/dashboard/home.html b/geordash/templates/dashboard/home.html index 9b00f30..33cc327 100644 --- a/geordash/templates/dashboard/home.html +++ b/geordash/templates/dashboard/home.html @@ -108,6 +108,7 @@
    Mapstore contexts
    +{% if superuser %} - +{% endif %} {% endblock %} From 1818d199fb66f1597f469136b2dd1d3f301daa37 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Fri, 12 Sep 2025 10:10:10 +0200 Subject: [PATCH 17/38] cleaning before ready for reviews2 --- docker/docker-compose.yml | 1 - geordash/dashboard.py | 1 - 2 files changed, 2 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index bad351d..3cc1284 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -70,7 +70,6 @@ services: build: context: ../ dockerfile: ./docker/Dockerfile_celery - volumes: - ../../datadir:/etc/georchestra - ../gunicorn.conf.py.example:/geordash/gunicorn.conf.py:ro diff --git a/geordash/dashboard.py b/geordash/dashboard.py index 9bf61ad..488fbf7 100644 --- a/geordash/dashboard.py +++ b/geordash/dashboard.py @@ -77,7 +77,6 @@ def home(): @dash_bp.route("/debug") @debug_only def debug(): - return app.extensions["conf"].tostr() @dash_bp.route("/my-metadata") From 87fd6dbb915626bd84aed91f5f1cd862176b6da7 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 24 Sep 2025 15:57:29 +0200 Subject: [PATCH 18/38] taking review into account --- geordash/admin.py | 1 - geordash/checks/gn_datadir.py | 5 ----- geordash/views.py | 4 ---- 3 files changed, 10 deletions(-) diff --git a/geordash/admin.py b/geordash/admin.py index 46697e2..900dc49 100644 --- a/geordash/admin.py +++ b/geordash/admin.py @@ -57,7 +57,6 @@ def geonetwork_datadir(): all_jobs_for_gnconfigs = app.extensions["rcli"].get_taskids_by_taskname_and_args( "geordash.checks.gn_datadir.check_gn_meta", [] ) - #if type(all_jobs_for_gnconfigs) != NoneType: return render_template("admin/geonetwork_datadir.html", previous_configs_jobs=all_jobs_for_gnconfigs) @admin_bp.route("/geoserver") diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 47296ef..701f6fd 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -80,8 +80,6 @@ def __init__(self, conf): name_for_collection_relationship=name_for_collection_relationship, ) self.allmetadatas = self.session().query(Metadata).all() - # for (index, item) in enumerate(self.allmetadatas): - # get_logger("CheckGNDatadir").debug("test1") def session(self): try: @@ -91,9 +89,6 @@ def session(self): self.sessiono = self.sessionm() return self.sessiono - def refresh_meta_list(self): - self.allmetadatas = self.session().query(Metadata).all() - def get_meta_list(self): return self.allmetadatas diff --git a/geordash/views.py b/geordash/views.py index 5682ec7..4920af4 100644 --- a/geordash/views.py +++ b/geordash/views.py @@ -325,9 +325,5 @@ def check_cswservice(url): @tasks_bp.route("/check/gndatadir/result.json") def check_gndatadir(): - # metalist = geordash.checks.gn_datadir.check_gn_meta() - # return {"result_id": metalist} - # app.extensions["gndc"].refresh_meta_list() result = check_gn_meta.delay() - return {"result_id": result.id} From a6bc3b52b3748d55b233fb29463171a6f0ca4df0 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Mon, 20 Oct 2025 11:31:37 +0200 Subject: [PATCH 19/38] correct database arguments variables --- geordash/celeryconfig.py.example | 1 + geordash/checks/gn_datadir.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/geordash/celeryconfig.py.example b/geordash/celeryconfig.py.example index e48ac6c..5add22b 100644 --- a/geordash/celeryconfig.py.example +++ b/geordash/celeryconfig.py.example @@ -11,6 +11,7 @@ imports = ( "geordash.checks.csw", "geordash.checks.mviewer", "geordash.checks.gsd", + "geordash.checks.gn_datadir", ) # worker_pool = solo worker_log_format = ( diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 701f6fd..3c1f243 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -61,11 +61,11 @@ class GeonetworkDatadirChecker: def __init__(self, conf): url = URL.create( drivername="postgresql", - username=conf.get("pgsqlUser"), - host=conf.get("pgsqlHost"), - port=conf.get("pgsqlPort"), - password=conf.get("pgsqlPassword"), - database=conf.get("pgsqlDatabase"), + username=conf.get("jdbc.username", "geonetwork"), + host=conf.get("jdbc.host", "geonetwork"), + port=conf.get("jdbc.port", "geonetwork"), + password=conf.get("jdbc.password", "geonetwork"), + database=conf.get("jdbc.database", "geonetwork"), ) engine = create_engine(url) @@ -73,7 +73,7 @@ def __init__(self, conf): self.sessiono = self.sessionm() # Perform database reflection to analyze tables and relationships - m = MetaData(schema=conf.get("geonetworkSchema")) + m = MetaData(schema=conf.get("jdbc.schema", "geonetwork")) Base = automap_base(metadata=m) Base.prepare( autoload_with=engine, From 503f7c11254e7c634f1b8c35bbf6e815a46559bc Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 22 Oct 2025 09:57:48 +0200 Subject: [PATCH 20/38] correct docker file and compose --- docker/Dockerfile_celery | 2 +- docker/docker-compose.yml | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile_celery b/docker/Dockerfile_celery index 106c20e..d1d4652 100644 --- a/docker/Dockerfile_celery +++ b/docker/Dockerfile_celery @@ -27,4 +27,4 @@ USER celery COPY . /geordash WORKDIR /geordash -CMD ["sh", "-c", "python3 -m celery -A make_celery worker --loglevel INFO -P solo -B -E -s workdir/celerybeat-schedule -s celerybeat-schedule"] +CMD ["sh", "-c", "python3 -m celery -A make_celery worker --loglevel INFO -P solo -B -E -s workdir/celerybeat-schedule"] diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 3cc1284..cdbae39 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -8,11 +8,12 @@ services: redis: image: "redis:alpine" command: redis-server - environment: - REDIS_REPLICATION_MODE=master + - gaia-back: - # image: "docker-gaia-back" + # if you want to switch back to official image + # image: "georchestra/gaia:latest" build: context: ../ dockerfile: ./docker/Dockerfile_flask @@ -66,7 +67,8 @@ services: gaia-celery: - # image: "docker-gaia-celery:latest" + # if you want to switch back to official image + # image: "georchestra/gaia-celery:latest" build: context: ../ dockerfile: ./docker/Dockerfile_celery From e6150eafe574d3ec1c57cef1345e4781de32112b Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 22 Oct 2025 10:02:09 +0200 Subject: [PATCH 21/38] working way to request database without providing the Metadata class --- geordash/checks/gn_datadir.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 3c1f243..449f0e2 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -19,7 +19,7 @@ from geordash.utils import find_localmduuid, unmunge, objtype -from sqlalchemy import create_engine, MetaData, select, Column, String, Integer, Text +from sqlalchemy import create_engine, MetaData, select, Column, String, Integer, Text, Table from sqlalchemy.dialects.postgresql import array from sqlalchemy.engine import URL from sqlalchemy.ext.automap import automap_base @@ -33,16 +33,6 @@ Base = declarative_base() -# Define the Metadata model (example schema of a GeoNetwork metadata table) -class Metadata(Base): - __tablename__ = "metadata" - __table_args__ = {"schema": "geonetwork"} - id = Column(Integer, primary_key=True) - uuid = Column(String, unique=True) - data = Column(Text) # Metadata content (e.g., XML or JSON) - schemaid = Column(String) # Metadata schema (e.g., ISO 19115) - isharvested = Column(Integer) - def get_folder_size(folder): return sum(file.stat().st_size for file in Path(folder).rglob('*')) @@ -68,17 +58,17 @@ def __init__(self, conf): database=conf.get("jdbc.database", "geonetwork"), ) - engine = create_engine(url) + engine = create_engine(url, connect_args={"options": f"-csearch_path={conf.get('jdbc.schema', 'geonetwork')}"}) self.sessionm = sessionmaker(bind=engine) self.sessiono = self.sessionm() # Perform database reflection to analyze tables and relationships m = MetaData(schema=conf.get("jdbc.schema", "geonetwork")) + Table('metadata', m, autoload_with=engine) Base = automap_base(metadata=m) - Base.prepare( - autoload_with=engine, - name_for_collection_relationship=name_for_collection_relationship, - ) + Base.prepare() + Metadata = Base.classes.metadata + self.allmetadatas = self.session().query(Metadata).all() def session(self): From 1aab1e6b2208b94fc0fe1a53ea1cb0f05957a904 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Thu, 23 Oct 2025 10:47:34 +0200 Subject: [PATCH 22/38] move the query to get all metadata for dynamic updates --- geordash/checks/gn_datadir.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 449f0e2..8017017 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -67,9 +67,7 @@ def __init__(self, conf): Table('metadata', m, autoload_with=engine) Base = automap_base(metadata=m) Base.prepare() - Metadata = Base.classes.metadata - - self.allmetadatas = self.session().query(Metadata).all() + self.Metadata = Base.classes.metadata def session(self): try: @@ -80,7 +78,7 @@ def session(self): return self.sessiono def get_meta_list(self): - return self.allmetadatas + return self.session().query(self.Metadata).all() @shared_task(bind=True) def check_gn_meta(self): From 880b5964b4d42621a62f1ea4fcfe6bd4ed43ec21 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Thu, 23 Oct 2025 17:26:53 +0200 Subject: [PATCH 23/38] result of gn checker in beautifull table --- geordash/checks/gn_datadir.py | 4 +++- geordash/static/js/script.js | 31 ++++++++++++++++++++++--------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 8017017..b081e9c 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -89,6 +89,7 @@ def check_gn_meta(self): geonetwork_datadir_path = app.extensions['conf'].get("geonetwork.data.dir", "geonetwork").replace("${geonetwork.dir}", geonetwork_dir_path) # self.gnmetadatas.sort(key=lambda x: x.id) meta = dict() + meta["args"] = geonetwork_datadir_path meta["problems"] = list() total_could_be_deleted = 0 for foldermeta in glob.glob(geonetwork_datadir_path+"*/*"): @@ -108,7 +109,8 @@ def check_gn_meta(self): { "type": "UnusedFileRes", "path": foldermeta, - "size" : jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(foldermeta)) + "url": foldermeta, + "problem" : jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(foldermeta)) } ) total_could_be_deleted += get_folder_size(foldermeta) diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index 6ef3f44..292b14c 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -325,9 +325,9 @@ const GetPbStr = (p) => { case 'UnusedVectorData': return `VectorData '${p.skey.replaceAll('~','/')}' is unused` case 'UnusedFileRes': - return `Folder is useless '${p.path}' with size ' ${ p.size } '` + return `${p.path} with size ${p.size}` case 'UnusedFileResTotal': - return `In total '${p.size}' could be saved on '${p.total}'` + return `In total ${p.size} could be saved on ${p.total}` default: return `Unhandled error code ${p.type} for problem ${p}` } @@ -484,11 +484,13 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') data["value"].problems = probs.flat(1) } else { // if problems is undef, single task badly failed and returned the python exception as value - if (data["value"].problems !== undefined) { - const probs = data["value"].problems.map(i => { - return GetPbStr(i) - }) - data["value"].problems = probs + if (!data['task'].includes('gn_datadir')) { + if (data["value"].problems !== undefined) { + const probs = data["value"].problems.map(i => { + return GetPbStr(i) + }) + data["value"].problems = probs + } } } if (data["value"].problems !== undefined && data["value"].problems.length > 0) { @@ -504,10 +506,18 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') const missing = all.filter(x => !done.includes(x)); $(targetdivid).text("jobs on " + missing + " failed, did " + data["completed"] + " - on those, " + data["value"].problems.length + ' problems found'); } else { + if (data['task'].includes('gn_datadir')) { + // if gn_datadir will remove last problem as it is a total count + totalgndatadir = data["value"].problems.pop() + const exporttotalgndatadir = $("

    "); + exporttotalgndatadir.html("

    "+GetPbStr(totalgndatadir)+"

    ") + $(targetpbdivid).append(exporttotalgndatadir) + } $(targetdivid).text(data["value"].problems.length + ' problems found'); } - if (Array.isArray(data["value"])) { + if (Array.isArray(data["value"]) || Array.isArray(data["value"]['problems'])) { var argtitle = 'Layer' + var argcolumn2 = 'Problem' if (data['task'].includes('csw')) { argtitle = 'Metadata' } else if (data['task'].includes('check_resources')) { @@ -518,6 +528,9 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') argtitle = 'Config url' } else if (data['task'].includes('gsd.gsdatadir')) { argtitle = 'Item' + } else if (data['task'].includes('gn_datadir')) { + argtitle = 'Path' + argcolumn2 = 'Size' } var prevexp = $(targetpbdivid + '-export') if (prevexp.length > 0) { @@ -544,7 +557,7 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') columns: [ {'title': 'Index', 'formatter': 'runningFormatter'}, {'field': 'url', 'title': argtitle, 'sortable': true, 'formatter': 'urlFormatter'}, - {'field': 'problem', 'title': 'Problem', 'sortable': true} + {'field': 'problem', 'title': argcolumn2, 'sortable': true} ] }); } From bab417958fc63c2975834346e6754a9675797d17 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Mon, 27 Oct 2025 10:52:54 +0100 Subject: [PATCH 24/38] changing the path printed --- geordash/checks/gn_datadir.py | 7 ++++--- geordash/static/js/script.js | 7 ++++--- geordash/templates/admin/geonetwork_datadir.html | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index b081e9c..0b19c9c 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -89,11 +89,12 @@ def check_gn_meta(self): geonetwork_datadir_path = app.extensions['conf'].get("geonetwork.data.dir", "geonetwork").replace("${geonetwork.dir}", geonetwork_dir_path) # self.gnmetadatas.sort(key=lambda x: x.id) meta = dict() - meta["args"] = geonetwork_datadir_path + meta["searching_path"] = geonetwork_datadir_path meta["problems"] = list() total_could_be_deleted = 0 for foldermeta in glob.glob(geonetwork_datadir_path+"*/*"): idmeta = foldermeta.split("/")[-1] + subpath = foldermeta.split("/")[-2] get_logger("CheckGNDatadir").debug(foldermeta) existing_index = 0 @@ -108,8 +109,8 @@ def check_gn_meta(self): meta["problems"].append( { "type": "UnusedFileRes", - "path": foldermeta, - "url": foldermeta, + "path": subpath+"/"+idmeta, + "url": subpath+"/"+idmeta, "problem" : jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(foldermeta)) } ) diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index 292b14c..c06044d 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -508,10 +508,11 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') } else { if (data['task'].includes('gn_datadir')) { // if gn_datadir will remove last problem as it is a total count + const targetpboverviewdivid = targetdivid.replace('#pbtitle', '#pboverviews') totalgndatadir = data["value"].problems.pop() - const exporttotalgndatadir = $("

    "); - exporttotalgndatadir.html("

    "+GetPbStr(totalgndatadir)+"

    ") - $(targetpbdivid).append(exporttotalgndatadir) + const exporttotalgndatadir = $("
    "); + exporttotalgndatadir.html("

    "+GetPbStr(totalgndatadir)+" within the path "+data["value"]["searching_path"]+"

    " ) + $(targetpboverviewdivid).html(exporttotalgndatadir) } $(targetdivid).text(data["value"].problems.length + ' problems found'); } diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html index e4b4444..b09c732 100644 --- a/geordash/templates/admin/geonetwork_datadir.html +++ b/geordash/templates/admin/geonetwork_datadir.html @@ -32,6 +32,7 @@

    Geonetwork datadir Checker

    Problems

    +

    Previous

    From 1f339ac69992532609481f91b5ad893f3e32c6cd Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Mon, 27 Oct 2025 10:54:58 +0100 Subject: [PATCH 25/38] removing UnusedFileRes from GetPbStr --- geordash/static/js/script.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index c06044d..0d454c3 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -324,8 +324,6 @@ const GetPbStr = (p) => { return `RasterData '${p.skey.replaceAll('~','/')}' is unused` case 'UnusedVectorData': return `VectorData '${p.skey.replaceAll('~','/')}' is unused` - case 'UnusedFileRes': - return `${p.path} with size ${p.size}` case 'UnusedFileResTotal': return `In total ${p.size} could be saved on ${p.total}` default: From 5746c68dae356f945d179a5387bbc4cdbdd78c53 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 14:57:48 +0100 Subject: [PATCH 26/38] changing total path count --- geordash/checks/gn_datadir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 0b19c9c..7de5a8a 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -123,7 +123,7 @@ def check_gn_meta(self): "type": "UnusedFileResTotal", "path": "Total", "size": jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=total_could_be_deleted), - "total": jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(geonetwork_dir_path)) + "total": jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(geonetwork_datadir_path)) }) return meta From ee317d67153d718abe8e06d76920aa6709e896d2 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 15:13:29 +0100 Subject: [PATCH 27/38] taking reviews into account --- geordash/checks/gn_datadir.py | 8 -------- geordash/georchestraconfig.py | 4 ---- geordash/templates/admin/geonetwork_datadir.html | 4 ++-- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 7de5a8a..78f73a8 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -7,20 +7,12 @@ from requests.exceptions import ReadTimeout from celery import shared_task -from celery import Task -from celery import group from geordash.logwrap import get_logger -from geordash.owscapcache import OwsCapCache - -from owslib.fes import PropertyIsEqualTo, And from flask import current_app as app -from geordash.utils import find_localmduuid, unmunge, objtype - from sqlalchemy import create_engine, MetaData, select, Column, String, Integer, Text, Table -from sqlalchemy.dialects.postgresql import array from sqlalchemy.engine import URL from sqlalchemy.ext.automap import automap_base from sqlalchemy.exc import NoResultFound, OperationalError diff --git a/geordash/georchestraconfig.py b/geordash/georchestraconfig.py index b9b72f3..f7f6255 100644 --- a/geordash/georchestraconfig.py +++ b/geordash/georchestraconfig.py @@ -7,7 +7,6 @@ from os import getenv, getcwd import json import re -import pprint class GeorchestraConfig: def __init__(self): @@ -74,9 +73,6 @@ def __init__(self): pass def tostr(self): - # pp = pprint.PrettyPrinter(indent=4) - # return pp.pprint(self.sections) - str = "" for key in self.sections: str += key + ":\r\n
    " diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html index b09c732..81b3832 100644 --- a/geordash/templates/admin/geonetwork_datadir.html +++ b/geordash/templates/admin/geonetwork_datadir.html @@ -23,11 +23,11 @@

    Geonetwork datadir Checker

    - +

    -List of the useless ressources in geonetwork datadir: +Leftover dirs from removed metadatas:

    Problems

    From b3914f7a69674543510a5032f83893168f4750a2 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 15:42:10 +0100 Subject: [PATCH 28/38] reset pboverviews when there is no problemo! --- geordash/static/js/script.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index 0d454c3..e24a2a5 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -579,6 +579,10 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') $(targetdivid).html('No problemo!') } $(targetpbdivid).empty(); + if (data['task'].includes('gn_datadir')) { + const targetpboverviewdivid = targetdivid.replace('#pbtitle', '#pboverviews'); + $(targetpboverviewdivid).empty(); + } } const d = new Date(data["finished"] * 1000); $(targetpbdetdivid).text('vérification faite le '+ d.toLocaleString("fr-FR")); From 154b3956bfcdd11f8ed7d5b7cac0b1885428cb8f Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 15:54:00 +0100 Subject: [PATCH 29/38] remove useless imports --- geordash/checks/gn_datadir.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 78f73a8..8f340bd 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -1,24 +1,15 @@ #!/bin/env python3 # -*- coding: utf-8 -*- # vim: ts=4 sw=4 et -import json - -import requests -from requests.exceptions import ReadTimeout - from celery import shared_task - from geordash.logwrap import get_logger - from flask import current_app as app - -from sqlalchemy import create_engine, MetaData, select, Column, String, Integer, Text, Table +from sqlalchemy import create_engine, MetaData, select, Table from sqlalchemy.engine import URL from sqlalchemy.ext.automap import automap_base -from sqlalchemy.exc import NoResultFound, OperationalError +from sqlalchemy.exc import OperationalError from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.ext.declarative import DeclarativeMeta import glob from pathlib import Path import jinja2 From ab94d92b130668388e1349137e58c75ff67e4b2b Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 15:54:57 +0100 Subject: [PATCH 30/38] name_for_collection_relationship drop --- geordash/checks/gn_datadir.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 8f340bd..6046d5e 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -19,17 +19,6 @@ def get_folder_size(folder): return sum(file.stat().st_size for file in Path(folder).rglob('*')) -# solves conflicts in relationship naming ? -def name_for_collection_relationship(base, local_cls, referred_cls, constraint): - name = referred_cls.__name__.lower() - local_table = local_cls.__table__ - # print("local_cls={}, local_table={}, referred_cls={}, will return name={}, constraint={}".format(local_cls, local_table, referred_cls, name, constraint)) - if name in local_table.columns: - newname = name + "_" - print("Already detected name %s present. using %s" % (name, newname)) - return newname - return name - class GeonetworkDatadirChecker: def __init__(self, conf): url = URL.create( From 85878932c791f3308d88f8f1778bbf6234d5ab08 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 15:59:30 +0100 Subject: [PATCH 31/38] run black tool in all files --- geordash/admin.py | 6 +++- geordash/celeryconfig.py.example | 10 +++---- geordash/checks/gn_datadir.py | 51 ++++++++++++++++++++++---------- geordash/dashboard.py | 5 ++++ geordash/georchestraconfig.py | 11 +++++-- geordash/views.py | 3 ++ 6 files changed, 62 insertions(+), 24 deletions(-) diff --git a/geordash/admin.py b/geordash/admin.py index 900dc49..7703363 100644 --- a/geordash/admin.py +++ b/geordash/admin.py @@ -51,13 +51,17 @@ def geonetwork(): p["xurl"] = url_for("dashboard.csw", portal=p["uuid"]) return render_template("admin/geonetwork.html", portals=portals) + @admin_bp.route("/geonetwork/datadir") @check_role(role="GN_ADMIN") def geonetwork_datadir(): all_jobs_for_gnconfigs = app.extensions["rcli"].get_taskids_by_taskname_and_args( "geordash.checks.gn_datadir.check_gn_meta", [] ) - return render_template("admin/geonetwork_datadir.html", previous_configs_jobs=all_jobs_for_gnconfigs) + return render_template( + "admin/geonetwork_datadir.html", previous_configs_jobs=all_jobs_for_gnconfigs + ) + @admin_bp.route("/geoserver") @check_role(role="ADMINISTRATOR") diff --git a/geordash/celeryconfig.py.example b/geordash/celeryconfig.py.example index 5add22b..33d634f 100644 --- a/geordash/celeryconfig.py.example +++ b/geordash/celeryconfig.py.example @@ -57,7 +57,7 @@ beat_schedule = { "check-gs-datadir-every-sunday": { "task": "geordash.checks.gsd.gsdatadir", "args": [], - "schedule": crontab(day_of_week='sunday',minute=30, hour=1), + "schedule": crontab(day_of_week="sunday", minute=30, hour=1), }, "check-gn-metadatas-every-night": { "task": "geordash.checks.csw.check_catalog", @@ -70,10 +70,10 @@ beat_schedule = { # "schedule": crontab(minute=0, hour=1), # }, "check-gn-metadatadir-every-night": { - "task": "geordash.checks.gn_datadir.check_gn_meta", - "args": [""], - "schedule": crontab(minute=55, hour=0), - }, + "task": "geordash.checks.gn_datadir.check_gn_meta", + "args": [""], + "schedule": crontab(minute=55, hour=0), + }, } # otherwise scheduled hours is taken as UTC timezone = "Europe/Paris" diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 6046d5e..3a1d906 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -16,8 +16,10 @@ Base = declarative_base() + def get_folder_size(folder): - return sum(file.stat().st_size for file in Path(folder).rglob('*')) + return sum(file.stat().st_size for file in Path(folder).rglob("*")) + class GeonetworkDatadirChecker: def __init__(self, conf): @@ -30,13 +32,18 @@ def __init__(self, conf): database=conf.get("jdbc.database", "geonetwork"), ) - engine = create_engine(url, connect_args={"options": f"-csearch_path={conf.get('jdbc.schema', 'geonetwork')}"}) + engine = create_engine( + url, + connect_args={ + "options": f"-csearch_path={conf.get('jdbc.schema', 'geonetwork')}" + }, + ) self.sessionm = sessionmaker(bind=engine) self.sessiono = self.sessionm() # Perform database reflection to analyze tables and relationships m = MetaData(schema=conf.get("jdbc.schema", "geonetwork")) - Table('metadata', m, autoload_with=engine) + Table("metadata", m, autoload_with=engine) Base = automap_base(metadata=m) Base.prepare() self.Metadata = Base.classes.metadata @@ -52,25 +59,30 @@ def session(self): def get_meta_list(self): return self.session().query(self.Metadata).all() + @shared_task(bind=True) def check_gn_meta(self): get_logger("CheckGNDatadir").debug("Start gn datadir checker") metadatabase = app.extensions["gndc"] gnmetadatas = metadatabase.get_meta_list() - geonetwork_dir_path = app.extensions['conf'].get("geonetwork.dir", "geonetwork") - geonetwork_datadir_path = app.extensions['conf'].get("geonetwork.data.dir", "geonetwork").replace("${geonetwork.dir}", geonetwork_dir_path) + geonetwork_dir_path = app.extensions["conf"].get("geonetwork.dir", "geonetwork") + geonetwork_datadir_path = ( + app.extensions["conf"] + .get("geonetwork.data.dir", "geonetwork") + .replace("${geonetwork.dir}", geonetwork_dir_path) + ) # self.gnmetadatas.sort(key=lambda x: x.id) meta = dict() meta["searching_path"] = geonetwork_datadir_path meta["problems"] = list() total_could_be_deleted = 0 - for foldermeta in glob.glob(geonetwork_datadir_path+"*/*"): + for foldermeta in glob.glob(geonetwork_datadir_path + "*/*"): idmeta = foldermeta.split("/")[-1] subpath = foldermeta.split("/")[-2] get_logger("CheckGNDatadir").debug(foldermeta) existing_index = 0 - for (index, item) in enumerate(gnmetadatas): + for index, item in enumerate(gnmetadatas): if item.id == int(idmeta): existing_index = index break @@ -81,9 +93,11 @@ def check_gn_meta(self): meta["problems"].append( { "type": "UnusedFileRes", - "path": subpath+"/"+idmeta, - "url": subpath+"/"+idmeta, - "problem" : jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(foldermeta)) + "path": subpath + "/" + idmeta, + "url": subpath + "/" + idmeta, + "problem": jinja2.Template("{{ bytes | filesizeformat }}").render( + bytes=get_folder_size(foldermeta) + ), } ) total_could_be_deleted += get_folder_size(foldermeta) @@ -91,11 +105,16 @@ def check_gn_meta(self): if len(meta["problems"]) > 0: meta["problems"].append( - { - "type": "UnusedFileResTotal", - "path": "Total", - "size": jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=total_could_be_deleted), - "total": jinja2.Template("{{ bytes | filesizeformat }}").render(bytes=get_folder_size(geonetwork_datadir_path)) - }) + { + "type": "UnusedFileResTotal", + "path": "Total", + "size": jinja2.Template("{{ bytes | filesizeformat }}").render( + bytes=total_could_be_deleted + ), + "total": jinja2.Template("{{ bytes | filesizeformat }}").render( + bytes=get_folder_size(geonetwork_datadir_path) + ), + } + ) return meta diff --git a/geordash/dashboard.py b/geordash/dashboard.py index 488fbf7..9e09fac 100644 --- a/geordash/dashboard.py +++ b/geordash/dashboard.py @@ -23,14 +23,17 @@ "dashboard", __name__, url_prefix="/gaia", template_folder="templates/dashboard" ) + def debug_only(f): @wraps(f) def wrapped(**kwargs): if not app.debug: abort(404) return f(**kwargs) + return wrapped + def get_rescontent_from_resid(restype, resid): r = mapstore_get(request, f"rest/geostore/data/{resid}", False) res = dict() @@ -74,11 +77,13 @@ def get_rescontent_from_resid(restype, resid): def home(): return render_template("home.html") + @dash_bp.route("/debug") @debug_only def debug(): return app.extensions["conf"].tostr() + @dash_bp.route("/my-metadata") @check_role(role="GN_EDITOR") def my_metadata(): diff --git a/geordash/georchestraconfig.py b/geordash/georchestraconfig.py index f7f6255..69f22df 100644 --- a/geordash/georchestraconfig.py +++ b/geordash/georchestraconfig.py @@ -8,6 +8,7 @@ import json import re + class GeorchestraConfig: def __init__(self): self.sections = dict() @@ -78,11 +79,17 @@ def tostr(self): str += key + ":\r\n
    " for key2 in self.sections[key]: str += " \t " + key2 + " : " - str+= " \t " + self.sections[key][key2] + " = " + self.get(key2, section=key) + "\r\n
    " + str += ( + " \t " + + self.sections[key][key2] + + " = " + + self.get(key2, section=key) + + "\r\n
    " + ) print(type(str)) print(f"Keys in string: {str}") return str - + def get(self, key, section="default"): if section not in self.sections: return None diff --git a/geordash/views.py b/geordash/views.py index 4920af4..a9eb835 100644 --- a/geordash/views.py +++ b/geordash/views.py @@ -236,6 +236,7 @@ def check_geoserver_datadir(): ) return {"result_id": groupresult.id} + @tasks_bp.route("/check/geoserver/datadir//.json") def check_geoserver_datadir_item(colltype, itemid): gsd = app.extensions["owscache"].get_geoserver_datadir_view() @@ -249,6 +250,7 @@ def check_geoserver_datadir_item(colltype, itemid): result = geordash.checks.gsd.gsdatadir_item.delay(ctype, itemid, None) return {"result_id": result.id} + @tasks_bp.route("/check/ows///.json") def check_owslayer(stype, url, lname): if stype not in ("wms", "wmts", "wfs"): @@ -323,6 +325,7 @@ def check_cswservice(url): ) return {"result_id": groupresult.id} + @tasks_bp.route("/check/gndatadir/result.json") def check_gndatadir(): result = check_gn_meta.delay() From 0b9e74403bf0e2337de698a415187d71b697ed6f Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 16:34:49 +0100 Subject: [PATCH 32/38] use formatter for printing bytes from frontend --- geordash/checks/gn_datadir.py | 15 +++------------ geordash/static/js/script.js | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 3a1d906..b0c21e5 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -92,12 +92,8 @@ def check_gn_meta(self): # append useless folder meta["problems"].append( { - "type": "UnusedFileRes", - "path": subpath + "/" + idmeta, "url": subpath + "/" + idmeta, - "problem": jinja2.Template("{{ bytes | filesizeformat }}").render( - bytes=get_folder_size(foldermeta) - ), + "problem": get_folder_size(foldermeta), } ) total_could_be_deleted += get_folder_size(foldermeta) @@ -107,13 +103,8 @@ def check_gn_meta(self): meta["problems"].append( { "type": "UnusedFileResTotal", - "path": "Total", - "size": jinja2.Template("{{ bytes | filesizeformat }}").render( - bytes=total_could_be_deleted - ), - "total": jinja2.Template("{{ bytes | filesizeformat }}").render( - bytes=get_folder_size(geonetwork_datadir_path) - ), + "size": total_could_be_deleted, + "total": get_folder_size(geonetwork_datadir_path), } ) diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index e24a2a5..49f70b9 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -325,7 +325,7 @@ const GetPbStr = (p) => { case 'UnusedVectorData': return `VectorData '${p.skey.replaceAll('~','/')}' is unused` case 'UnusedFileResTotal': - return `In total ${p.size} could be saved on ${p.total}` + return `In total ${sizeFormatter(p.size)} could be saved on ${sizeFormatter(p.total)}` default: return `Unhandled error code ${p.type} for problem ${p}` } @@ -517,6 +517,7 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') if (Array.isArray(data["value"]) || Array.isArray(data["value"]['problems'])) { var argtitle = 'Layer' var argcolumn2 = 'Problem' + var columns2Formatter = 'None' if (data['task'].includes('csw')) { argtitle = 'Metadata' } else if (data['task'].includes('check_resources')) { @@ -530,6 +531,7 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') } else if (data['task'].includes('gn_datadir')) { argtitle = 'Path' argcolumn2 = 'Size' + columns2Formatter = 'sizeFormatter' } var prevexp = $(targetpbdivid + '-export') if (prevexp.length > 0) { @@ -556,7 +558,7 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') columns: [ {'title': 'Index', 'formatter': 'runningFormatter'}, {'field': 'url', 'title': argtitle, 'sortable': true, 'formatter': 'urlFormatter'}, - {'field': 'problem', 'title': argcolumn2, 'sortable': true} + {'field': 'problem', 'title': argcolumn2, 'sortable': true, 'formatter': columns2Formatter} ] }); } @@ -619,6 +621,17 @@ function urlFormatter(value, row) { return row.url } } +function bytesToSize(bytes) { + var sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']; + if (bytes == 0) return 'n/a'; + var i = parseInt(Math.floor(Math.log(bytes) / Math.log(1024))); + if (i == 0) return bytes + ' ' + sizes[i]; + return (bytes / Math.pow(1024, i)).toFixed(1) + ' ' + sizes[i]; +} + +function sizeFormatter(value, row) { + return bytesToSize(value) +} function runningFormatter(value, row, index) { return 1 + index; } From e142a86a3f63c478c14d74d6eb6269b3dee505f1 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 16:49:27 +0100 Subject: [PATCH 33/38] refactor bytes Formatter --- geordash/static/js/script.js | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js index 49f70b9..1916bd0 100644 --- a/geordash/static/js/script.js +++ b/geordash/static/js/script.js @@ -325,7 +325,7 @@ const GetPbStr = (p) => { case 'UnusedVectorData': return `VectorData '${p.skey.replaceAll('~','/')}' is unused` case 'UnusedFileResTotal': - return `In total ${sizeFormatter(p.size)} could be saved on ${sizeFormatter(p.total)}` + return `In total ${bytesFormatter(p.size)} could be saved on ${bytesFormatter(p.total)}` default: return `Unhandled error code ${p.type} for problem ${p}` } @@ -531,7 +531,7 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') } else if (data['task'].includes('gn_datadir')) { argtitle = 'Path' argcolumn2 = 'Size' - columns2Formatter = 'sizeFormatter' + columns2Formatter = 'bytesFormatter' } var prevexp = $(targetpbdivid + '-export') if (prevexp.length > 0) { @@ -621,17 +621,13 @@ function urlFormatter(value, row) { return row.url } } -function bytesToSize(bytes) { +function bytesFormatter(bytes, row="") { var sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']; if (bytes == 0) return 'n/a'; var i = parseInt(Math.floor(Math.log(bytes) / Math.log(1024))); if (i == 0) return bytes + ' ' + sizes[i]; return (bytes / Math.pow(1024, i)).toFixed(1) + ' ' + sizes[i]; } - -function sizeFormatter(value, row) { - return bytesToSize(value) -} function runningFormatter(value, row, index) { return 1 + index; } From 4822c19fc8991a6e6ca5dcebfabf83143e29d999 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 16:52:54 +0100 Subject: [PATCH 34/38] remove useless import now --- geordash/checks/gn_datadir.py | 1 - 1 file changed, 1 deletion(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index b0c21e5..e9aae5b 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -12,7 +12,6 @@ from sqlalchemy.ext.declarative import declarative_base import glob from pathlib import Path -import jinja2 Base = declarative_base() From 8e6e4e8b9e53aecf8b5c13973b604f6b1c75f2dd Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Wed, 29 Oct 2025 17:11:46 +0100 Subject: [PATCH 35/38] remove useless print for debug --- geordash/georchestraconfig.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/geordash/georchestraconfig.py b/geordash/georchestraconfig.py index 69f22df..2ef268c 100644 --- a/geordash/georchestraconfig.py +++ b/geordash/georchestraconfig.py @@ -86,8 +86,6 @@ def tostr(self): + self.get(key2, section=key) + "\r\n
    " ) - print(type(str)) - print(f"Keys in string: {str}") return str def get(self, key, section="default"): From 412e2c9b53c1d5dd39ffaa9dc9568b8538585a1b Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Thu, 30 Oct 2025 11:14:42 +0100 Subject: [PATCH 36/38] change way to print debug --- geordash/georchestraconfig.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/geordash/georchestraconfig.py b/geordash/georchestraconfig.py index 2ef268c..4e3a5d9 100644 --- a/geordash/georchestraconfig.py +++ b/geordash/georchestraconfig.py @@ -79,13 +79,16 @@ def tostr(self): str += key + ":\r\n
    " for key2 in self.sections[key]: str += " \t " + key2 + " : " - str += ( - " \t " - + self.sections[key][key2] - + " = " - + self.get(key2, section=key) - + "\r\n
    " - ) + if self.sections[key][key2] == self.get(key2, section=key): + str += " \t " + self.sections[key][key2] + "\r\n
    " + else: + str += ( + " \t " + + self.sections[key][key2] + + " = " + + self.get(key2, section=key) + + "\r\n
    " + ) return str def get(self, key, section="default"): From 9344e95f995c563bf20352c0c7e78c1622afbeb0 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel Date: Thu, 30 Oct 2025 11:19:06 +0100 Subject: [PATCH 37/38] remove useless Base --- geordash/checks/gn_datadir.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index e9aae5b..902902a 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -13,8 +13,6 @@ import glob from pathlib import Path -Base = declarative_base() - def get_folder_size(folder): return sum(file.stat().st_size for file in Path(folder).rglob("*")) From de5f08b449e13131a9f6a4a854fb2f60c519e686 Mon Sep 17 00:00:00 2001 From: Jean-Michel Crepel <45998535+jeanmi151@users.noreply.github.com> Date: Thu, 30 Oct 2025 12:06:04 +0100 Subject: [PATCH 38/38] remove import declarative_base gn_datadir.py --- geordash/checks/gn_datadir.py | 1 - 1 file changed, 1 deletion(-) diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py index 902902a..d4a6aba 100644 --- a/geordash/checks/gn_datadir.py +++ b/geordash/checks/gn_datadir.py @@ -9,7 +9,6 @@ from sqlalchemy.ext.automap import automap_base from sqlalchemy.exc import OperationalError from sqlalchemy.orm import sessionmaker -from sqlalchemy.ext.declarative import declarative_base import glob from pathlib import Path