diff --git a/docker/Dockerfile_celery b/docker/Dockerfile_celery index 106c20e..d1d4652 100644 --- a/docker/Dockerfile_celery +++ b/docker/Dockerfile_celery @@ -27,4 +27,4 @@ USER celery COPY . /geordash WORKDIR /geordash -CMD ["sh", "-c", "python3 -m celery -A make_celery worker --loglevel INFO -P solo -B -E -s workdir/celerybeat-schedule -s celerybeat-schedule"] +CMD ["sh", "-c", "python3 -m celery -A make_celery worker --loglevel INFO -P solo -B -E -s workdir/celerybeat-schedule"] diff --git a/docker/Dockerfile_flask b/docker/Dockerfile_flask index c56ade2..b095863 100644 --- a/docker/Dockerfile_flask +++ b/docker/Dockerfile_flask @@ -5,6 +5,7 @@ ENV FLASK_APP=geordash ENV FLASK_OPTS="-h 0.0.0.0 -p 5002" ENV georchestradatadir=/etc/georchestra ENV REDISURL="" +#ENV FLASK_DEBUG=1 # set fixed UID and GID - see github.com/hexops/dockerfile ARG UID=10000 diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index efe279d..cdbae39 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -8,10 +8,12 @@ services: redis: image: "redis:alpine" command: redis-server - environment: - REDIS_REPLICATION_MODE=master + - gaia-back: + # if you want to switch back to official image + # image: "georchestra/gaia:latest" build: context: ../ dockerfile: ./docker/Dockerfile_flask @@ -65,6 +67,8 @@ services: gaia-celery: + # if you want to switch back to official image + # image: "georchestra/gaia-celery:latest" build: context: ../ dockerfile: ./docker/Dockerfile_celery diff --git a/geordash/__init__.py b/geordash/__init__.py index 07eb00f..b252ef1 100644 --- a/geordash/__init__.py +++ b/geordash/__init__.py @@ -11,6 +11,7 @@ from geordash.georchestraconfig import GeorchestraConfig from geordash.result_backend.redisbackend import RedisClient from geordash.checks.mapstore import MapstoreChecker +from geordash.checks.gn_datadir import GeonetworkDatadirChecker from geordash.decorators import is_superuser from config import url as redisurl import threading @@ 
-86,6 +87,7 @@ def inject_globals(): app.extensions["conf"] = conf app.extensions["owscache"] = OwsCapCache(conf, app) app.extensions["msc"] = MapstoreChecker(conf) + app.extensions["gndc"] = GeonetworkDatadirChecker(conf) app.extensions["rcli"] = RedisClient(redisurl) from . import views, api, admin, dashboard diff --git a/geordash/admin.py b/geordash/admin.py index 3d3c5ee..7703363 100644 --- a/geordash/admin.py +++ b/geordash/admin.py @@ -52,6 +52,17 @@ def geonetwork(): return render_template("admin/geonetwork.html", portals=portals) +@admin_bp.route("/geonetwork/datadir") +@check_role(role="GN_ADMIN") +def geonetwork_datadir(): + all_jobs_for_gnconfigs = app.extensions["rcli"].get_taskids_by_taskname_and_args( + "geordash.checks.gn_datadir.check_gn_meta", [] + ) + return render_template( + "admin/geonetwork_datadir.html", previous_configs_jobs=all_jobs_for_gnconfigs + ) + + @admin_bp.route("/geoserver") @check_role(role="ADMINISTRATOR") def geoserver(): diff --git a/geordash/celeryconfig.py.example b/geordash/celeryconfig.py.example index 3749cba..33d634f 100644 --- a/geordash/celeryconfig.py.example +++ b/geordash/celeryconfig.py.example @@ -11,6 +11,7 @@ imports = ( "geordash.checks.csw", "geordash.checks.mviewer", "geordash.checks.gsd", + "geordash.checks.gn_datadir", ) # worker_pool = solo worker_log_format = ( @@ -56,7 +57,7 @@ beat_schedule = { "check-gs-datadir-every-sunday": { "task": "geordash.checks.gsd.gsdatadir", "args": [], - "schedule": crontab(day_of_week='sunday',minute=30, hour=1), + "schedule": crontab(day_of_week="sunday", minute=30, hour=1), }, "check-gn-metadatas-every-night": { "task": "geordash.checks.csw.check_catalog", @@ -68,6 +69,11 @@ beat_schedule = { # "args": [], # "schedule": crontab(minute=0, hour=1), # }, + "check-gn-metadatadir-every-night": { + "task": "geordash.checks.gn_datadir.check_gn_meta", + "args": [""], + "schedule": crontab(minute=55, hour=0), + }, } # otherwise scheduled hours is taken as UTC timezone = 
#!/bin/env python3
# -*- coding: utf-8 -*-
# vim: ts=4 sw=4 et
"""Detect folders of the GeoNetwork data directory left behind by removed metadata.

The check lists the ``<datadir>*/*`` entries, interprets the last path
component as a metadata id and reports every folder whose id is no longer
present in the ``metadata`` table of the GeoNetwork database.
"""
from celery import shared_task
from geordash.logwrap import get_logger
from flask import current_app as app
from sqlalchemy import create_engine, MetaData, select, Table
from sqlalchemy.engine import URL
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import sessionmaker
import glob
from pathlib import Path


def get_folder_size(folder):
    """Return the sum of ``st_size`` of every entry found recursively below *folder*.

    Directories are included in the sum, exactly like files (``rglob("*")``
    yields both).  Entries that cannot be stat'ed (dangling symlink, entry
    removed while scanning, permission problem) are skipped instead of
    aborting the whole computation.
    """
    total = 0
    for entry in Path(folder).rglob("*"):
        try:
            total += entry.stat().st_size
        except OSError:
            # best effort: one unreadable entry must not fail the whole check
            continue
    return total


class GeonetworkDatadirChecker:
    """Read access to the ``metadata`` table of the GeoNetwork database.

    Connection settings are read from the ``geonetwork`` section of the
    configuration object through the ``jdbc.*`` keys.
    """

    def __init__(self, conf):
        """Create the engine/session factory and reflect the ``metadata`` table.

        :param conf: configuration object exposing ``get(key, section)``.
        """
        schema = conf.get("jdbc.schema", "geonetwork")
        url = URL.create(
            drivername="postgresql",
            username=conf.get("jdbc.username", "geonetwork"),
            host=conf.get("jdbc.host", "geonetwork"),
            port=conf.get("jdbc.port", "geonetwork"),
            password=conf.get("jdbc.password", "geonetwork"),
            database=conf.get("jdbc.database", "geonetwork"),
        )

        engine = create_engine(
            url,
            connect_args={"options": f"-csearch_path={schema}"},
        )
        self.sessionm = sessionmaker(bind=engine)
        self.sessiono = self.sessionm()

        # Perform database reflection to analyze tables and relationships
        m = MetaData(schema=schema)
        Table("metadata", m, autoload_with=engine)
        Base = automap_base(metadata=m)
        Base.prepare()
        self.Metadata = Base.classes.metadata

    def session(self):
        """Return a usable session, replacing it when the probe query fails."""
        try:
            self.sessiono.execute(select(1))
        except OperationalError:
            print("Reconnecting to the database...")
            self.sessiono = self.sessionm()
        return self.sessiono

    def get_meta_list(self):
        """Return every row of the ``metadata`` table as mapped objects."""
        return self.session().query(self.Metadata).all()

    def get_meta_ids(self):
        """Return the set of ids found in the ``metadata`` table.

        Only the ``id`` column is selected, which is all the datadir check
        needs and gives O(1) membership tests.
        """
        return {row[0] for row in self.session().query(self.Metadata.id)}


@shared_task(bind=True)
def check_gn_meta(self):
    """Report folders of the GeoNetwork datadir that match no metadata id.

    NOTE: the task takes no argument besides the bound task instance, so it
    must be scheduled/called with an empty argument list (``"args": []``).

    :return: a dict with
        ``searching_path``: the datadir path that was scanned,
        ``problems``: one ``{"url": "<parent>/<id>", "problem": <size>}``
        entry per leftover folder, followed - only when at least one
        leftover exists - by a final ``{"type": "UnusedFileResTotal",
        "size": <reclaimable>, "total": <datadir size>}`` summary entry.
    """
    logger = get_logger("CheckGNDatadir")
    logger.debug("Start gn datadir checker")

    conf = app.extensions["conf"]
    known_ids = app.extensions["gndc"].get_meta_ids()
    geonetwork_dir_path = conf.get("geonetwork.dir", "geonetwork")
    geonetwork_datadir_path = conf.get("geonetwork.data.dir", "geonetwork").replace(
        "${geonetwork.dir}", geonetwork_dir_path
    )

    meta = {"searching_path": geonetwork_datadir_path, "problems": []}
    total_could_be_deleted = 0
    for foldermeta in glob.glob(geonetwork_datadir_path + "*/*"):
        logger.debug(foldermeta)
        folder = Path(foldermeta)
        idmeta = folder.name
        try:
            metadata_id = int(idmeta)
        except ValueError:
            # not a metadata folder (name is not an id): neither fail nor
            # flag it as something that could be deleted
            continue
        # membership test instead of the former index-based flag, which
        # mistook a match on the first row (index 0) for "not found"
        if metadata_id in known_ids:
            continue
        # leftover folder: compute its size once and reuse it for the total
        size = get_folder_size(foldermeta)
        meta["problems"].append(
            {
                "url": folder.parent.name + "/" + idmeta,
                "problem": size,
            }
        )
        total_could_be_deleted += size
    logger.debug("finish gn datadir checker")

    if meta["problems"]:
        # summary entry, always last: consumers pop it before listing folders
        meta["problems"].append(
            {
                "type": "UnusedFileResTotal",
                "size": total_could_be_deleted,
                "total": get_folder_size(geonetwork_datadir_path),
            }
        )

    return meta
def tostr(self):
    """Return a textual dump of every configuration section.

    Each section name is emitted on its own line, followed by one line per
    key.  For each key the raw stored value is shown; when the value
    returned by ``self.get(key, section=...)`` differs from the raw one,
    both are shown as ``raw = resolved``.

    Values are formatted with ``str()`` semantics, so a ``None`` (or any
    non-string) value is rendered instead of raising ``TypeError``.
    """
    # NOTE(review): the line terminator literal was garbled in the reviewed
    # source (a raw line break appeared inside the quotes); "<br>" is the
    # reconstructed HTML break - confirm against the repository.
    eol = "\r\n<br>"
    chunks = []
    for section, entries in self.sections.items():
        chunks.append(f"{section}:{eol}")
        for name in entries:
            raw = entries[name]
            resolved = self.get(name, section=section)
            chunks.append(f" \t {name} : ")
            if raw == resolved:
                chunks.append(f" \t {raw}{eol}")
            else:
                chunks.append(f" \t {raw} = {resolved}{eol}")
    return "".join(chunks)
(taskid: '+ mydata['taskid'] + ')') + } + if (mydata === "notask") { + $(o["prefix"] + '-abstract').html("no " + o["taskname"] + " job found with args " + o["taskargs"].join(",") + ", something went wrong ?") + return; + } + if (mydata['value'] === null && mydata['ready'] === false) { + $(o["prefix"] + '-abstract').html("job is currently running, " + mydata['completed'] + " objects checked") + return; + } + let str = "
"; + + const nerrors = mydata['value']['problems'].length; + + if (nerrors > 0) { + str += "" + nerrors + " errors found !"; + } else { + str += " no errors !"; + } + $(o["prefix"] + '-abstract').html(str); + }) + .catch(function(err) { + $(o["prefix"] + '-abstract').html("something went wrong") + }); + }) +} + const fetchMyMd = (localgnbaseurl) => { fetch(baseurl + '/api/geonetwork/metadatas.json') .then(response => response.json()) @@ -290,6 +324,8 @@ const GetPbStr = (p) => { return `RasterData '${p.skey.replaceAll('~','/')}' is unused` case 'UnusedVectorData': return `VectorData '${p.skey.replaceAll('~','/')}' is unused` + case 'UnusedFileResTotal': + return `In total ${bytesFormatter(p.size)} could be saved on ${bytesFormatter(p.total)}` default: return `Unhandled error code ${p.type} for problem ${p}` } @@ -446,11 +482,13 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') data["value"].problems = probs.flat(1) } else { // if problems is undef, single task badly failed and returned the python exception as value - if (data["value"].problems !== undefined) { - const probs = data["value"].problems.map(i => { - return GetPbStr(i) - }) - data["value"].problems = probs + if (!data['task'].includes('gn_datadir')) { + if (data["value"].problems !== undefined) { + const probs = data["value"].problems.map(i => { + return GetPbStr(i) + }) + data["value"].problems = probs + } } } if (data["value"].problems !== undefined && data["value"].problems.length > 0) { @@ -466,10 +504,20 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') const missing = all.filter(x => !done.includes(x)); $(targetdivid).text("jobs on " + missing + " failed, did " + data["completed"] + " - on those, " + data["value"].problems.length + ' problems found'); } else { + if (data['task'].includes('gn_datadir')) { + // if gn_datadir will remove last problem as it is a total count + const targetpboverviewdivid = targetdivid.replace('#pbtitle', 
'#pboverviews') + totalgndatadir = data["value"].problems.pop() + const exporttotalgndatadir = $("
"); + exporttotalgndatadir.html("

"+GetPbStr(totalgndatadir)+" within the path "+data["value"]["searching_path"]+"

" ) + $(targetpboverviewdivid).html(exporttotalgndatadir) + } $(targetdivid).text(data["value"].problems.length + ' problems found'); } - if (Array.isArray(data["value"])) { + if (Array.isArray(data["value"]) || Array.isArray(data["value"]['problems'])) { var argtitle = 'Layer' + var argcolumn2 = 'Problem' + var columns2Formatter = 'None' if (data['task'].includes('csw')) { argtitle = 'Metadata' } else if (data['task'].includes('check_resources')) { @@ -480,6 +528,10 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') argtitle = 'Config url' } else if (data['task'].includes('gsd.gsdatadir')) { argtitle = 'Item' + } else if (data['task'].includes('gn_datadir')) { + argtitle = 'Path' + argcolumn2 = 'Size' + columns2Formatter = 'bytesFormatter' } var prevexp = $(targetpbdivid + '-export') if (prevexp.length > 0) { @@ -506,7 +558,7 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') columns: [ {'title': 'Index', 'formatter': 'runningFormatter'}, {'field': 'url', 'title': argtitle, 'sortable': true, 'formatter': 'urlFormatter'}, - {'field': 'problem', 'title': 'Problem', 'sortable': true} + {'field': 'problem', 'title': argcolumn2, 'sortable': true, 'formatter': columns2Formatter} ] }); } @@ -529,6 +581,10 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle') $(targetdivid).html('No problemo!') } $(targetpbdivid).empty(); + if (data['task'].includes('gn_datadir')) { + const targetpboverviewdivid = targetdivid.replace('#pbtitle', '#pboverviews'); + $(targetpboverviewdivid).empty(); + } } const d = new Date(data["finished"] * 1000); $(targetpbdetdivid).text('vérification faite le '+ d.toLocaleString("fr-FR")); @@ -565,6 +621,13 @@ function urlFormatter(value, row) { return row.url } } +function bytesFormatter(bytes, row="") { + var sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']; + if (bytes == 0) return 'n/a'; + var i = parseInt(Math.floor(Math.log(bytes) / Math.log(1024))); + if (i == 
0) return bytes + ' ' + sizes[i]; + return (bytes / Math.pow(1024, i)).toFixed(1) + ' ' + sizes[i]; +} function runningFormatter(value, row, index) { return 1 + index; } diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html new file mode 100644 index 0000000..81b3832 --- /dev/null +++ b/geordash/templates/admin/geonetwork_datadir.html @@ -0,0 +1,42 @@ +{% set title = 'admin geonetwork' %} +{% include 'header.tmpl' %} + + {% block scripts %} + + {% endblock %} + + + {% block content %} +
+

Geonetwork datadir Checker

+
+ + +
+

+
+Leftover dirs from removed metadatas: +
+ +

Problems

+
+
+
+

Previous

+

+
+ {% endblock %} + + diff --git a/geordash/templates/dashboard/header.tmpl b/geordash/templates/dashboard/header.tmpl index 4c21d42..920a24c 100644 --- a/geordash/templates/dashboard/header.tmpl +++ b/geordash/templates/dashboard/header.tmpl @@ -97,6 +97,7 @@ Non connecté
  • {% if superuser %} {{ render_dropdown_item('dashboard.admin.geonetwork', 'Liste des sous-portails') }} + {{ render_dropdown_item('dashboard.admin.geonetwork_datadir', 'Datadir checker') }} {% endif %} diff --git a/geordash/templates/dashboard/home.html b/geordash/templates/dashboard/home.html index aaa6f27..33cc327 100644 --- a/geordash/templates/dashboard/home.html +++ b/geordash/templates/dashboard/home.html @@ -15,6 +15,11 @@ {"prefix": "#gs-datadir", "taskname": "geordash.checks.gsd.gsdatadir","taskargs": []}, {% endif %} ]); +{% if superuser %} + fetchForHomeSingleTask([ + {"prefix": "#gndc", "taskname": "geordash.checks.gn_datadir.check_gn_meta","taskargs": []}, + ]); +{% endif %} {% if 'sec-roles' not in request.headers or ( 'sec-roles' in request.headers and 'MAPSTORE_ADMIN' not in request.headers['sec-roles'] ) %} fetchMapsAndCtxCount('{{ url_for('dashboard.my_maps_and_apps') }}'); {% endif %} @@ -103,7 +108,20 @@
    Mapstore contexts
    - +{% if superuser %} +
    + +
    +
    +
    Geonetwork Datadir Checker
    +

    Placeholder

    +

    Job never ran

    +
    +
    +
    +
    +{% endif %} + {% endblock %} diff --git a/geordash/views.py b/geordash/views.py index fdf5d59..a9eb835 100644 --- a/geordash/views.py +++ b/geordash/views.py @@ -12,6 +12,7 @@ from geordash.utils import unmunge from geordash.checks.mapstore import check_res, check_configs, check_resources +from geordash.checks.gn_datadir import check_gn_meta from geordash.tasks.fetch_csw import get_records from geordash.tasks.gsdatadir import parse_gsdatadir import geordash.checks.ows @@ -40,6 +41,7 @@ def result(id: str) -> dict[str, object]: "geordash.checks.ows.owsservice", "geordash.checks.csw.check_catalog", "geordash.checks.gsd.gsdatadir", + "geordash.checks.gn_datadir", ): # print(f"real taskset id is {result.result[0][0]}") result = GroupResult.restore(result.result[0][0]) @@ -234,6 +236,7 @@ def check_geoserver_datadir(): ) return {"result_id": groupresult.id} + @tasks_bp.route("/check/geoserver/datadir//.json") def check_geoserver_datadir_item(colltype, itemid): gsd = app.extensions["owscache"].get_geoserver_datadir_view() @@ -247,6 +250,7 @@ def check_geoserver_datadir_item(colltype, itemid): result = geordash.checks.gsd.gsdatadir_item.delay(ctype, itemid, None) return {"result_id": result.id} + @tasks_bp.route("/check/ows///.json") def check_owslayer(stype, url, lname): if stype not in ("wms", "wmts", "wfs"): @@ -320,3 +324,9 @@ def check_cswservice(url): "geordash.checks.csw.check_catalog", [url], groupresult.id ) return {"result_id": groupresult.id} + + +@tasks_bp.route("/check/gndatadir/result.json") +def check_gndatadir(): + result = check_gn_meta.delay() + return {"result_id": result.id}