diff --git a/docker/Dockerfile_celery b/docker/Dockerfile_celery
index 106c20e..d1d4652 100644
--- a/docker/Dockerfile_celery
+++ b/docker/Dockerfile_celery
@@ -27,4 +27,4 @@ USER celery
COPY . /geordash
WORKDIR /geordash
-CMD ["sh", "-c", "python3 -m celery -A make_celery worker --loglevel INFO -P solo -B -E -s workdir/celerybeat-schedule -s celerybeat-schedule"]
+CMD ["sh", "-c", "python3 -m celery -A make_celery worker --loglevel INFO -P solo -B -E -s workdir/celerybeat-schedule"]
diff --git a/docker/Dockerfile_flask b/docker/Dockerfile_flask
index c56ade2..b095863 100644
--- a/docker/Dockerfile_flask
+++ b/docker/Dockerfile_flask
@@ -5,6 +5,7 @@ ENV FLASK_APP=geordash
ENV FLASK_OPTS="-h 0.0.0.0 -p 5002"
ENV georchestradatadir=/etc/georchestra
ENV REDISURL=""
+#ENV FLASK_DEBUG=1
# set fixed UID and GID - see github.com/hexops/dockerfile
ARG UID=10000
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index efe279d..cdbae39 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -8,10 +8,12 @@ services:
redis:
image: "redis:alpine"
command: redis-server
-
environment:
- REDIS_REPLICATION_MODE=master
+ -
gaia-back:
+ # if you want to switch back to official image
+ # image: "georchestra/gaia:latest"
build:
context: ../
dockerfile: ./docker/Dockerfile_flask
@@ -65,6 +67,8 @@ services:
gaia-celery:
+ # if you want to switch back to official image
+ # image: "georchestra/gaia-celery:latest"
build:
context: ../
dockerfile: ./docker/Dockerfile_celery
diff --git a/geordash/__init__.py b/geordash/__init__.py
index 07eb00f..b252ef1 100644
--- a/geordash/__init__.py
+++ b/geordash/__init__.py
@@ -11,6 +11,7 @@
from geordash.georchestraconfig import GeorchestraConfig
from geordash.result_backend.redisbackend import RedisClient
from geordash.checks.mapstore import MapstoreChecker
+from geordash.checks.gn_datadir import GeonetworkDatadirChecker
from geordash.decorators import is_superuser
from config import url as redisurl
import threading
@@ -86,6 +87,7 @@ def inject_globals():
app.extensions["conf"] = conf
app.extensions["owscache"] = OwsCapCache(conf, app)
app.extensions["msc"] = MapstoreChecker(conf)
+ app.extensions["gndc"] = GeonetworkDatadirChecker(conf)
app.extensions["rcli"] = RedisClient(redisurl)
from . import views, api, admin, dashboard
diff --git a/geordash/admin.py b/geordash/admin.py
index 3d3c5ee..7703363 100644
--- a/geordash/admin.py
+++ b/geordash/admin.py
@@ -52,6 +52,17 @@ def geonetwork():
return render_template("admin/geonetwork.html", portals=portals)
+@admin_bp.route("/geonetwork/datadir")
+@check_role(role="GN_ADMIN")
+def geonetwork_datadir():
+ all_jobs_for_gnconfigs = app.extensions["rcli"].get_taskids_by_taskname_and_args(
+ "geordash.checks.gn_datadir.check_gn_meta", []
+ )
+ return render_template(
+ "admin/geonetwork_datadir.html", previous_configs_jobs=all_jobs_for_gnconfigs
+ )
+
+
@admin_bp.route("/geoserver")
@check_role(role="ADMINISTRATOR")
def geoserver():
diff --git a/geordash/celeryconfig.py.example b/geordash/celeryconfig.py.example
index 3749cba..33d634f 100644
--- a/geordash/celeryconfig.py.example
+++ b/geordash/celeryconfig.py.example
@@ -11,6 +11,7 @@ imports = (
"geordash.checks.csw",
"geordash.checks.mviewer",
"geordash.checks.gsd",
+ "geordash.checks.gn_datadir",
)
# worker_pool = solo
worker_log_format = (
@@ -56,7 +57,7 @@ beat_schedule = {
"check-gs-datadir-every-sunday": {
"task": "geordash.checks.gsd.gsdatadir",
"args": [],
- "schedule": crontab(day_of_week='sunday',minute=30, hour=1),
+ "schedule": crontab(day_of_week="sunday", minute=30, hour=1),
},
"check-gn-metadatas-every-night": {
"task": "geordash.checks.csw.check_catalog",
@@ -68,6 +69,11 @@ beat_schedule = {
# "args": [],
# "schedule": crontab(minute=0, hour=1),
# },
+ "check-gn-metadatadir-every-night": {
+ "task": "geordash.checks.gn_datadir.check_gn_meta",
+ "args": [""],
+ "schedule": crontab(minute=55, hour=0),
+ },
}
# otherwise scheduled hours is taken as UTC
timezone = "Europe/Paris"
diff --git a/geordash/checks/gn_datadir.py b/geordash/checks/gn_datadir.py
new file mode 100644
index 0000000..d4a6aba
--- /dev/null
+++ b/geordash/checks/gn_datadir.py
@@ -0,0 +1,107 @@
+#!/bin/env python3
+# -*- coding: utf-8 -*-
+# vim: ts=4 sw=4 et
+from celery import shared_task
+from geordash.logwrap import get_logger
+from flask import current_app as app
+from sqlalchemy import create_engine, MetaData, select, Table
+from sqlalchemy.engine import URL
+from sqlalchemy.ext.automap import automap_base
+from sqlalchemy.exc import OperationalError
+from sqlalchemy.orm import sessionmaker
+import glob
+from pathlib import Path
+
+
+def get_folder_size(folder):
+ return sum(file.stat().st_size for file in Path(folder).rglob("*"))
+
+
+class GeonetworkDatadirChecker:
+ def __init__(self, conf):
+ url = URL.create(
+ drivername="postgresql",
+ username=conf.get("jdbc.username", "geonetwork"),
+ host=conf.get("jdbc.host", "geonetwork"),
+ port=conf.get("jdbc.port", "geonetwork"),
+ password=conf.get("jdbc.password", "geonetwork"),
+ database=conf.get("jdbc.database", "geonetwork"),
+ )
+
+ engine = create_engine(
+ url,
+ connect_args={
+ "options": f"-csearch_path={conf.get('jdbc.schema', 'geonetwork')}"
+ },
+ )
+ self.sessionm = sessionmaker(bind=engine)
+ self.sessiono = self.sessionm()
+
+ # Perform database reflection to analyze tables and relationships
+ m = MetaData(schema=conf.get("jdbc.schema", "geonetwork"))
+ Table("metadata", m, autoload_with=engine)
+ Base = automap_base(metadata=m)
+ Base.prepare()
+ self.Metadata = Base.classes.metadata
+
+ def session(self):
+ try:
+ self.sessiono.execute(select(1))
+ except OperationalError:
+ print("Reconnecting to the database...")
+ self.sessiono = self.sessionm()
+ return self.sessiono
+
+ def get_meta_list(self):
+ return self.session().query(self.Metadata).all()
+
+
+@shared_task(bind=True)
+def check_gn_meta(self):
+ get_logger("CheckGNDatadir").debug("Start gn datadir checker")
+ metadatabase = app.extensions["gndc"]
+ gnmetadatas = metadatabase.get_meta_list()
+ geonetwork_dir_path = app.extensions["conf"].get("geonetwork.dir", "geonetwork")
+ geonetwork_datadir_path = (
+ app.extensions["conf"]
+ .get("geonetwork.data.dir", "geonetwork")
+ .replace("${geonetwork.dir}", geonetwork_dir_path)
+ )
+ # self.gnmetadatas.sort(key=lambda x: x.id)
+ meta = dict()
+ meta["searching_path"] = geonetwork_datadir_path
+ meta["problems"] = list()
+ total_could_be_deleted = 0
+ for foldermeta in glob.glob(geonetwork_datadir_path + "*/*"):
+ idmeta = foldermeta.split("/")[-1]
+ subpath = foldermeta.split("/")[-2]
+ get_logger("CheckGNDatadir").debug(foldermeta)
+ existing_index = 0
+
+ for index, item in enumerate(gnmetadatas):
+ if item.id == int(idmeta):
+ existing_index = index
+ break
+ if existing_index:
+ continue
+ else:
+ # append useless folder
+ meta["problems"].append(
+ {
+ "url": subpath + "/" + idmeta,
+ "problem": get_folder_size(foldermeta),
+ }
+ )
+ total_could_be_deleted += get_folder_size(foldermeta)
+ get_logger("CheckGNDatadir").debug("finish gn datadir checker")
+
+ if len(meta["problems"]) > 0:
+ meta["problems"].append(
+ {
+ "type": "UnusedFileResTotal",
+ "size": total_could_be_deleted,
+ "total": get_folder_size(geonetwork_datadir_path),
+ }
+ )
+
+ return meta
diff --git a/geordash/dashboard.py b/geordash/dashboard.py
index 54a3145..9e09fac 100644
--- a/geordash/dashboard.py
+++ b/geordash/dashboard.py
@@ -6,6 +6,7 @@
from flask import request, render_template, abort, url_for
from flask import current_app as app
import requests
+from functools import wraps
from geordash.decorators import is_superuser, check_role
from geordash.checks.mapstore import get_resources_using_ows, get_res
@@ -23,6 +24,16 @@
)
+def debug_only(f):
+ @wraps(f)
+ def wrapped(**kwargs):
+ if not app.debug:
+ abort(404)
+ return f(**kwargs)
+
+ return wrapped
+
+
def get_rescontent_from_resid(restype, resid):
r = mapstore_get(request, f"rest/geostore/data/{resid}", False)
res = dict()
@@ -67,6 +78,12 @@ def home():
return render_template("home.html")
+@dash_bp.route("/debug")
+@debug_only
+def debug():
+ return app.extensions["conf"].tostr()
+
+
@dash_bp.route("/my-metadata")
@check_role(role="GN_EDITOR")
def my_metadata():
diff --git a/geordash/georchestraconfig.py b/geordash/georchestraconfig.py
index 2c38cdd..4e3a5d9 100644
--- a/geordash/georchestraconfig.py
+++ b/geordash/georchestraconfig.py
@@ -50,6 +50,12 @@ def __init__(self):
except:
# safe default value
self.sections["urls"]["localgs"] = "geoserver"
+
+ with open(f"{self.datadirpath}/geonetwork/geonetwork.properties") as lines:
+ lines = chain(("[section]",), lines) # This line does the trick.
+ parser.read_file(lines)
+ self.sections["geonetwork"] = parser["section"]
+
# read current commit from .git/HEAD which might lead to the branch tip
prefix = getcwd() + "/.git/"
self.sections["gaia"] = {"commit": None}
@@ -67,6 +73,24 @@ def __init__(self):
# failed to read .git/HEAD or .git/refs/heads/* ?
pass
+ def tostr(self):
+ str = ""
+ for key in self.sections:
+ str += key + ":\r\n"
+ for key2 in self.sections[key]:
+ str += " \t " + key2 + " : "
+ if self.sections[key][key2] == self.get(key2, section=key):
+ str += " \t " + self.sections[key][key2] + "\r\n"
+ else:
+ str += (
+ " \t "
+ + self.sections[key][key2]
+ + " = "
+ + self.get(key2, section=key)
+ + "\r\n"
+ )
+ return str
+
def get(self, key, section="default"):
if section not in self.sections:
return None
diff --git a/geordash/static/js/script.js b/geordash/static/js/script.js
index 498b441..1916bd0 100644
--- a/geordash/static/js/script.js
+++ b/geordash/static/js/script.js
@@ -40,6 +40,40 @@ const fetchForHome = (widgets) => {
})
}
+const fetchForHomeSingleTask = (widgets) => {
+ widgets.forEach(function(o) {
+ fetch(baseurl + '/tasks/lastresultbytask/' + o["taskname"] + "?taskargs=" + o["taskargs"].join(","))
+ .then(response => response.json())
+ .then(mydata => {
+ if (parseInt(mydata["finished"])) {
+ const d = new Date(mydata["finished"] * 1000);
+ $(o["prefix"] + '-lastupdated').html("Information valid as of "+ d.toLocaleString("fr-FR") + '<br/>(taskid: '+ mydata['taskid'] + ')')
+ }
+ if (mydata === "notask") {
+ $(o["prefix"] + '-abstract').html("no " + o["taskname"] + " job found with args " + o["taskargs"].join(",") + ", something went wrong ?")
+ return;
+ }
+ if (mydata['value'] === null && mydata['ready'] === false) {
+ $(o["prefix"] + '-abstract').html("job is currently running, " + mydata['completed'] + " objects checked")
+ return;
+ }
+ let str = "<br/>";
+
+ const nerrors = mydata['value']['problems'].length;
+
+ if (nerrors > 0) {
+ str += "" + nerrors + " errors found !";
+ } else {
+ str += " no errors !";
+ }
+ $(o["prefix"] + '-abstract').html(str);
+ })
+ .catch(function(err) {
+ $(o["prefix"] + '-abstract').html("something went wrong")
+ });
+ })
+}
+
const fetchMyMd = (localgnbaseurl) => {
fetch(baseurl + '/api/geonetwork/metadatas.json')
.then(response => response.json())
@@ -290,6 +324,8 @@ const GetPbStr = (p) => {
return `RasterData '${p.skey.replaceAll('~','/')}' is unused`
case 'UnusedVectorData':
return `VectorData '${p.skey.replaceAll('~','/')}' is unused`
+ case 'UnusedFileResTotal':
+ return `In total ${bytesFormatter(p.size)} could be saved on ${bytesFormatter(p.total)}`
default:
return `Unhandled error code ${p.type} for problem ${p}`
}
@@ -446,11 +482,13 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle')
data["value"].problems = probs.flat(1)
} else {
// if problems is undef, single task badly failed and returned the python exception as value
- if (data["value"].problems !== undefined) {
- const probs = data["value"].problems.map(i => {
- return GetPbStr(i)
- })
- data["value"].problems = probs
+ if (!data['task'].includes('gn_datadir')) {
+ if (data["value"].problems !== undefined) {
+ const probs = data["value"].problems.map(i => {
+ return GetPbStr(i)
+ })
+ data["value"].problems = probs
+ }
}
}
if (data["value"].problems !== undefined && data["value"].problems.length > 0) {
@@ -466,10 +504,20 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle')
const missing = all.filter(x => !done.includes(x));
$(targetdivid).text("jobs on " + missing + " failed, did " + data["completed"] + " - on those, " + data["value"].problems.length + ' problems found');
} else {
+ if (data['task'].includes('gn_datadir')) {
+ // if gn_datadir will remove last problem as it is a total count
+ const targetpboverviewdivid = targetdivid.replace('#pbtitle', '#pboverviews')
+ totalgndatadir = data["value"].problems.pop()
+                const exporttotalgndatadir = $("<p>"+GetPbStr(totalgndatadir)+" within the path "+data["value"]["searching_path"]+"</p>")
+                $(targetpboverviewdivid).html(exporttotalgndatadir)
+            }
             $(targetdivid).text(data["value"].problems.length + ' problems found');
         }
-        if (Array.isArray(data["value"])) {
+        if (Array.isArray(data["value"]) || Array.isArray(data["value"]['problems'])) {
             var argtitle = 'Layer'
+            var argcolumn2 = 'Problem'
+            var columns2Formatter = 'None'
             if (data['task'].includes('csw')) {
                 argtitle = 'Metadata'
             } else if (data['task'].includes('check_resources')) {
@@ -480,6 +528,10 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle')
                 argtitle = 'Config url'
             } else if (data['task'].includes('gsd.gsdatadir')) {
                 argtitle = 'Item'
+            } else if (data['task'].includes('gn_datadir')) {
+                argtitle = 'Path'
+                argcolumn2 = 'Size'
+                columns2Formatter = 'bytesFormatter'
             }
             var prevexp = $(targetpbdivid + '-export')
             if (prevexp.length > 0) {
@@ -506,7 +558,7 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle')
             columns: [
                 {'title': 'Index', 'formatter': 'runningFormatter'},
                 {'field': 'url', 'title': argtitle, 'sortable': true, 'formatter': 'urlFormatter'},
-                {'field': 'problem', 'title': 'Problem', 'sortable': true}
+                {'field': 'problem', 'title': argcolumn2, 'sortable': true, 'formatter': columns2Formatter}
             ]
         });
     }
@@ -529,6 +581,10 @@ const PollTaskRes = (type, resid, taskid, showdelete, targetdivid = '#pbtitle')
                 $(targetdivid).html('No problemo!')
             }
             $(targetpbdivid).empty();
+            if (data['task'].includes('gn_datadir')) {
+                const targetpboverviewdivid = targetdivid.replace('#pbtitle', '#pboverviews');
+                $(targetpboverviewdivid).empty();
+            }
         }
         const d = new Date(data["finished"] * 1000);
         $(targetpbdetdivid).text('vérification faite le '+ d.toLocaleString("fr-FR"));
@@ -565,6 +621,13 @@ function urlFormatter(value, row) {
         return row.url
     }
 }
+function bytesFormatter(bytes, row="") {
+    var sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
+    if (bytes == 0) return 'n/a';
+    var i = parseInt(Math.floor(Math.log(bytes) / Math.log(1024)));
+    if (i == 0) return bytes + ' ' + sizes[i];
+    return (bytes / Math.pow(1024, i)).toFixed(1) + ' ' + sizes[i];
+}
 function runningFormatter(value, row, index) {
     return 1 + index;
 }
diff --git a/geordash/templates/admin/geonetwork_datadir.html b/geordash/templates/admin/geonetwork_datadir.html
new file mode 100644
index 0000000..81b3832
--- /dev/null
+++ b/geordash/templates/admin/geonetwork_datadir.html
@@ -0,0 +1,42 @@
+{% set title = 'admin geonetwork' %}
+{% include 'header.tmpl' %}
+
+{% block scripts %}
+{% endblock %}
+
+{% block content %}