Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
8143ed2
first tries for geonetwork datadir checker useless resources
jeanmi151 May 27, 2025
4118c63
continue work for adding geonetwork datadir check inside gaia
jeanmi151 Jun 25, 2025
711c282
feat(datadir): some design to show result of datadir check
mmohadIGN Jun 26, 2025
09587e1
add debug function entry point
jeanmi151 Jun 26, 2025
ee47cdd
add back the build in docker compose
jeanmi151 Jun 26, 2025
1fa5c5d
rework WIP as a task the geonetwork datadir checker
jeanmi151 Aug 6, 2025
a726891
continue but still not working
jeanmi151 Aug 6, 2025
20971c2
continue testing
jeanmi151 Sep 3, 2025
7902e43
finally something working
jeanmi151 Sep 3, 2025
038ab38
make dynamic folder search and change home dashboard
jeanmi151 Sep 3, 2025
c9848a0
add total count
jeanmi151 Sep 3, 2025
a471ce2
revert wrong merge
jeanmi151 Sep 3, 2025
ea4ef11
continue test and dev, take care about reviews
jeanmi151 Sep 11, 2025
57c1f49
make geonetwork datadir dynamic + filesizeformat using jinja2
jeanmi151 Sep 12, 2025
4b71648
activate debug route only if debug is ON
jeanmi151 Sep 12, 2025
2fc440f
cleaning before ready for reviews
jeanmi151 Sep 12, 2025
1818d19
cleaning before ready for reviews2
jeanmi151 Sep 12, 2025
87fd6db
taking review into account
jeanmi151 Sep 24, 2025
a6bc3b5
correct database arguments variables
jeanmi151 Oct 20, 2025
503f7c1
correct docker file and compose
jeanmi151 Oct 22, 2025
e6150ea
working way to request database without providing the Metadata class
jeanmi151 Oct 22, 2025
1aab1e6
move the query to get all metadata for dynamic updates
jeanmi151 Oct 23, 2025
60ae883
Merge branch 'master' into datadir_gn_checker
jeanmi151 Oct 23, 2025
880b596
result of gn checker in beautiful table
jeanmi151 Oct 23, 2025
bab4179
changing the path printed
jeanmi151 Oct 27, 2025
1f339ac
removing UnusedFileRes from GetPbStr
jeanmi151 Oct 27, 2025
5746c68
changing total path count
jeanmi151 Oct 29, 2025
ee317d6
taking reviews into account
jeanmi151 Oct 29, 2025
b3914f7
reset pboverviews when there is no problemo!
jeanmi151 Oct 29, 2025
154b395
remove useless imports
jeanmi151 Oct 29, 2025
ab94d92
name_for_collection_relationship drop
jeanmi151 Oct 29, 2025
8587893
run black tool in all files
jeanmi151 Oct 29, 2025
0b9e744
use formatter for printing bytes from frontend
jeanmi151 Oct 29, 2025
e142a86
refactor bytes Formatter
jeanmi151 Oct 29, 2025
4822c19
remove useless import now
jeanmi151 Oct 29, 2025
8e6e4e8
remove useless print for debug
jeanmi151 Oct 29, 2025
412e2c9
change way to print debug
jeanmi151 Oct 30, 2025
9344e95
remove useless Base
jeanmi151 Oct 30, 2025
de5f08b
remove import declarative_base gn_datadir.py
jeanmi151 Oct 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/Dockerfile_celery
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ USER celery
COPY . /geordash
WORKDIR /geordash

CMD ["sh", "-c", "python3 -m celery -A make_celery worker --loglevel INFO -P solo -B -E -s workdir/celerybeat-schedule -s celerybeat-schedule"]
CMD ["sh", "-c", "python3 -m celery -A make_celery worker --loglevel INFO -P solo -B -E -s workdir/celerybeat-schedule"]
1 change: 1 addition & 0 deletions docker/Dockerfile_flask
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ ENV FLASK_APP=geordash
ENV FLASK_OPTS="-h 0.0.0.0 -p 5002"
ENV georchestradatadir=/etc/georchestra
ENV REDISURL=""
#ENV FLASK_DEBUG=1

# set fixed UID and GID - see github.com/hexops/dockerfile
ARG UID=10000
Expand Down
6 changes: 5 additions & 1 deletion docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ services:
redis:
image: "redis:alpine"
command: redis-server

environment:
- REDIS_REPLICATION_MODE=master
-
gaia-back:
# if you want to switch back to official image
# image: "georchestra/gaia:latest"
build:
context: ../
dockerfile: ./docker/Dockerfile_flask
Expand Down Expand Up @@ -65,6 +67,8 @@ services:


gaia-celery:
# if you want to switch back to official image
# image: "georchestra/gaia-celery:latest"
build:
context: ../
dockerfile: ./docker/Dockerfile_celery
Expand Down
2 changes: 2 additions & 0 deletions geordash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from geordash.georchestraconfig import GeorchestraConfig
from geordash.result_backend.redisbackend import RedisClient
from geordash.checks.mapstore import MapstoreChecker
from geordash.checks.gn_datadir import GeonetworkDatadirChecker
from geordash.decorators import is_superuser
from config import url as redisurl
import threading
Expand Down Expand Up @@ -86,6 +87,7 @@ def inject_globals():
app.extensions["conf"] = conf
app.extensions["owscache"] = OwsCapCache(conf, app)
app.extensions["msc"] = MapstoreChecker(conf)
app.extensions["gndc"] = GeonetworkDatadirChecker(conf)
app.extensions["rcli"] = RedisClient(redisurl)
from . import views, api, admin, dashboard

Expand Down
11 changes: 11 additions & 0 deletions geordash/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ def geonetwork():
return render_template("admin/geonetwork.html", portals=portals)


@admin_bp.route("/geonetwork/datadir")
@check_role(role="GN_ADMIN")
def geonetwork_datadir():
    """Render the admin page dedicated to the GeoNetwork datadir check.

    The redis client registered on the application is asked for the task ids
    recorded for the ``check_gn_meta`` task called with an empty argument
    list; whatever it returns is handed to the template as
    ``previous_configs_jobs``.
    """
    redis_client = app.extensions["rcli"]
    previous_jobs = redis_client.get_taskids_by_taskname_and_args(
        "geordash.checks.gn_datadir.check_gn_meta", []
    )
    return render_template(
        "admin/geonetwork_datadir.html", previous_configs_jobs=previous_jobs
    )


@admin_bp.route("/geoserver")
@check_role(role="ADMINISTRATOR")
def geoserver():
Expand Down
8 changes: 7 additions & 1 deletion geordash/celeryconfig.py.example
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ imports = (
"geordash.checks.csw",
"geordash.checks.mviewer",
"geordash.checks.gsd",
"geordash.checks.gn_datadir",
)
# worker_pool = solo
worker_log_format = (
Expand Down Expand Up @@ -56,7 +57,7 @@ beat_schedule = {
"check-gs-datadir-every-sunday": {
"task": "geordash.checks.gsd.gsdatadir",
"args": [],
"schedule": crontab(day_of_week='sunday',minute=30, hour=1),
"schedule": crontab(day_of_week="sunday", minute=30, hour=1),
},
"check-gn-metadatas-every-night": {
"task": "geordash.checks.csw.check_catalog",
Expand All @@ -68,6 +69,11 @@ beat_schedule = {
# "args": [],
# "schedule": crontab(minute=0, hour=1),
# },
"check-gn-metadatadir-every-night": {
"task": "geordash.checks.gn_datadir.check_gn_meta",
"args": [""],
"schedule": crontab(minute=55, hour=0),
},
}
# otherwise scheduled hours is taken as UTC
timezone = "Europe/Paris"
107 changes: 107 additions & 0 deletions geordash/checks/gn_datadir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/bin/env python3
# -*- coding: utf-8 -*-
# vim: ts=4 sw=4 et
from celery import shared_task
from geordash.logwrap import get_logger
from flask import current_app as app
from sqlalchemy import create_engine, MetaData, select, Table
from sqlalchemy.engine import URL
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import sessionmaker
import glob
from pathlib import Path


def get_folder_size(folder):
    """Return the cumulated size in bytes of everything found below *folder*.

    Every entry yielded by a recursive walk of *folder* is counted with the
    size reported by ``stat()``. Entries that cannot be stat'ed (dangling
    symlink, file removed while the walk is running, permission problem) are
    skipped instead of aborting the whole computation.
    """
    total = 0
    for entry in Path(folder).rglob("*"):
        try:
            total += entry.stat().st_size
        except OSError:
            # best effort: one unreadable entry must not break the report
            continue
    return total


class GeonetworkDatadirChecker:
    """Give access to the rows of the GeoNetwork ``metadata`` table.

    The connection settings (``jdbc.*`` keys) are read from the
    ``geonetwork`` section of the configuration object received by the
    constructor. The ``metadata`` table is reflected from the database, so no
    model class has to be declared by hand.
    """

    def __init__(self, conf):
        """Create the engine, open a session and reflect the ``metadata`` table.

        :param conf: configuration object; ``conf.get(key, "geonetwork")`` is
            used for every setting (presumably a GeorchestraConfig, whose
            second parameter is the section name - verify against caller).
        """
        schema = conf.get("jdbc.schema", "geonetwork")
        url = URL.create(
            drivername="postgresql",
            username=conf.get("jdbc.username", "geonetwork"),
            host=conf.get("jdbc.host", "geonetwork"),
            port=conf.get("jdbc.port", "geonetwork"),
            password=conf.get("jdbc.password", "geonetwork"),
            database=conf.get("jdbc.database", "geonetwork"),
        )

        engine = create_engine(
            url,
            connect_args={"options": f"-csearch_path={schema}"},
        )
        self.sessionm = sessionmaker(bind=engine)
        self.sessiono = self.sessionm()

        # Perform database reflection to analyze tables and relationships
        m = MetaData(schema=schema)
        Table("metadata", m, autoload_with=engine)
        Base = automap_base(metadata=m)
        Base.prepare()
        self.Metadata = Base.classes.metadata

    def session(self):
        """Return a usable session, replacing the current one if it is broken.

        The current session is probed with ``SELECT 1``; on
        ``OperationalError`` it is closed and a new session is created from
        the session factory.
        """
        try:
            self.sessiono.execute(select(1))
        except OperationalError:
            get_logger("CheckGNDatadir").warning("Reconnecting to the database...")
            # release the resources held by the broken session before
            # dropping the reference to it
            self.sessiono.close()
            self.sessiono = self.sessionm()
        return self.sessiono

    def get_meta_list(self):
        """Return every row of the ``metadata`` table as mapped objects."""
        return self.session().query(self.Metadata).all()


@shared_task(bind=True)
def check_gn_meta(self):
    """Report the folders of the GeoNetwork datadir not tied to a metadata.

    Every path matching ``<geonetwork.data.dir>*/*`` is inspected: its last
    component is read as a metadata id and looked up among the ids returned
    by the ``gndc`` extension. Paths whose id is unknown are reported.

    :return: a dict with two keys:
        ``searching_path``: the resolved datadir path;
        ``problems``: one ``{"url": "<parent>/<id>", "problem": <size>}``
        entry per unused folder, followed - only when at least one folder is
        reported - by a summary entry
        ``{"type": "UnusedFileResTotal", "size": ..., "total": ...}``.
    """
    logger = get_logger("CheckGNDatadir")
    logger.debug("Start gn datadir checker")
    # a set gives a constant-time lookup and, unlike an index based flag,
    # does not mistake a match on the first row for "not found"
    known_ids = {item.id for item in app.extensions["gndc"].get_meta_list()}

    conf = app.extensions["conf"]
    geonetwork_dir_path = conf.get("geonetwork.dir", "geonetwork")
    geonetwork_datadir_path = conf.get("geonetwork.data.dir", "geonetwork").replace(
        "${geonetwork.dir}", geonetwork_dir_path
    )

    meta = {"searching_path": geonetwork_datadir_path, "problems": []}
    total_could_be_deleted = 0
    # NOTE(review): the pattern is appended without a path separator, so it
    # also matches siblings sharing the datadir name as prefix - confirm
    for foldermeta in glob.glob(geonetwork_datadir_path + "*/*"):
        logger.debug(foldermeta)
        subpath, idmeta = foldermeta.split("/")[-2:]
        try:
            metadata_id = int(idmeta)
        except ValueError:
            # not a metadata folder: never suggest to delete it
            logger.debug(f"skipping {foldermeta}: name is not a metadata id")
            continue
        if metadata_id in known_ids:
            continue
        # unused folder: compute its size only once
        folder_size = get_folder_size(foldermeta)
        meta["problems"].append(
            {
                "url": subpath + "/" + idmeta,
                "problem": folder_size,
            }
        )
        total_could_be_deleted += folder_size
    logger.debug("finish gn datadir checker")

    if meta["problems"]:
        meta["problems"].append(
            {
                "type": "UnusedFileResTotal",
                "size": total_could_be_deleted,
                "total": get_folder_size(geonetwork_datadir_path),
            }
        )

    return meta
17 changes: 17 additions & 0 deletions geordash/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from flask import request, render_template, abort, url_for
from flask import current_app as app
import requests
from functools import wraps

from geordash.decorators import is_superuser, check_role
from geordash.checks.mapstore import get_resources_using_ows, get_res
Expand All @@ -23,6 +24,16 @@
)


def debug_only(f):
    """Decorator answering 404 unless the application runs in debug mode.

    :param f: the view function to protect.
    :return: a wrapper that aborts with a 404 when ``app.debug`` is false and
        otherwise forwards every positional and keyword argument to ``f``.
    """

    @wraps(f)
    def wrapped(*args, **kwargs):
        if not app.debug:
            abort(404)
        return f(*args, **kwargs)

    return wrapped


def get_rescontent_from_resid(restype, resid):
r = mapstore_get(request, f"rest/geostore/data/{resid}", False)
res = dict()
Expand Down Expand Up @@ -67,6 +78,12 @@ def home():
return render_template("home.html")


@dash_bp.route("/debug")
@debug_only
def debug():
    """Return the textual dump of the loaded configuration.

    The dump covers every section held by the configuration object, so it may
    include credentials (e.g. ``jdbc.password``); the route is guarded by
    ``debug_only`` only.
    """
    conf = app.extensions["conf"]
    return conf.tostr()


@dash_bp.route("/my-metadata")
@check_role(role="GN_EDITOR")
def my_metadata():
Expand Down
24 changes: 24 additions & 0 deletions geordash/georchestraconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ def __init__(self):
except:
# safe default value
self.sections["urls"]["localgs"] = "geoserver"

with open(f"{self.datadirpath}/geonetwork/geonetwork.properties") as lines:
lines = chain(("[section]",), lines) # This line does the trick.
parser.read_file(lines)
self.sections["geonetwork"] = parser["section"]

# read current commit from .git/HEAD which might lead to the branch tip
prefix = getcwd() + "/.git/"
self.sections["gaia"] = {"commit": None}
Expand All @@ -67,6 +73,24 @@ def __init__(self):
# failed to read .git/HEAD or .git/refs/heads/* ?
pass

def tostr(self):
    """Return an HTML-ish dump of every section and key of the configuration.

    Each section name is printed on its own line, followed by one line per
    key. The stored value is shown alone when it equals the value returned by
    ``self.get()``; otherwise both are shown as ``stored = resolved``.
    Non-string values (e.g. a ``None`` commit) are rendered with their string
    representation instead of raising a TypeError.
    """
    indent = " \t&emsp;"
    eol = "\r\n<br> "
    chunks = []
    for section_name in self.sections:
        chunks.append(f"{section_name}:\r\n<br>")
        section = self.sections[section_name]
        for option in section:
            raw = section[option]
            resolved = self.get(option, section=section_name)
            chunks.append(f"{indent}{option} : ")
            if raw == resolved:
                chunks.append(f"{indent}{raw}{eol}")
            else:
                chunks.append(f"{indent}{raw} = {resolved}{eol}")
    return "".join(chunks)

def get(self, key, section="default"):
if section not in self.sections:
return None
Expand Down
Loading
Loading