Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion invenio_cern_sync/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@

from .ext import InvenioCERNSync

__version__ = "0.1.1"
__version__ = "0.1.2"

__all__ = ("__version__", "InvenioCERNSync")
4 changes: 3 additions & 1 deletion invenio_cern_sync/sso/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ def cern_info_serializer(remote, resp, token_user_info, user_info):
"email": email,
"profile": {
"affiliations": user_info.get("home_institute", ""),
"full_name": user_info["name"],
"full_name": user_info.get(
"name", token_user_info.get("name", "")
), # user_info might be missing
"username": username,
},
"prefs": {
Expand Down
2 changes: 1 addition & 1 deletion invenio_cern_sync/users/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _create_user(cern_user):
)
db.session.add(user)
# necessary to get the auto-generated `id`
db.session.commit()
db.session.flush()
return user


Expand Down
38 changes: 30 additions & 8 deletions invenio_cern_sync/users/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,15 @@ def _log_identity_id_changed(
return ra_extra_data


def _update_existing(users, serializer_fn, log_uuid, log_name):
"""Update existing users and return a list of missing users to insert."""
def _update_existing(users, serializer_fn, log_uuid, log_name, persist_every=500):
"""Update existing users in batches and return a list of missing users to insert."""
missing = []
updated = set()
log_action = "updating-existing-users"
log_info(log_name, dict(action=log_action, status="started"), log_uuid=log_uuid)

processed_count = 0

for invenio_user in serializer_fn(users):
user = user_identity = None

Expand Down Expand Up @@ -162,9 +164,16 @@ def _update_existing(users, serializer_fn, log_uuid, log_name):
), f"User and UserIdentity are not correctly linked for user #{user.id} and user_identity #{user_identity.id}"

if update_existing_user(user, user_identity, invenio_user):
db.session.commit()
updated.add(user.id)

processed_count += 1
# Commit every `persist_every` iterations
if processed_count % persist_every == 0:
db.session.commit()

# Final commit for any remaining uncommitted changes
db.session.commit()

log_info(
log_name,
dict(action=log_action, status="completed", updated_count=len(updated)),
Expand All @@ -173,12 +182,14 @@ def _update_existing(users, serializer_fn, log_uuid, log_name):
return missing, updated


def _insert_missing(invenio_users, log_uuid, log_name):
"""Insert users."""
def _insert_missing(invenio_users, log_uuid, log_name, persist_every=500):
"""Insert users in batches."""
log_action = "inserting-missing-users"
log_info(log_name, dict(action=log_action, status="started"), log_uuid=log_uuid)

inserted = set()
processed_count = 0

for invenio_user in invenio_users:
try:
if invenio_user["username"].startswith("_"):
Expand All @@ -188,14 +199,25 @@ def _insert_missing(invenio_users, log_uuid, log_name):
f"Skipping user with username starting with `_`: {invenio_user}"
)
continue
_id = create_user(invenio_user)

with db.session.begin_nested():
_id = create_user(invenio_user)
inserted.add(_id)

processed_count += 1

# Commit every `persist_every` iterations
if processed_count % persist_every == 0:
db.session.commit()

except Exception as e:
current_app.logger.error(
f"Error creating user from CERN data: {e}. Skipping this user... User: {invenio_user}"
)
continue
db.session.commit()
inserted.add(_id)

# Final commit for any remaining uncommitted changes
db.session.commit()

log_info(
log_name,
Expand Down
Loading