3 changes: 0 additions & 3 deletions .gitignore
@@ -22,7 +22,6 @@ build*
dash.cfg
env_var_defs.py
.pytest_cache*
.testmondata
zeeguu_api/machine_specific.py
*.db-journal

@@ -53,5 +52,3 @@ diagrams/*.json
# Keep internal technical design documents private
experiments/
docs/particle_verb_detection.md
/.testmondata-wal
/.testmondata-shm
66 changes: 0 additions & 66 deletions docs/SMART_TESTS.md

This file was deleted.

1 change: 0 additions & 1 deletion requirements.txt
@@ -95,7 +95,6 @@ sentry-sdk[flask]

# Testing
pytest
pytest-testmon

# Version constraints
pydantic>=1.10.8
17 changes: 3 additions & 14 deletions run_tests.sh
@@ -1,21 +1,10 @@
#!/bin/sh
# Smart test runner using pytest-testmon
# Only runs tests affected by code changes
# Run all tests

export PYTHONWARNINGS='ignore'

# Check if this is the first run or if --full is passed
if [ "$1" = "--full" ] || [ "$1" = "-f" ]; then
echo "Running FULL test suite (refreshing testmon cache)..."
python -m pytest --testmon-off "$@"
elif [ "$1" = "--nocache" ]; then
echo "Running tests with fresh testmon cache..."
rm -f .testmondata
python -m pytest --testmon "${@:2}"
else
echo "Running SMART tests (only affected by changes)..."
python -m pytest --testmon "$@"
fi
echo "Running test suite..."
python -m pytest "$@"

ret_code=$?
export PYTHONWARNINGS='default'
7 changes: 0 additions & 7 deletions run_tests_full.sh

This file was deleted.

36 changes: 36 additions & 0 deletions test_durations.txt
@@ -0,0 +1,36 @@
Test durations — 161 tests (was 189)
=====================================
Optimizations applied:
1. Session-scoped app (no per-test create_app)
2. JSON fixture for article/NLP data (no Stanza at setup)
3. Wordstats mock (no 400k WordInfo load)
4. Feed test consolidation (5→1), then deletion
5. Article test consolidation (4→1)
6. Deleted redundant slow tests (feed download, Article.find_or_create, test_create_article)
7. Fixed cache-before-tokenizer bug in article.get_tokenized_content()
8. Removed unused hardcoded feeds from FeedRule (was creating 4 feeds per BookmarkRule)
9. Bookmark test consolidation (18→3)

Full suite history:
ORIGINAL: ~160s (2:40) 189 tests — function-scoped app, live Stanza + wordstats
AFTER 1-3: ~110s (1:50) 184 tests — session-scoped app, JSON fixture, wordstats mock
AFTER 1-6: ~52s (0:52) 176 tests — + deletions and consolidations
AFTER 1-8: ~55s (0:55) 176 tests — + cache fix (test_starred_or_liked: 6.5s → 0.25s)
AFTER 1-9: ~46s (0:46) 161 tests — + bookmark consolidation (18→3)

Production bug fixed:
article.get_tokenized_content() loaded Stanza BEFORE checking cache.
Now checks cache first — on hit, returns immediately without loading tokenizer.
test_starred_or_liked: 6.50s → 0.25s (also saves time in production)

Current top 10 slowest (46s total, 161 tests):
15.18s test_bookmark_quality_and_study — creates 10+ bookmarks inline (consolidated)
6.83s test_contribute_own_translation — API bookmark contribution
4.38s session setup (one-time) — create_app loads spaCy models at import time
2.21s test_article_topics_and_basics — ArticleRule creates 2 fake articles
1.89s test_exercises — creates bookmarks + exercises (consolidated)
1.65s test_add_exercise_to_session — exercise scheduling logic
0.94s test_number_detection — tokenizer test
0.71s test_no_null_positions — bookmark positions
0.68s test_bookmark_counts_by_date — user bookmark stats
0.63s test_ml_classification — quality filter
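
The cache fix described above (item 7) boils down to consulting the tokenization cache before loading any NLP model. A minimal sketch of that check-cache-first shape follows; the function signature and the dict-based cache are illustrative stand-ins, not the actual Zeeguu Article API.

import json

def get_tokenized_content(article_id, content, cache, load_tokenizer):
    """Return cached tokens if present; load the tokenizer only on a miss."""
    cached = cache.get(article_id)
    if cached is not None:
        # Cache hit: return immediately, Stanza/spaCy is never loaded.
        return json.loads(cached)

    # Cache miss: only now pay the cost of loading the tokenizer.
    tokenizer = load_tokenizer()
    tokens = tokenizer(content)
    cache[article_id] = json.dumps(tokens)
    return tokens

# Illustrative use: a plain dict stands in for the article_tokenization_cache table.
cache = {}
tokens = get_tokenized_content(1, "Ein kurzer Satz.", cache, lambda: str.split)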
83 changes: 83 additions & 0 deletions tools/regenerate_test_fixture.py
@@ -0,0 +1,83 @@
#!/usr/bin/env python
"""
Regenerate the test fixture file for session-scoped test data.

Run this after schema changes:
source ~/.venvs/z_env/bin/activate && python tools/regenerate_test_fixture.py
"""

import json
import os
import sys

# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import requests_mock as rm
from zeeguu.api.app import create_app
from zeeguu.core.model.db import db
from zeeguu.core.test.mocking_the_web import (
    mock_requests_get,
    URL_SPIEGEL_VENEZUELA,
)
from sqlalchemy import text

FIXTURE_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "zeeguu/core/test/test_data/session_fixture.json"
)

KEEP_TABLES = [
    "language", "context_type", "source_type", "domain_name",
    "url", "url_keyword", "source", "source_text",
    "article", "article_fragment", "article_cefr_assessment",
    "article_tokenization_cache", "article_url_keyword_map", "new_text",
]


def main():
    app = create_app(testing=True)
    with rm.Mocker() as m:
        mock_requests_get(m)
        with app.test_client() as client:
            with app.app_context():
                db.create_all()

                from zeeguu.core.model.context_type import ContextType as CT
                for t in CT.ALL_TYPES:
                    CT.find_or_create(db.session, t, commit=False)
                from zeeguu.core.model.source_type import SourceType as ST
                for t in ST.ALL_TYPES:
                    ST.find_or_create(db.session, t, commit=False)
                db.session.commit()

                resp = client.post(
                    "/add_user/dump@test.com",
                    data=dict(password="test", username="dump", learned_language="de"),
                )
                session_token = resp.data.decode("utf-8")
                client.post(
                    f"/find_or_create_article?session={session_token}",
                    data=dict(url=URL_SPIEGEL_VENEZUELA),
                )

                dump = {}
                for tname in KEEP_TABLES:
                    rows = db.session.execute(text(f'SELECT * FROM "{tname}"')).fetchall()
                    keys = list(db.session.execute(text(f'SELECT * FROM "{tname}"')).keys())
                    dump[tname] = {
                        "columns": keys,
                        "rows": [
                            [str(v) if v is not None else None for v in row]
                            for row in rows
                        ],
                    }
                    print(f"{tname}: {len(rows)} rows")

                with open(FIXTURE_PATH, "w") as f:
                    json.dump(dump, f)
                print(f"Wrote {FIXTURE_PATH}")


if __name__ == "__main__":
    main()
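
The JSON dump written by this script is what the session-scoped setup (optimizations 1-2 in test_durations.txt) loads instead of re-running Stanza for every test. Below is a sketch of a conftest fixture that could replay it; the fixture name and the raw-INSERT replay are assumptions, not the repository's actual conftest.

# conftest.py sketch: load the pre-generated fixture once per test session.
import json
import os

import pytest
from sqlalchemy import text

from zeeguu.api.app import create_app
from zeeguu.core.model.db import db

FIXTURE_PATH = os.path.join(
    os.path.dirname(__file__), "zeeguu/core/test/test_data/session_fixture.json"
)


@pytest.fixture(scope="session")
def session_app():
    app = create_app(testing=True)
    with app.app_context():
        db.create_all()
        with open(FIXTURE_PATH) as f:
            dump = json.load(f)
        # Replay the dumped rows table by table; values were stringified on dump,
        # so the database is left to coerce them back to their column types.
        for table, data in dump.items():
            columns = ", ".join(data["columns"])
            placeholders = ", ".join(f":{c}" for c in data["columns"])
            for row in data["rows"]:
                db.session.execute(
                    text(f"INSERT INTO {table} ({columns}) VALUES ({placeholders})"),
                    dict(zip(data["columns"], row)),
                )
        db.session.commit()
        yield app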
80 changes: 7 additions & 73 deletions zeeguu/api/endpoints/feature_toggles.py
@@ -3,6 +3,10 @@
from zeeguu.api.endpoints import api
from zeeguu.api.utils import cross_domain, requires_session
from zeeguu.core.model import User
from zeeguu.core.user_feature_toggles import (
features_for_user,
is_feature_enabled_for_user,
)


@api.route("/is_feature_enabled/<feature_name>", methods=["GET"])
@@ -15,82 +19,12 @@ def is_feature_enabled(feature_name):

will return YES or NO
"""

    func = _feature_map().get(feature_name, None)

    if not func:
        return "NO"
    user = User.find_by_id(flask.g.user_id)
    if func(user):
    if is_feature_enabled_for_user(feature_name, user):
        return "YES"

    return "NO"


def features_for_user(user):
    features = []
    for name, detector_function in _feature_map().items():
        if detector_function(user):
            features.append(name)
    return features


"""
We have a code 'zeeguu-preview' which is used to invite
general users and should give access to the latest feature set
of Zeeguu. It can be used for usability tests and can be also
spread by word of mouth to new participants.
"""


def is_feature_enabled_for_user(feature_name, user):
    return feature_name in features_for_user(user)


def _feature_map():
    return {
        "audio_exercises": _audio_exercises,
        "extension_experiment_1": _extension_experiment_1,
        "no_audio_exercises": _no_audio_exercises,
        "tiago_exercises": _tiago_exercises,
        "new_topics": _new_topics,
        "daily_audio": _daily_audio,
    }


def _new_topics(user):
    return True


def _tiago_exercises(user):
    right_user = user.invitation_code == "Tiago" or user.id == 534 or user.id == 4022
    right_language = user.learned_language.code in ["da"]
    return right_user and right_language


def _no_audio_exercises(user):
    return user.is_member_of_cohort(447)


def _audio_exercises(user):
    return user.is_member_of_cohort(444)


def _extension_experiment_1(user):
    return (
        (user.is_member_of_cohort(437))
        or user.id in [3372, 3373, 2953, 3427, 2705]
        or user.id > 3555
    )


def _daily_audio(user):
    return True
    # return (
    #     user.id in [8, 4607, 4022]
    #     or user.id > 5228
    #     or user.is_member_of_cohort(532) # daily-audio
    #     or user.is_member_of_cohort(529) # pg25
    #     or user.is_member_of_cohort(530) # ada's cohort with invite code: summer25
    #     or user.invitation_code in ["zeeguu-preview", "ec25"]
    # )
# Re-export for backward compatibility
__all__ = ["features_for_user", "is_feature_enabled_for_user"]
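
The two helpers re-exported above now live in zeeguu.core.user_feature_toggles, which is not part of this diff. Judging from the code deleted here, that module presumably keeps the same feature-map shape; a condensed, speculative sketch:

# zeeguu/core/user_feature_toggles.py: presumed shape, reconstructed from the
# detector functions deleted above; the actual moved module is not shown here.

def _new_topics(user):
    return True


def _audio_exercises(user):
    return user.is_member_of_cohort(444)


def _feature_map():
    return {
        "new_topics": _new_topics,
        "audio_exercises": _audio_exercises,
        # ...the remaining detectors (_no_audio_exercises, _tiago_exercises,
        # _extension_experiment_1, _daily_audio) carry over unchanged
    }


def features_for_user(user):
    return [name for name, detect in _feature_map().items() if detect(user)]


def is_feature_enabled_for_user(feature_name, user):
    return feature_name in features_for_user(user)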
18 changes: 12 additions & 6 deletions zeeguu/api/endpoints/search.py
@@ -301,9 +301,12 @@ def subscribe_to_email_search(search_terms):
    user = User.find_by_id(flask.g.user_id)
    search = Search.find(search_terms, user.learned_language_id)
    receive_email = True
    subscription = SearchSubscription.update_receive_email(
        db_session, user, search, receive_email
    )
    try:
        subscription = SearchSubscription.update_receive_email(
            db_session, user, search, receive_email
        )
    except ValueError as e:
        return make_error(401, str(e))

    return json_result(subscription.as_dictionary())

@@ -320,8 +323,11 @@ def unsubscribe_from_email_search(search_terms):
    search = Search.find(search_terms, user.learned_language_id)

    receive_email = False
    subscription = SearchSubscription.update_receive_email(
        db_session, user, search, receive_email
    )
    try:
        subscription = SearchSubscription.update_receive_email(
            db_session, user, search, receive_email
        )
    except ValueError as e:
        return make_error(401, str(e))

    return json_result(subscription.as_dictionary())