Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions opensoar/competition/soaringspot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
The files from SoaringSpot always contain task information, which can be used for competition analysis.
"""
import datetime
import logging
import re
from typing import List, Tuple, Union, Optional
from urllib.error import URLError
Expand All @@ -21,6 +22,8 @@
from opensoar.competition.daily_results_page import DailyResultsPage
from opensoar.utilities.helper_functions import double_iterator

logger = logging.getLogger(__name__)


def get_comment_lines_from_parsed_file(parsed_igc_file: dict) -> List[str]:
"""In the parsed file, lines are split into source and comment. This function stitches them back together"""
Expand Down Expand Up @@ -268,12 +271,18 @@ def _get_competitors_info(self, include_hc_competitors: bool, include_dns_compet
Returns:
List of dictionaries with competitor information:
- ranking: Position in the competition or status (DNF/DNS)
- competition_id: Glider ID
- competition_id: Glider ID (modified with suffix _2, _3, etc. if duplicate)
- igc_url: URL to download the IGC file (None for DNF/DNS)
- pilot_name: Name of the pilot
- plane_model: Type of glider

Note:
If duplicate competition IDs are found, they will be automatically resolved by
appending a suffix (_2, _3, etc.) to ensure unique identification. A warning
will be logged to inform the user.
"""
competitors_info = []
seen_competition_ids = {} # Track competition IDs and their count

table = self._get_html_soup().find("table")
if not table:
Expand Down Expand Up @@ -326,7 +335,7 @@ def _get_competitors_info(self, include_hc_competitors: bool, include_dns_compet
# Extract competition ID from CN column
cn_idx = min(col_indices['cn'], len(cells) - 1)
cn_cell = cells[cn_idx]
competition_id = cn_cell.text.strip()
original_competition_id = cn_cell.text.strip()

# Extract pilot name from pilot/contestant column
pilot_idx = min(col_indices['pilot'], len(cells) - 1)
Expand All @@ -344,6 +353,16 @@ def _get_competitors_info(self, include_hc_competitors: bool, include_dns_compet
if glider_idx < len(cells):
plane_model = cells[glider_idx].text.strip()

# Handle duplicate competition IDs
if original_competition_id in seen_competition_ids:
seen_competition_ids[original_competition_id] += 1
competition_id = f"{original_competition_id}_{seen_competition_ids[original_competition_id]}"
logger.warning(f"Duplicate competition ID '{original_competition_id}' detected for pilot '{pilot_name}'. "
f"Using '{competition_id}' instead.")
else:
seen_competition_ids[original_competition_id] = 1
competition_id = original_competition_id

# Handle HC competitors
if status == "HC":
if not include_hc_competitors:
Expand Down
44 changes: 44 additions & 0 deletions tests/competition/test_soaringspot.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,50 @@ def test_get_competitors_info_relative_downloads(self):
competitors = soaringspot_page._get_competitors_info(include_hc_competitors=False)
self.assertEqual(len(competitors), 10)

def test_get_competitors_with_duplicate_ids(self):
    """Competitors sharing a competition ID must come back with unique IDs.

    The results page referenced below lists two pilots under the ID 'FS'.
    After processing, one entry is expected to keep 'FS' and the other to
    carry the suffixed ID 'FS_2', with ranking and glider type unchanged.
    """
    page = SoaringSpotDaily(
        'https://www.soaringspot.com/nl/keiheuvelcup2025/results/open-xpdr/task-1-on-2025-08-23/daily'
    )
    entries = page._get_competitors_info(include_hc_competitors=True)

    # The page lists 34 competitors in total.
    self.assertEqual(len(entries), 34)

    # No competition ID may occur twice once duplicates have been resolved.
    all_ids = [entry['competition_id'] for entry in entries]
    self.assertEqual(len(all_ids), len(set(all_ids)),
                     "Duplicate competition IDs detected after processing")

    # Entries whose (possibly suffixed) ID is derived from 'FS'.
    fs_entries = [entry for entry in entries if entry['competition_id'].startswith('FS')]
    self.assertEqual(len(fs_entries), 2,
                     "Expected 2 pilots with FS competition ID")

    # Both expected pilots are present.
    fs_pilot_names = {entry['pilot_name'] for entry in fs_entries}
    self.assertIn('Daan Spruyt', fs_pilot_names)
    self.assertIn('Flens Piet', fs_pilot_names)

    # One entry keeps the plain ID, the other receives the '_2' suffix.
    fs_competition_ids = {entry['competition_id'] for entry in fs_entries}
    self.assertIn('FS', fs_competition_ids)
    self.assertIn('FS_2', fs_competition_ids)

    # Ranking and glider type must be untouched by the ID rewrite.
    expected_by_pilot = {
        'Daan Spruyt': (2, 'Ventus 2ax'),
        'Flens Piet': (32, 'DG 808C Classic 18m'),
    }
    for entry in fs_entries:
        if entry['pilot_name'] in expected_by_pilot:
            expected_ranking, expected_model = expected_by_pilot[entry['pilot_name']]
            self.assertEqual(entry['ranking'], expected_ranking)
            self.assertEqual(entry['plane_model'], expected_model)

# disabled because this URL is no longer used. unclear whether all dev urls are cleared
# def test_get_competitors_dev_url(self):
# soaringspot_page = SoaringSpotDaily(
Expand Down