Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions opensoar/competition/daily_results_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, url):
self.url = 'http://{}'.format(url)

self._igc_directory = None # to be set in subclass
self._html_soup = None # to be set when the page is evaluated

@property
def igc_directory(self):
Expand All @@ -36,20 +37,21 @@ def set_igc_directory(self, target_directory, competition_name, plane_class, dat
def _get_html_soup(self) -> BeautifulSoup:
# fix problem with SSL certificates
# https://stackoverflow.com/questions/30551400/disable-ssl-certificate-validation-in-mechanize#35960702
import ssl
try:
_create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
# Legacy Python that doesn't verify HTTPS certificates by default
pass
else:
# Handle target environment that doesn't support HTTPS verification
ssl._create_default_https_context = _create_unverified_https_context

# get entire html of page
html = urlopen(self.url).read()
if not self._html_soup:
import ssl
try:
_create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
# Legacy Python that doesn't verify HTTPS certificates by default
pass
else:
# Handle target environment that doesn't support HTTPS verification
ssl._create_default_https_context = _create_unverified_https_context

return BeautifulSoup(html, "html.parser")
# get entire html of page
html = urlopen(self.url).read()
self._html_soup = BeautifulSoup(html, "html.parser")
return self._html_soup

def igc_file_name(self, competition_id: str) -> str:
"""
Expand Down Expand Up @@ -122,4 +124,4 @@ def _select_task(tasks: List[Task]) -> Task:
number_of_times_present.append(1)

max_index, max_value = max(enumerate(number_of_times_present), key=operator.itemgetter(1))
return tasks[max_index]
return tasks[max_index]
160 changes: 128 additions & 32 deletions opensoar/competition/soaringspot.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,46 +257,142 @@ class SoaringSpotDaily(DailyResultsPage):
def __init__(self, url: str):
super().__init__(url)

def _get_competitors_info(self, include_hc_competitors: bool, include_dns_competitors: bool = False) -> List[dict]:
    """
    Extract competitor information from a SoaringSpot daily results page.

    Args:
        include_hc_competitors: Whether to include pilots competing hors-concours
        include_dns_competitors: Whether to include pilots who did not start or finish

    Returns:
        List of dictionaries with competitor information:
        - ranking: Position in the competition or status (HC/DNF/DNS)
        - competition_id: Glider ID
        - igc_url: URL to download the IGC file (None for DNF/DNS)
        - pilot_name: Name of the pilot
        - plane_model: Type of glider

    Raises:
        ValueError: if the page contains no results table.
    """
    competitors_info = []

    table = self._get_html_soup().find("table")
    if not table:
        raise ValueError(f"Could not find results table in the page at {self.url}")

    # Default column indices (fallback for pages without a usable header row)
    col_indices = {
        'ranking': 0,  # First column typically has the ranking
        'cn': 2,       # Third column typically has competition ID
        'pilot': 3,    # Fourth column typically has pilot name
        'glider': 4,   # Fifth column typically has glider model
    }

    # Refine the column indices from the table headers when present.
    # Hoist the thead lookup so it is done once, not twice.
    thead = table.find('thead')
    headers = thead.findAll('th') if thead else []
    for i, header in enumerate(headers):
        header_text = header.text.strip().lower()
        # Ranking column: '#', or an unlabeled first column
        if header_text == '#' or (i == 0 and header_text == ''):
            col_indices['ranking'] = i
        # CN column (competition number)
        elif header_text == 'cn' or header_text.startswith('comp'):
            col_indices['cn'] = i
        # Pilot / contestant column
        elif 'contestant' in header_text or 'pilot' in header_text:
            col_indices['pilot'] = i
        # Glider column
        elif 'glider' in header_text or 'plane' in header_text:
            col_indices['glider'] = i

    for row in table.findAll('tr')[1:]:  # Skip header row
        cells = row.findAll('td')
        if not cells or len(cells) <= col_indices['cn']:  # Need at least CN column
            continue

        # Extract status/ranking from ranking column (clamped to row width)
        ranking_idx = min(col_indices['ranking'], len(cells) - 1)
        status = cells[ranking_idx].text.strip()

        # Skip DNF/DNS rows if not requested
        if status in ("DNF", "DNS") and not include_dns_competitors:
            continue

        # Extract competition ID from CN column
        cn_idx = min(col_indices['cn'], len(cells) - 1)
        cn_cell = cells[cn_idx]
        competition_id = cn_cell.text.strip()

        # Extract pilot name from pilot/contestant column
        pilot_idx = min(col_indices['pilot'], len(cells) - 1)
        contestant_cell = cells[pilot_idx]
        pilot_name = contestant_cell.text.strip()

        # Strip an embedded country-flag div's text from the pilot name, if any
        flag_div = contestant_cell.find('div', class_='flag')
        if flag_div:
            pilot_name = pilot_name.replace(flag_div.text, '').strip()

        # Extract plane model from glider column (may be absent on short rows)
        plane_model = None
        glider_idx = col_indices['glider']
        if glider_idx < len(cells):
            plane_model = cells[glider_idx].text.strip()

        if status == "HC":
            # Hors-concours pilots keep the 'HC' marker as their ranking
            if not include_hc_competitors:
                continue
            ranking = status
        elif status in ("DNF", "DNS"):
            # Did-not-fly rows carry no flight log; record them and move on
            competitors_info.append({
                "ranking": status,
                "competition_id": competition_id,
                "igc_url": None,
                "pilot_name": pilot_name,
                "plane_model": plane_model,
            })
            continue
        else:
            # Normal competitors - extract numeric ranking ('3.' -> 3),
            # falling back to the raw text for unexpected formats
            try:
                ranking = int(status.rstrip("."))
            except ValueError:
                ranking = status

        # Extract IGC URL from the download link hidden in the CN cell's
        # popup content; the first matching download link wins, so stop
        # scanning further links once one is found.
        igc_url = None
        for link in cn_cell.findAll('a'):
            data_content = link.get('data-content')
            if data_content:
                popup_soup = BeautifulSoup(data_content, 'html.parser')
                for a in popup_soup.findAll('a'):
                    if 'download' in a.text.lower() or '.igc' in a.text.lower():
                        href = a.get('href')
                        if href:
                            if href.startswith("http://") or href.startswith("https://"):
                                igc_url = href  # absolute URL
                            else:
                                igc_url = urljoin(self.url, href)  # relative URL
                            break
            if igc_url:
                break

        competitors_info.append({
            "ranking": ranking,
            "competition_id": competition_id,
            "igc_url": igc_url,
            "pilot_name": pilot_name,
            "plane_model": plane_model,
        })

    return competitors_info

def _get_competition_day_info(self) -> Tuple[str, datetime.date, str]:

if self.url.startswith('https://') or self.url.startswith('http://'):
Expand Down
22 changes: 22 additions & 0 deletions tests/competition/test_soaringspot.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,17 +130,39 @@ def test_get_waypoints(self):
self.assertEqual(w.is_line, is_line)

def test_get_competitors(self):
    # NOTE(review): live-network test — downloads real soaringspot.com pages,
    # so it depends on those pages remaining available and unchanged.

    # old format
    soaringspot_page = SoaringSpotDaily(
        'https://www.soaringspot.com/en/sallandse-tweedaagse-2014/results/club/task-1-on-2014-06-21/daily')

    # Including DNS/DNF rows yields 2 extra entries for this page (10 vs 8)
    competitors = soaringspot_page._get_competitors_info(include_hc_competitors=False, include_dns_competitors=True)
    self.assertEqual(len(competitors), 10)
    competitors = soaringspot_page._get_competitors_info(include_hc_competitors=False, include_dns_competitors=False)
    self.assertEqual(len(competitors), 8)
    # Third-ranked competitor on the old-format page
    competitor_pk = soaringspot_page._get_competitors_info(include_hc_competitors=False)[2]

    self.assertEqual(competitor_pk['competition_id'], 'PK')
    self.assertEqual(competitor_pk['ranking'], 3)
    self.assertEqual(competitor_pk['pilot_name'], 'Erik Berendes')
    self.assertEqual(competitor_pk['plane_model'], 'Pik20D')

    # Download link should resolve to an absolute archive URL
    expected_igc_url = 'https://archive.soaringspot.com/contest/013/1323/flights/2477/2597322754.igc'
    self.assertEqual(competitor_pk['igc_url'], expected_igc_url)

    # new format
    soaringspot_page = SoaringSpotDaily(
        'https://www.soaringspot.com/en_gb/pribina-cup-2025-nitra-2025/results/club/task-6-on-2025-04-24/daily')

    # Including DNS/DNF rows yields 2 extra entries for this page (15 vs 13)
    competitors = soaringspot_page._get_competitors_info(include_hc_competitors=False, include_dns_competitors=True)
    self.assertEqual(len(competitors), 15)
    competitors = soaringspot_page._get_competitors_info(include_hc_competitors=False, include_dns_competitors=False)
    self.assertEqual(len(competitors), 13)
    # Second-ranked competitor on the new-format page
    competitor_pk = soaringspot_page._get_competitors_info(include_hc_competitors=False)[1]

    self.assertEqual(competitor_pk['competition_id'], 'X11')
    self.assertEqual(competitor_pk['ranking'], 2)
    self.assertEqual(competitor_pk['pilot_name'], 'Kengo Matsumoto')
    self.assertEqual(competitor_pk['plane_model'], 'ASW-20')

def test_get_competitors_info_relative_downloads(self):
"""relative IGC URLs"""
soaringspot_page = SoaringSpotDaily(
Expand Down