@@ -299,15 +299,101 @@ def wait_for_flags_visible(timeout=30, interval=2):
def postgres_container():
    """Create and start PostgreSQL container.

    Yields the running ``PostgresContainer``; leaving the ``with`` block on
    teardown stops it.
    """
    # Entering the context manager starts the container, so there is no
    # explicit ``postgres.start()`` call here (a second start is redundant).
    with PostgresContainer("postgres:15", driver=None) as postgres:
        postgres_url = postgres.get_connection_url()
        logger.info(f"PostgreSQL started at: {postgres_url}")

        yield postgres
307306
308307
def _get_container_health_url(container, port=4242):
    """Return the base URL of the container as seen from the host.

    The result is ``http://localhost:<mapped port>`` with no path component;
    the caller is responsible for appending the endpoint (e.g. ``/health``).

    Args:
        container: Object exposing ``get_exposed_port(port)``.
        port: Container-internal port to resolve. Defaults to 4242.

    Raises:
        Whatever ``container.get_exposed_port`` raises, e.g. when the port
        is not exposed yet.
    """
    exposed_port = container.get_exposed_port(port)
    return f"http://localhost:{exposed_port}"
315+
316+
def _check_container_not_dead(container):
    """Raise if the underlying Docker container has stopped.

    Refreshes the wrapped container's state and inspects its status.
    Returns ``None`` when there is no wrapped container or when the status
    is anything other than ``"exited"`` / ``"dead"``.

    Raises:
        RuntimeError: The container status is ``"exited"`` or ``"dead"``.
            The message contains the status and the decoded container logs.
    """
    docker_container = container.get_wrapped_container()
    if not docker_container:
        return

    # Status is cached on the object; reload to see the current state.
    docker_container.reload()
    if docker_container.status not in ("exited", "dead"):
        return

    logs = docker_container.logs().decode(errors="replace")
    raise RuntimeError(
        f"Unleash container died ({docker_container.status}).\n"
        f"Logs:\n{logs}"
    )
331+
332+
def _get_container_logs(container):
    """Get container logs for debugging.

    Returns the wrapped container's logs decoded to ``str`` (undecodable
    bytes are replaced), or an empty string when there is no wrapped
    container.
    """
    docker_container = container.get_wrapped_container()
    if not docker_container:
        return ""

    docker_container.reload()
    return docker_container.logs().decode(errors="replace")
340+
341+
def _log_timeout_and_logs(container):
    """Log the health-check timeout and, if available, the container logs.

    Best-effort: a failure while fetching the container logs is logged with
    its traceback and never propagates to the caller.
    """
    # Record the timeout first so it is logged even when fetching the
    # container logs fails.
    logger.error("Unleash container did not become healthy within timeout")

    try:
        logs = _get_container_logs(container)
    except Exception:
        logger.exception("Failed to retrieve container logs")
        return

    if logs:
        logger.error(f"Logs:\n{logs}")
351+
352+
def _wait_for_healthy(
    container,
    max_wait_time=120,
    *,
    poll_interval=2,
    request_timeout=5,
):
    """Poll the Unleash container until its /health endpoint returns 200.

    Args:
        container: The Unleash container to poll.
        max_wait_time: Overall time budget in seconds.
        poll_interval: Seconds to sleep between attempts.
        request_timeout: Per-request HTTP timeout in seconds.

    Returns:
        The base URL of the container once ``/health`` answers with 200.

    Raises:
        RuntimeError: The container died while its port could not be
            resolved, or it did not become healthy within ``max_wait_time``.
    """
    # Monotonic clock: the deadline must not shift if the wall clock is
    # adjusted while we are waiting.
    start_time = time.monotonic()

    while time.monotonic() - start_time < max_wait_time:
        try:
            try:
                unleash_url = _get_container_health_url(container)
                logger.info(f"Trying health check at: {unleash_url}")
            except Exception as port_error:
                # The port lookup failed; if that is because the container
                # exited, fail fast with its logs instead of waiting out
                # the whole timeout.
                _check_container_not_dead(container)
                logger.error(f"Port not ready yet: {port_error}")
                time.sleep(poll_interval)
                continue

            response = requests.get(
                f"{unleash_url}/health", timeout=request_timeout
            )
            if response.status_code == 200:
                logger.info("Unleash container is healthy!")
                return unleash_url

            logger.error(f"Health check failed, status: {response.status_code}")
            time.sleep(poll_interval)

        except RuntimeError:
            # Container death is fatal for this attempt; let it propagate.
            raise
        except Exception as e:
            # Anything else (e.g. connection refused while the server is
            # still booting) is treated as "not ready yet".
            logger.error(f"Health check error: {e}")
            time.sleep(poll_interval)

    _log_timeout_and_logs(container)
    raise RuntimeError("Unleash container did not become healthy within timeout")
387+
388+
# Unleash's migration runner can hit a pg_class_relname_nsp_index race
# condition that kills the process on first start. Retrying is safe because
# the partially-created objects already exist on the second attempt.
# The value is the total number of start attempts, including the first one.
MAX_UNLEASH_ATTEMPTS = 3
393+
394+
309395@pytest .fixture (scope = "session" )
310- def unleash_container (postgres_container ): # noqa: PLR0915
396+ def unleash_container (postgres_container ):
311397 """Create and start Unleash container with PostgreSQL dependency."""
312398 global UNLEASH_URL
313399
@@ -322,75 +408,40 @@ def unleash_container(postgres_container): # noqa: PLR0915
322408 f":{ exposed_port } " , ":5432"
323409 )
324410
325- unleash = UnleashContainer ( internal_url )
411+ last_error = None
326412
327- with unleash as container :
328- logger .info ("Starting Unleash container..." )
329- container .start ()
330- logger .info ("Unleash container started" )
413+ for attempt in range (1 , MAX_UNLEASH_ATTEMPTS + 1 ):
414+ unleash = UnleashContainer (internal_url )
331415
332- # Wait for health check to pass
333- logger .info ("Waiting for Unleash container to be healthy ..." )
334- max_wait_time = 120 # 2 minutes; Unleash DB migrations can be slow in CI
335- start_time = time . time ( )
416+ with unleash as container :
417+ logger .info (f"Starting Unleash container (attempt { attempt } ) ..." )
418+ container . start ()
419+ logger . info ( "Unleash container started" )
336420
337- while time .time () - start_time < max_wait_time :
338421 try :
339- try :
340- exposed_port = container .get_exposed_port (4242 )
341- unleash_url = f"http://localhost:{ exposed_port } "
342- logger .info (f"Trying health check at: { unleash_url } " )
343- except Exception as port_error :
344- # if the container exited, fail fast with its logs
345- docker_container = container .get_wrapped_container ()
346- if docker_container :
347- docker_container .reload ()
348- if docker_container .status in ("exited" , "dead" ):
349- logs = docker_container .logs ().decode (errors = "replace" )
350- raise RuntimeError (
351- f"Unleash container died ({ docker_container .status } ).\n "
352- f"Logs:\n { logs } "
353- ) from port_error
354- logger .error (f"Port not ready yet: { port_error } " )
355- time .sleep (2 )
422+ unleash_url = _wait_for_healthy (container )
423+ except RuntimeError as exc :
424+ last_error = exc
425+ if "pg_class_relname_nsp_index" in str (exc ) or "died" in str (exc ):
426+ logger .warning (
427+ f"Unleash failed on attempt { attempt } "
428+ f"(likely migration race); retrying..."
429+ )
356430 continue
357-
358- response = requests .get (f"{ unleash_url } /health" , timeout = 5 )
359- if response .status_code == 200 :
360- logger .info ("Unleash container is healthy!" )
361- break
362-
363- logger .error (f"Health check failed, status: { response .status_code } " )
364- time .sleep (2 )
365-
366- except RuntimeError :
367431 raise
368- except Exception as e :
369- logger .error (f"Health check error: { e } " )
370- time .sleep (2 )
371- else :
372- # timeout; dump container logs for debugging
373- try :
374- docker_container = container .get_wrapped_container ()
375- if docker_container :
376- docker_container .reload ()
377- logs = docker_container .logs ().decode (errors = "replace" )
378- logger .error (
379- f"Unleash container status: { docker_container .status } \n "
380- f"Logs:\n { logs } "
381- )
382- except Exception :
383- logger .exception ("Failed to retrieve container logs" )
384- raise Exception ("Unleash container did not become healthy within timeout" )
385432
386- # Get the exposed port and set global URL
387- UNLEASH_URL = f"http://localhost:{ container .get_exposed_port (4242 )} "
388- logger .info (f"Unleash started at: { unleash_url } " )
433+ UNLEASH_URL = unleash_url
434+ logger .info (f"Unleash started at: { unleash_url } " )
435+
436+ insert_admin_token (postgres_container )
437+ logger .info ("Admin token inserted into database" )
389438
390- insert_admin_token ( postgres_container )
391- logger . info ( "Admin token inserted into database" )
439+ yield container , unleash_url
440+ return
392441
393- yield container , unleash_url
442+ raise RuntimeError (
443+ f"Unleash failed to start after { MAX_UNLEASH_ATTEMPTS } attempts"
444+ ) from last_error
394445
395446
396447@pytest .fixture (scope = "session" , autouse = True )
0 commit comments