@@ -299,15 +299,100 @@ def wait_for_flags_visible(timeout=30, interval=2):
299299def postgres_container ():
300300 """Create and start PostgreSQL container."""
301301 with PostgresContainer ("postgres:15" , driver = None ) as postgres :
302- postgres .start ()
303302 postgres_url = postgres .get_connection_url ()
304303 logger .info (f"PostgreSQL started at: { postgres_url } " )
305304
306305 yield postgres
307306
308307
def _get_container_health_url(container, port=4242):
    """Return the base HTTP URL used to reach the container from the host.

    Despite the name, the returned value is the base URL only; callers
    append the ``/health`` path themselves.

    Args:
        container: Object exposing ``get_exposed_port(port)``.
        port: Container-side port to look up. Defaults to 4242, the port
            this module has always used for Unleash.

    Returns:
        ``"http://localhost:<exposed_port>"`` where ``<exposed_port>`` is
        whatever ``container.get_exposed_port(port)`` returns.

    Raises:
        Any exception raised by ``container.get_exposed_port``; it is not
        caught here (presumably raised while the port is not mapped yet).
    """
    exposed_port = container.get_exposed_port(port)
    return f"http://localhost:{exposed_port}"
315+
316+
def _check_container_not_dead(container):
    """Raise if the wrapped Docker container has stopped running.

    Refreshes the container state and, when its status is ``"exited"`` or
    ``"dead"``, raises with the status and whatever logs could be collected.
    Returns ``None`` when there is no wrapped container or it is in any
    other state.

    Raises:
        RuntimeError: If the container status is ``"exited"`` or ``"dead"``.
    """
    docker_container = container.get_wrapped_container()
    if not docker_container:
        return

    docker_container.reload()
    status = docker_container.status
    if status not in ("exited", "dead"):
        return

    # Log retrieval is best-effort. Callers rely on RuntimeError as the
    # "container died" signal, so a failure while fetching logs must not
    # replace it with a different exception type.
    try:
        logs = _get_container_logs(container)
    except Exception as log_error:
        logs = f"<failed to retrieve container logs: {log_error}>"

    raise RuntimeError(f"Unleash container died ({status}).\nLogs:\n{logs}")


def _get_container_logs(container):
    """Return the container's logs as text, for debugging.

    Undecodable bytes are replaced rather than raising. Returns an empty
    string when there is no wrapped container.
    """
    docker_container = container.get_wrapped_container()
    if docker_container:
        docker_container.reload()
        return docker_container.logs().decode(errors="replace")
    return ""
339+
340+
def _log_timeout_and_logs(container):
    """Log the health-check timeout, then the container logs if available.

    Never raises because of log retrieval: a failure there is itself
    logged (with traceback) and the function returns.
    """
    # Emit the timeout message first so it is recorded even when fetching
    # the container logs fails.
    logger.error("Unleash container did not become healthy within timeout")
    try:
        logs = _get_container_logs(container)
    except Exception:
        logger.exception("Failed to retrieve container logs")
        return
    if logs:
        logger.error(f"Logs:\n{logs}")
350+
351+
def _wait_for_healthy(container, max_wait_time=120, poll_interval=2):
    """Poll the Unleash container until its /health endpoint returns 200.

    Args:
        container: Container object passed through to the URL, liveness
            and log helpers in this module.
        max_wait_time: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between attempts.

    Returns:
        The base URL of the container once ``GET <url>/health`` answers 200.

    Raises:
        RuntimeError: If the container is reported dead while its port
            cannot be resolved, or if it does not become healthy before
            ``max_wait_time`` elapses.
    """
    # Use the monotonic clock so wall-clock adjustments cannot shorten or
    # stretch the timeout.
    deadline = time.monotonic() + max_wait_time

    while time.monotonic() < deadline:
        try:
            try:
                unleash_url = _get_container_health_url(container)
                logger.info(f"Trying health check at: {unleash_url}")
            except Exception as port_error:
                # The port lookup failed: fail fast if the container is
                # gone, otherwise treat it as "not ready yet" and retry.
                _check_container_not_dead(container)
                logger.error(f"Port not ready yet: {port_error}")
            else:
                response = requests.get(f"{unleash_url}/health", timeout=5)
                if response.status_code == 200:
                    logger.info("Unleash container is healthy!")
                    return unleash_url

                logger.error(f"Health check failed, status: {response.status_code}")
        except RuntimeError:
            # The "container died" signal must reach the caller untouched.
            raise
        except Exception as e:
            # Anything else (e.g. connection refused) is treated as transient.
            logger.error(f"Health check error: {e}")

        time.sleep(poll_interval)

    _log_timeout_and_logs(container)
    raise RuntimeError("Unleash container did not become healthy within timeout")
386+
387+
388+ # Unleash's migration runner can hit a pg_class_relname_nsp_index race
389+ # condition that kills the process on first start. Retrying is safe because
390+ # the partially-created objects already exist on the second attempt.
391+ MAX_UNLEASH_ATTEMPTS = 3
392+
393+
309394@pytest .fixture (scope = "session" )
310- def unleash_container (postgres_container ): # noqa: PLR0915
395+ def unleash_container (postgres_container ):
311396 """Create and start Unleash container with PostgreSQL dependency."""
312397 global UNLEASH_URL
313398
@@ -322,75 +407,40 @@ def unleash_container(postgres_container): # noqa: PLR0915
322407 f":{ exposed_port } " , ":5432"
323408 )
324409
325- unleash = UnleashContainer ( internal_url )
410+ last_error = None
326411
327- with unleash as container :
328- logger .info ("Starting Unleash container..." )
329- container .start ()
330- logger .info ("Unleash container started" )
412+ for attempt in range (1 , MAX_UNLEASH_ATTEMPTS + 1 ):
413+ unleash = UnleashContainer (internal_url )
331414
332- # Wait for health check to pass
333- logger .info ("Waiting for Unleash container to be healthy ..." )
334- max_wait_time = 120 # 2 minutes; Unleash DB migrations can be slow in CI
335- start_time = time . time ( )
415+ with unleash as container :
416+ logger .info (f"Starting Unleash container (attempt { attempt } ) ..." )
417+ container . start ()
418+ logger . info ( "Unleash container started" )
336419
337- while time .time () - start_time < max_wait_time :
338420 try :
339- try :
340- exposed_port = container .get_exposed_port (4242 )
341- unleash_url = f"http://localhost:{ exposed_port } "
342- logger .info (f"Trying health check at: { unleash_url } " )
343- except Exception as port_error :
344- # if the container exited, fail fast with its logs
345- docker_container = container .get_wrapped_container ()
346- if docker_container :
347- docker_container .reload ()
348- if docker_container .status in ("exited" , "dead" ):
349- logs = docker_container .logs ().decode (errors = "replace" )
350- raise RuntimeError (
351- f"Unleash container died ({ docker_container .status } ).\n "
352- f"Logs:\n { logs } "
353- ) from port_error
354- logger .error (f"Port not ready yet: { port_error } " )
355- time .sleep (2 )
421+ unleash_url = _wait_for_healthy (container )
422+ except RuntimeError as exc :
423+ last_error = exc
424+ if "pg_class_relname_nsp_index" in str (exc ) or "died" in str (exc ):
425+ logger .warning (
426+ f"Unleash failed on attempt { attempt } "
427+ f"(likely migration race); retrying..."
428+ )
356429 continue
357-
358- response = requests .get (f"{ unleash_url } /health" , timeout = 5 )
359- if response .status_code == 200 :
360- logger .info ("Unleash container is healthy!" )
361- break
362-
363- logger .error (f"Health check failed, status: { response .status_code } " )
364- time .sleep (2 )
365-
366- except RuntimeError :
367430 raise
368- except Exception as e :
369- logger .error (f"Health check error: { e } " )
370- time .sleep (2 )
371- else :
372- # timeout; dump container logs for debugging
373- try :
374- docker_container = container .get_wrapped_container ()
375- if docker_container :
376- docker_container .reload ()
377- logs = docker_container .logs ().decode (errors = "replace" )
378- logger .error (
379- f"Unleash container status: { docker_container .status } \n "
380- f"Logs:\n { logs } "
381- )
382- except Exception :
383- logger .exception ("Failed to retrieve container logs" )
384- raise Exception ("Unleash container did not become healthy within timeout" )
385431
386- # Get the exposed port and set global URL
387- UNLEASH_URL = f"http://localhost:{ container .get_exposed_port (4242 )} "
388- logger .info (f"Unleash started at: { unleash_url } " )
432+ UNLEASH_URL = unleash_url
433+ logger .info (f"Unleash started at: { unleash_url } " )
434+
435+ insert_admin_token (postgres_container )
436+ logger .info ("Admin token inserted into database" )
389437
390- insert_admin_token ( postgres_container )
391- logger . info ( "Admin token inserted into database" )
438+ yield container , unleash_url
439+ return
392440
393- yield container , unleash_url
441+ raise RuntimeError (
442+ f"Unleash failed to start after { MAX_UNLEASH_ATTEMPTS } attempts"
443+ ) from last_error
394444
395445
396446@pytest .fixture (scope = "session" , autouse = True )
0 commit comments