From 9ec2af9a3e15d791b601089309ad594847265dc8 Mon Sep 17 00:00:00 2001 From: Antoine Falisse Date: Fri, 28 Feb 2025 13:32:44 +0100 Subject: [PATCH 1/7] reduce precision to get smaller files and improve loading speed --- main.py | 3 ++- utilsOpenSim.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 678fc630..74ec7f35 100644 --- a/main.py +++ b/main.py @@ -603,7 +603,8 @@ def main(sessionName, trialName, trial_id, cameras_to_use=['all'], trialName + '.json') generateVisualizerJson(pathModelIK, pathOutputIK, outputJsonVisPath, - vertical_offset=vertical_offset) + vertical_offset=vertical_offset, + roundToRotations=4, roundToTranslations=4) # %% Rewrite settings, adding offset if not extrinsicsTrial: diff --git a/utilsOpenSim.py b/utilsOpenSim.py index 338b0986..09ef03ed 100644 --- a/utilsOpenSim.py +++ b/utilsOpenSim.py @@ -596,7 +596,7 @@ def compareTRCAndForcesTime(pathTRC,pathForces): # %% This takes model and IK and generates a json of body transforms that can # be passed to the webapp visualizer def generateVisualizerJson(modelPath,ikPath,jsonOutputPath,statesInDegrees=True, - vertical_offset=None): + vertical_offset=None, roundToRotations=None, roundToTranslations=None): opensim.Logger.setLevelString('error') model = opensim.Model(modelPath) @@ -693,8 +693,14 @@ def generateVisualizerJson(modelPath,ikPath,jsonOutputPath,statesInDegrees=True, # geometry origin. 
Ayman said getting transform to Geometry::Mesh is safest # but we don't have access to it thru API and Ayman said what we're doing # is OK for now - visualizeDict['bodies'][body.getName()]['rotation'].append(body.getTransformInGround(state).R().convertRotationToBodyFixedXYZ().to_numpy().tolist()) - visualizeDict['bodies'][body.getName()]['translation'].append(body.getTransformInGround(state).T().to_numpy().tolist()) + c_rotations = body.getTransformInGround(state).R().convertRotationToBodyFixedXYZ().to_numpy() + c_translations = body.getTransformInGround(state).T().to_numpy() + if roundToRotations is not None: + c_rotations = np.round(c_rotations, roundToRotations) + if roundToTranslations is not None: + c_translations = np.round(c_translations, roundToTranslations) + visualizeDict['bodies'][body.getName()]['rotation'].append(c_rotations.tolist()) + visualizeDict['bodies'][body.getName()]['translation'].append(c_translations.tolist()) with open(jsonOutputPath, 'w') as f: json.dump(visualizeDict, f) From a6736466fe8bd9676194cb52885d47e4c60c78d1 Mon Sep 17 00:00:00 2001 From: carmichaelong Date: Mon, 3 Mar 2025 15:09:51 -0800 Subject: [PATCH 2/7] make the error log human readable --- utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils.py b/utils.py index 1c2445f5..93b97759 100644 --- a/utils.py +++ b/utils.py @@ -1586,7 +1586,7 @@ def checkCudaTF(): sendStatusEmail(message=message) raise Exception("No GPU detected. 
Exiting.") -def writeToJsonLog(path, new_dict, max_entries=1000): +def writeToJsonLog(path, new_dict, max_entries=1000, indent=2): dir_name = os.path.dirname(path) if not os.path.exists(dir_name): os.makedirs(dir_name) @@ -1603,7 +1603,7 @@ def writeToJsonLog(path, new_dict, max_entries=1000): data.pop(0) with open(path, 'w') as f: - json.dump(data, f) + json.dump(data, f, indent=indent) def writeToErrorLog(path, session_id, trial_id, error, stack, max_entries=1000): error_entry = { From e50b923982056d3d3e3266003506186144a9f1cf Mon Sep 17 00:00:00 2001 From: carmichaelong Date: Mon, 3 Mar 2025 16:13:22 -0800 Subject: [PATCH 3/7] attempt to restart openpose and mmpose docker containers 3 times on failure --- docker/docker-compose.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 55acdbdc..7437a70a 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -38,6 +38,7 @@ services: options: max-size: "100m" # Rotate when the log reaches 10MB max-file: "7" # Keep the last 7 log files + restart: on-failure:3 mmpose: image: ${MMPOSE_IMAGE_NAME} volumes: @@ -55,6 +56,7 @@ services: options: max-size: "100m" # Rotate when the log reaches 10MB max-file: "7" # Keep the last 7 log files + restart: on-failure:3 volumes: data: {} From 5f47cbaac9297f97024e7b508b0e0f3fc8656659 Mon Sep 17 00:00:00 2001 From: carmichaelong Date: Mon, 3 Mar 2025 16:27:01 -0800 Subject: [PATCH 4/7] add commit hash to pulling trials message --- app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index a1ce13a9..817369c0 100644 --- a/app.py +++ b/app.py @@ -79,7 +79,8 @@ continue if r.status_code == 404: - logging.info("...pulling " + workerType + " trials from " + API_URL) + logging.info(f"...pulling {workerType} trials from {API_URL} " + f"using commit {getCommitHash()}") time.sleep(1) # When using autoscaling, we will remove the instance scale-in protection if it hasn't From
218ce697bfec5206ac4297b4e5d5904b0f532f6c Mon Sep 17 00:00:00 2001 From: carmichaelong Date: Mon, 3 Mar 2025 16:39:55 -0800 Subject: [PATCH 5/7] also retry test trial on URLError --- utilsServer.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/utilsServer.py b/utilsServer.py index 116c5390..8adf03bf 100644 --- a/utilsServer.py +++ b/utilsServer.py @@ -6,6 +6,7 @@ import logging import time import random +import urllib from main import main from utils import getDataDirectory @@ -500,28 +501,28 @@ def runTestSession(pose='all',isDocker=True,maxNumTries=3): logging.info("\n\n\nStatus check succeeded. \n\n") return - # Catch and re-enter while loop if it's an HTTPError (could be more - # than just 404 errors). Wait between 30 and 60 seconds before - # retrying. - except requests.exceptions.HTTPError as e: + # Catch and re-enter while loop if it's an HTTPError or URLError + # (could be more than just 404 errors). Wait between 30 and 60 seconds + # before retrying. + except (requests.exceptions.HTTPError, urllib.error.URLError) as e: if numTries < maxNumTries: - logging.info(f"test trial failed on try #{numTries} due to HTTPError. Retrying.") + logging.info(f"test trial failed on try #{numTries} due to HTTPError or URLError. Retrying.") wait_time = random.randint(30,60) logging.info(f"waiting {wait_time} seconds then retrying...") time.sleep(wait_time) continue else: - logging.info(f"test trial failed on try #{numTries} due to HTTPError.") + logging.info(f"test trial failed on try #{numTries} due to HTTPError or URLError.") # send email - message = "A backend OpenCap machine failed the status check (HTTPError). It has been stopped." + message = "A backend OpenCap machine failed the status check (HTTPError or URLError). It has been stopped." sendStatusEmail(message=message) - raise Exception('Failed status check (HTTPError). Stopped.') + raise Exception('Failed status check (HTTPError or URLError). 
Stopped.') # Catch other errors and stop except: logging.info("test trial failed. stopping machine.") # send email - message = "A backend OpenCap machine failed the status check. It has been stopped." + message = "A backend OpenCap machine failed the status check (not HTTPError or URLError). It has been stopped." sendStatusEmail(message=message) raise Exception('Failed status check. Stopped.') \ No newline at end of file From 13d4f0c3b4c849686d0a8b7c74bc2572117fd209 Mon Sep 17 00:00:00 2001 From: carmichaelong Date: Mon, 3 Mar 2025 17:01:59 -0800 Subject: [PATCH 6/7] add try/catch to openpose loop --- mmpose/loop_mmpose.py | 4 ++-- openpose/loop_openpose.py | 25 +++++++++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/mmpose/loop_mmpose.py b/mmpose/loop_mmpose.py index 6f18a880..e648d937 100644 --- a/mmpose/loop_mmpose.py +++ b/mmpose/loop_mmpose.py @@ -69,8 +69,8 @@ def checkCudaPyTorch(): if os.path.isfile(bboxPath): os.remove(bboxPath) - logging.info("Done. Cleaning up") + logging.info("mmpose: Done. 
Cleaning up") except: - logging.info("Pose detection failed.") + logging.info("mmpose: Pose detection failed.") os.remove(video_path) diff --git a/openpose/loop_openpose.py b/openpose/loop_openpose.py index 82850aca..b3b319c0 100644 --- a/openpose/loop_openpose.py +++ b/openpose/loop_openpose.py @@ -92,7 +92,7 @@ def getResolutionCommand(resolutionPoseDetection, horizontal): time.sleep(0.1) continue - logging.info("Processing...") + logging.info("Processing openpose...") if os.path.isdir(output_dir): shutil.rmtree(output_dir) @@ -101,13 +101,18 @@ def getResolutionCommand(resolutionPoseDetection, horizontal): horizontal = getVideoOrientation(video_path) cmd_hr = getResolutionCommand(resolutionPoseDetection, horizontal) - check_cuda_device() - command = "/openpose/build/examples/openpose/openpose.bin\ - --video {video_path}\ - --display 0\ - --write_json {output_dir}\ - --render_pose 0{cmd_hr}".format(video_path=video_path, output_dir=output_dir, cmd_hr=cmd_hr) - os.system(command) + try: + check_cuda_device() + command = "/openpose/build/examples/openpose/openpose.bin\ + --video {video_path}\ + --display 0\ + --write_json {output_dir}\ + --render_pose 0{cmd_hr}".format(video_path=video_path, output_dir=output_dir, cmd_hr=cmd_hr) + os.system(command) - logging.info("Done. Cleaning up") - os.remove(video_path) + logging.info("openpose: Done. Cleaning up") + os.remove(video_path) + + except: + logging.info("openpose: Pose detection failed.") + os.remove(video_path) From e8bbe97c76327dbb1e118a4117dff0ddb9a55e02 Mon Sep 17 00:00:00 2001 From: carmichaelong Date: Wed, 26 Mar 2025 17:00:54 -0700 Subject: [PATCH 7/7] include datetime in logging. 
add try/catch to request --- app.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/app.py b/app.py index 817369c0..ea4963a1 100644 --- a/app.py +++ b/app.py @@ -17,7 +17,10 @@ postProcessedDuration, makeRequestWithRetry, writeToErrorLog) -logging.basicConfig(level=logging.INFO) +logging.basicConfig(format="[%(asctime)s] [%(levelname)s] %(message)s", + level=logging.INFO, + datefmt='%Y-%m-%d %H:%M:%S', + force=True) API_TOKEN = getToken() API_URL = getAPIURL() @@ -125,10 +128,20 @@ error_msg['error_msg'] = 'No videos uploaded. Ensure phones are connected and you have stable internet connection.' error_msg['error_msg_dev'] = 'No videos uploaded.' - r = makeRequestWithRetry('PATCH', - trial_url, - data={"status": "error", "meta": json.dumps(error_msg)}, - headers = {"Authorization": "Token {}".format(API_TOKEN)}) + try: + r = makeRequestWithRetry('PATCH', + trial_url, + data={"status": "error", "meta": json.dumps(error_msg)}, + headers = {"Authorization": "Token {}".format(API_TOKEN)}) + + except Exception as e: + traceback.print_exc() + + if ERROR_LOG: + stack = traceback.format_exc() + writeToErrorLog(error_log_path, trial["session"], trial["id"], + e, stack) + continue # The following is now done in main, to allow reprocessing trials with missing videos