brentyi · brentyi · May 6, 2025 · May 6, 2025 · May 6, 2025 · May 6, 2025
diff --git a/0a_preprocess_training_data.py b/0a_preprocess_training_data.py
diff --git a/0_preprocess_training_data.py → 0b_preprocess_training_data.py b/0_preprocess_training_data.py → 0b_preprocess_training_data.py
@@ -2,12 +2,6 @@
 
 Due to AMASS licensing, we unfortunately can't re-distribute our preprocessed dataset. If you have questions
 or run into issues, please reach out.
-
-To generate the npz format yourself, you can use our updated version of the `process_amass_data.py` script:
-- https://gist.github.com/brentyi/a52bfaee37734d30d60e3ab1a092e645
-
-Instructions for running this script can be found in the HuMoR repository:
-- https://github.com/davrempe/humor/tree/main/data
 """
 
 import queue

diff --git a/README.md b/README.md
@@ -76,7 +76,7 @@ EgoAllo requires Python 3.12 or newer.
 
 4. **Download the SMPL-H model file.**
 
-   You can find the "Extended SMPL+H model" from the [MANO project webpage](https://mano.is.tue.mpg.de/).
+   You can find the "Extended SMPL+H model" (16 shape parameters) from the [MANO project webpage](https://mano.is.tue.mpg.de/).
    Our scripts assume an npz file located at `./data/smplh/neutral/model.npz`, but this can be overridden on the command line (`--smplh-npz-path {your path}`).
 
 5. **Visualize model outputs.**
@@ -133,6 +133,41 @@ EgoAllo requires Python 3.12 or newer.
    python 2_run_hamer_on_vrs.py --traj-root ./egoallo_example_trajectories/coffeemachine
    ```
 
+## Preprocessing Training Data
+
+To train the motion prior model, we use data from the [AMASS dataset](https://amass.is.tue.mpg.de/). Due to licensing constraints, we cannot redistribute the preprocessed data. Instead, we provide two preprocessing scripts, which should be run in order after downloading the dataset:
+
+1. **Download the AMASS dataset.**
+
+   Download the AMASS dataset from the [official website](https://amass.is.tue.mpg.de/). We use the following splits:
+
+   - **Training**: ACCAD, BioMotionLab_NTroje, BMLhandball, BMLmovi, CMU, DanceDB, DFaust_67EKUT, Eyes_Japan_Dataset, KIT, MPI_Limits, TCD_handMocap, TotalCapture
+   - **Validation**: HumanEva, MPI_HDM05, SFU, MPI_mosh
+   - **Testing**: Transitions_mocap, SSM_synced
+
+2. **Run the first preprocessing script.**
+
+   ```bash
+   python 0a_preprocess_training_data.py --help
+   python 0a_preprocess_training_data.py --data-root /path/to/amass --smplh-root ./data/smplh
+   ```
+
+   This script, adapted from HuMoR, processes raw AMASS data by:
+
+   - Converting to gender-neutral SMPL-H parameters
+   - Computing contact labels for feet, hands, and knees
+   - Filtering out problematic sequences (treadmill walking, sequences with foot skating)
+   - Downsampling to 30fps
+
+3. **Run the second preprocessing script.**
+
+   ```bash
+   python 0b_preprocess_training_data.py --help
+   python 0b_preprocess_training_data.py --data-npz-dir ./data/processed_30fps_no_skating/
+   ```
+
+   This converts the processed NPZ files to a unified HDF5 format for more efficient training, with optimized chunk sizes for reading sequences.
+
 ## Status
 
 This repository currently contains:

diff --git a/data/smplh_gender_conversion/female_to_male.npz b/data/smplh_gender_conversion/female_to_male.npz
diff --git a/data/smplh_gender_conversion/female_to_neutral.npz b/data/smplh_gender_conversion/female_to_neutral.npz
diff --git a/data/smplh_gender_conversion/male_to_female.npz b/data/smplh_gender_conversion/male_to_female.npz
diff --git a/data/smplh_gender_conversion/male_to_neutral.npz b/data/smplh_gender_conversion/male_to_neutral.npz
diff --git a/data/smplh_gender_conversion/neutral_to_female.npz b/data/smplh_gender_conversion/neutral_to_female.npz
diff --git a/data/smplh_gender_conversion/neutral_to_male.npz b/data/smplh_gender_conversion/neutral_to_male.npz
diff --git a/pyproject.toml b/pyproject.toml
@@ -30,11 +30,16 @@ dependencies = [
     "projectaria-tools[all]",
     "opencv-python",
     "gdown",
+    "scikit-learn", # Only needed for preprocessing
+    "smplx", # Only needed for preprocessing
 ]
 
 [tool.setuptools.package-data]
 egoallo = ["py.typed"]
 
+[tool.pyright]
+ignore = ["**/preprocessing/**", "./0a_preprocess_training_data.py"]
+
 [tool.ruff.lint]
 select = [
     "E",  # pycodestyle errors.
@@ -50,7 +55,6 @@ ignore = [
     "E501",  # Line too long.
     "E721",  # Do not compare types, use `isinstance()`.
     "F722",  # Forward annotation false positive from jaxtyping. Should be caught by pyright.
-    "F731",  # Do not assign a lambda expression, use a def.
     "F821",  # Forward annotation false positive from jaxtyping. Should be caught by pyright.
     "PLR2004",  # Magic value used in comparison.
     "PLR0915",  # Too many statements.

diff --git a/src/egoallo/guidance_optimizer_jax.py b/src/egoallo/guidance_optimizer_jax.py
@@ -353,7 +353,7 @@ def _optimize(
     del contacts
 
     # We'll populate a list of factors (cost terms).
-    factors = list[jaxls.Factor]()
+    factors = list[jaxls.Cost]()
 
     def cost_with_args[*CostArgs](
         *args: Unpack[tuple[*CostArgs]],
@@ -366,7 +366,7 @@ def cost_with_args[*CostArgs](
         def inner(
             cost_func: Callable[[jaxls.VarValues, *CostArgs], jax.Array],
         ) -> Callable[[jaxls.VarValues, *CostArgs], jax.Array]:
-            factors.append(jaxls.Factor.make(cost_func, args))
+            factors.append(jaxls.Cost(cost_func, args))
             return cost_func
 
         return inner
@@ -855,9 +855,9 @@ def skating_cost(
 
     vars_body_pose = _SmplhBodyPosesVar(jnp.arange(timesteps))
     vars_hand_pose = _SmplhSingleHandPosesVar(jnp.arange(timesteps * 2))
-    graph = jaxls.FactorGraph.make(
-        factors=factors, variables=[vars_body_pose, vars_hand_pose], use_onp=False
-    )
+    graph = jaxls.LeastSquaresProblem(
+        costs=factors, variables=[vars_body_pose, vars_hand_pose]
+    ).analyze()
     solutions = graph.solve(
         initial_vals=jaxls.VarValues.make(
             [

diff --git a/src/egoallo/preprocessing/__init__.py b/src/egoallo/preprocessing/__init__.py
diff --git a/src/egoallo/preprocessing/body_model/__init__.py b/src/egoallo/preprocessing/body_model/__init__.py
@@ -0,0 +1,4 @@
+from .body_model import BodyModel
+from .skeleton import *
+from .specs import *
+from .utils import *