diff --git a/.gitignore b/.gitignore
index d1e521b..6977b1c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,4 @@
 *.egg-info
 data
 alignnet_model.pth
-
+wandb/
diff --git a/README.md b/README.md
index 37f1d29..2452a27 100644
--- a/README.md
+++ b/README.md
@@ -28,4 +28,27 @@
 pip install -e .
 
 # Run tests
 python -m pytest
+```
+
+### HuggingFace Hub Integration
+
+```bash
+# Record and push to HuggingFace Hub
+python -m alignit.record --dataset.hf_username=username --dataset.hf_dataset_name=dataset-name
+
+# Train using HuggingFace dataset
+python -m alignit.train --dataset.hf_dataset_name=username/dataset-name --model.path=./data/test_model.pth
+
+# Visualize HuggingFace dataset
+python -m alignit.visualize --dataset.hf_dataset_name=username/dataset-name
+```
+
+### Weights & Biases Integration
+
+```bash
+# Train with wandb experiment tracking
+python -m alignit.train --wandb_project=my-robot-project --wandb_run_name=experiment-1
+
+# Combine HuggingFace Hub and wandb
+python -m alignit.train --dataset.hf_dataset_name=username/dataset-name --wandb_project=my-robot-project
 ```
\ No newline at end of file
diff --git a/alignit/config.py b/alignit/config.py
index df3b71c..343bee7 100644
--- a/alignit/config.py
+++ b/alignit/config.py
@@ -8,158 +8,92 @@
 @dataclass
 class DatasetConfig:
     """Configuration for dataset paths and loading."""
-
-    path: str = field(
-        default="./data/duck", metadata={"help": "Path to the dataset directory"}
-    )
+    path: str = field(default="./data/duck", metadata={"help": "Path to the dataset directory"})
+    hf_username: Optional[str] = field(default=None, metadata={"help": "Hugging Face username for dataset sync"})
+    hf_dataset_name: Optional[str] = field(default=None, metadata={"help": "Hugging Face dataset name for sync"})
+    hf_token: Optional[str] = field(default=None, metadata={"help": "Hugging Face token for authentication"})
+    prefer_local: bool = field(default=True, metadata={"help": "Prefer local dataset over Hugging Face Hub"})
+    push_to_hub: bool = field(default=False, metadata={"help": "Automatically push datasets to Hugging Face Hub after saving"})
 
 
 @dataclass
 class ModelConfig:
     """Configuration for AlignNet model."""
-
-    backbone: str = field(
-        default="efficientnet_b0",
-        metadata={"help": "Backbone architecture: 'efficientnet_b0' or 'resnet18'"},
-    )
-    backbone_weights: str = field(
-        default="DEFAULT", metadata={"help": "Backbone weights: 'DEFAULT' or None"}
-    )
-    use_vector_input: bool = field(
-        default=False, metadata={"help": "Whether to use vector input"}
-    )
-    fc_layers: List[int] = field(
-        default_factory=lambda: [256, 128],
-        metadata={"help": "Hidden layer sizes for FC head"},
-    )
-    vector_hidden_dim: int = field(
-        default=64, metadata={"help": "Output dimension of vector MLP"}
-    )
-    output_dim: int = field(
-        default=9,
-        metadata={"help": "Final output dimension (3 translation + 6 rotation)"},
-    )
-    feature_agg: str = field(
-        default="mean", metadata={"help": "Feature aggregation method: 'mean' or 'max'"}
-    )
-    path: str = field(
-        default="alignnet_model.pth",
-        metadata={"help": "Path to save/load trained model"},
-    )
+    backbone: str = field(default="efficientnet_b0", metadata={"help": "Backbone architecture: 'efficientnet_b0' or 'resnet18'"})
+    backbone_weights: str = field(default="DEFAULT", metadata={"help": "Backbone weights: 'DEFAULT' or None"})
+    use_vector_input: bool = field(default=False, metadata={"help": "Whether to use vector input"})
+    fc_layers: List[int] = field(default_factory=lambda: [256, 128], metadata={"help": "Hidden layer sizes for FC head"})
+    vector_hidden_dim: int = field(default=64, metadata={"help": "Output dimension of vector MLP"})
+    output_dim: int = field(default=9, metadata={"help": "Final output dimension (3 translation + 6 rotation)"})
+    feature_agg: str = field(default="mean", metadata={"help": "Feature aggregation method: 'mean' or 'max'"})
+    path: str = field(default="alignnet_model.pth", metadata={"help": "Path to save/load trained model"})
 
 
 @dataclass
 class TrajectoryConfig:
     """Configuration for spiral trajectory generation."""
-
-    z_step: float = field(
-        default=0.002, metadata={"help": "Z step size for spiral trajectory"}
-    )
-    radius_step: float = field(
-        default=0.001, metadata={"help": "Radius step size for spiral trajectory"}
-    )
-    num_steps: int = field(
-        default=50, metadata={"help": "Number of steps in spiral trajectory"}
-    )
+    z_step: float = field(default=0.0007, metadata={"help": "Z step size for spiral trajectory"})
+    radius_step: float = field(default=0.001, metadata={"help": "Radius step size for spiral trajectory"})
+    num_steps: int = field(default=100, metadata={"help": "Number of steps in spiral trajectory"})
     cone_angle: float = field(default=30.0, metadata={"help": "Cone angle in degrees"})
-    visible_sweep: float = field(
-        default=60.0, metadata={"help": "Visible sweep angle in degrees"}
-    )
-    viewing_angle_offset: float = field(
-        default=-120.0, metadata={"help": "Viewing angle offset in degrees"}
-    )
-    angular_resolution: float = field(
-        default=10.0, metadata={"help": "Angular resolution in degrees"}
-    )
-    include_cone_poses: bool = field(
-        default=False, metadata={"help": "Include cone poses in trajectory"}
-    )
-    lift_height_before_spiral: float = field(
-        default=0.01, metadata={"help": "Lift height before spiral in meters"}
-    )
+    visible_sweep: float = field(default=60.0, metadata={"help": "Visible sweep angle in degrees"})
+    viewing_angle_offset: float = field(default=-120.0, metadata={"help": "Viewing angle offset in degrees"})
+    angular_resolution: float = field(default=10.0, metadata={"help": "Angular resolution in degrees"})
+    include_cone_poses: bool = field(default=False, metadata={"help": "Include cone poses in trajectory"})
+    lift_height_before_spiral: float = field(default=0.01, metadata={"help": "Lift height before spiral in meters"})
 
 
 @dataclass
 class RecordConfig:
     """Configuration for data recording."""
-
     dataset: DatasetConfig = field(default_factory=DatasetConfig)
     trajectory: TrajectoryConfig = field(default_factory=TrajectoryConfig)
-    episodes: int = field(default=10, metadata={"help": "Number of episodes to record"})
-    lin_tol_alignment: float = field(
-        default=0.015, metadata={"help": "Linear tolerance for alignment servo"}
-    )
-    ang_tol_alignment: float = field(
-        default=0.015, metadata={"help": "Angular tolerance for alignment servo"}
-    )
-    lin_tol_trajectory: float = field(
-        default=0.05, metadata={"help": "Linear tolerance for trajectory servo"}
-    )
-    ang_tol_trajectory: float = field(
-        default=0.05, metadata={"help": "Angular tolerance for trajectory servo"}
-    )
+    episodes: int = field(default=20, metadata={"help": "Number of episodes to record"})
+    lin_tol_alignment: float = field(default=0.015, metadata={"help": "Linear tolerance for alignment servo"})
+    ang_tol_alignment: float = field(default=0.015, metadata={"help": "Angular tolerance for alignment servo"})
+    lin_tol_trajectory: float = field(default=0.05, metadata={"help": "Linear tolerance for trajectory servo"})
+    ang_tol_trajectory: float = field(default=0.05, metadata={"help": "Angular tolerance for trajectory servo"})
 
 
 @dataclass
 class TrainConfig:
     """Configuration for model training."""
-
     dataset: DatasetConfig = field(default_factory=DatasetConfig)
     model: ModelConfig = field(default_factory=ModelConfig)
     batch_size: int = field(default=8, metadata={"help": "Training batch size"})
-    learning_rate: float = field(
-        default=1e-4, metadata={"help": "Learning rate for optimizer"}
-    )
+    learning_rate: float = field(default=1e-4, metadata={"help": "Learning rate for optimizer"})
     epochs: int = field(default=100, metadata={"help": "Number of training epochs"})
-    test_size: float = field(
-        default=0.2, metadata={"help": "Fraction of data for testing"}
+    test_size: float = field(default=0.2, metadata={"help": "Fraction of data for testing"})
+    random_seed: int = field(default=42, metadata={"help": "Random seed for train/test split"})
+    # Weights & Biases integration
+    wandb_project: Optional[str] = field(
+        default=None, metadata={"help": "Weights & Biases project name"}
+    )
+    wandb_run_name: Optional[str] = field(
+        default=None, metadata={"help": "Weights & Biases run name"}
     )
-    random_seed: int = field(
-        default=42, metadata={"help": "Random seed for train/test split"}
+    wandb_tags: Optional[List[str]] = field(
+        default=None, metadata={"help": "Weights & Biases tags for the run"}
     )
 
 
 @dataclass
 class InferConfig:
     """Configuration for inference/alignment."""
-
     model: ModelConfig = field(default_factory=ModelConfig)
-    start_pose_xyz: List[float] = field(
-        default_factory=lambda: [0.33, 0.0, 0.35],
-        metadata={"help": "Starting pose XYZ coordinates"},
-    )
-    start_pose_rpy: List[float] = field(
-        default_factory=lambda: [np.pi, 0.0, 0.0],
-        metadata={"help": "Starting pose RPY angles"},
-    )
-    lin_tolerance: float = field(
-        default=2e-3, metadata={"help": "Linear tolerance for convergence (meters)"}
-    )
-    ang_tolerance: float = field(
-        default=2, metadata={"help": "Angular tolerance for convergence (degrees)"}
-    )
-    max_iterations: Optional[int] = field(
-        default=None,
-        metadata={"help": "Maximum iterations before stopping (None = infinite)"},
-    )
-    debug_output: bool = field(
-        default=True, metadata={"help": "Print debug information during inference"}
-    )
-    debouncing_count: int = field(
-        default=5,
-        metadata={"help": "Number of iterations within tolerance before stopping"},
-    )
+    start_pose_xyz: List[float] = field(default_factory=lambda: [0.33, 0.0, 0.35], metadata={"help": "Starting pose XYZ coordinates"})
+    start_pose_rpy: List[float] = field(default_factory=lambda: [np.pi, 0.0, 0.0], metadata={"help": "Starting pose RPY angles"})
+    lin_tolerance: float = field(default=2e-3, metadata={"help": "Linear tolerance for convergence (meters)"})
+    ang_tolerance: float = field(default=2.0, metadata={"help": "Angular tolerance for convergence (degrees)"})
+    max_iterations: Optional[int] = field(default=None, metadata={"help": "Maximum iterations before stopping (None = infinite)"})
+    debug_output: bool = field(default=True, metadata={"help": "Print debug information during inference"})
+    debouncing_count: int = field(default=5, metadata={"help": "Number of consecutive iterations within tolerance before stopping"})
 
 
 @dataclass
 class VisualizeConfig:
     """Configuration for dataset visualization."""
-
     dataset: DatasetConfig = field(default_factory=DatasetConfig)
     share: bool = field(default=False, metadata={"help": "Create a public Gradio link"})
-    server_name: Optional[str] = field(
-        default=None, metadata={"help": "Server name for Gradio interface"}
-    )
-    server_port: Optional[int] = field(
-        default=None, metadata={"help": "Server port for Gradio interface"}
-    )
+    server_name: Optional[str] = field(default=None, metadata={"help": "Server name for Gradio interface"})
+    server_port: Optional[int] = field(default=None, metadata={"help": "Server port for Gradio interface"})
\ No newline at end of file
diff --git a/alignit/record.py b/alignit/record.py
index 93fc7d5..b723dbe 100644
--- a/alignit/record.py
+++ b/alignit/record.py
@@ -13,10 +13,10 @@
     load_from_disk,
     concatenate_datasets,
 )
-
 from alignit.robots.xarmsim import XarmSim
 from alignit.robots.xarm import Xarm
 from alignit.utils.zhou import se3_sixd
+from alignit.utils.dataset import push_dataset_to_hub
 import draccus
 
 from alignit.config import RecordConfig
@@ -132,6 +132,22 @@
         shutil.rmtree(cfg.dataset.path)
         shutil.move(temp_path, cfg.dataset.path)
 
+    # Push to HuggingFace Hub if configured
+    if cfg.dataset.hf_username and cfg.dataset.hf_dataset_name:
+        print(f"Pushing dataset to HuggingFace Hub: {cfg.dataset.hf_username}/{cfg.dataset.hf_dataset_name}")
+        try:
+            repo_id = push_dataset_to_hub(
+                combined_dataset,
+                cfg.dataset.hf_username,
+                cfg.dataset.hf_dataset_name,
+                cfg.dataset.hf_token,
+                private=True  # Make private by default for safety
+            )
+            print(f"Successfully pushed dataset to {repo_id}")
+        except Exception as e:
+            print(f"Failed to push dataset to HuggingFace Hub: {e}")
+            print("Dataset saved locally but not uploaded to Hub")
+
     robot.disconnect()
 
 
diff --git a/alignit/train.py b/alignit/train.py
index 6f05a33..510859e 100644
--- a/alignit/train.py
+++ b/alignit/train.py
@@ -9,6 +9,14 @@
 
 from alignit.config import TrainConfig
 from alignit.models.alignnet import AlignNet
+from alignit.utils.dataset import load_dataset_smart
+
+# Optional wandb import
+try:
+    import wandb
+    WANDB_AVAILABLE = True
+except ImportError:
+    WANDB_AVAILABLE = False
 
 
 def collate_fn(batch):
@@ -22,8 +30,28 @@
 def main(cfg: TrainConfig):
     """Train AlignNet model using configuration parameters."""
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # Load the dataset from disk
-    dataset = load_from_disk(cfg.dataset.path)
+    # Initialize Weights & Biases if configured
+    use_wandb = WANDB_AVAILABLE and cfg.wandb_project is not None
+    if use_wandb:
+        wandb.init(
+            project=cfg.wandb_project,
+            name=cfg.wandb_run_name,
+            tags=cfg.wandb_tags,
+            config=cfg.__dict__
+        )
+        print(f"Initialized Weights & Biases project: {cfg.wandb_project}")
+    elif cfg.wandb_project and not WANDB_AVAILABLE:
+        print("Warning: wandb project specified but wandb not installed. Install with: pip install wandb")
+
+    # Load the dataset from disk or HuggingFace Hub
+    if cfg.dataset.hf_dataset_name:
+        print(f"Loading dataset from HuggingFace Hub: {cfg.dataset.hf_dataset_name}")
+        dataset_path = cfg.dataset.hf_dataset_name
+    else:
+        print(f"Loading dataset from disk: {cfg.dataset.path}")
+        dataset_path = cfg.dataset.path
+
+    dataset = load_dataset_smart(dataset_path)
 
     # Create model using config parameters
     net = AlignNet(
@@ -53,7 +81,14 @@
     criterion = MSELoss()
 
     net.train()
+    # Watch model with wandb if enabled
+    if use_wandb:
+        wandb.watch(net, log_freq=100)
+
     for epoch in range(cfg.epochs):
+        epoch_loss = 0.0
+        num_batches = 0
+
         for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
             images = batch["images"]
             actions = batch["action"].to(device)
@@ -78,13 +113,32 @@
             loss = criterion(outputs, actions)
             loss.backward()
             optimizer.step()
+
+            epoch_loss += loss.item()
+            num_batches += 1
+
             tqdm.write(f"Loss: {loss.item():.4f}")
 
+        # Calculate average epoch loss
+        avg_epoch_loss = epoch_loss / num_batches
+
+        # Log to wandb if enabled
+        if use_wandb:
+            wandb.log({
+                "epoch": epoch + 1,
+                "loss": avg_epoch_loss,
+                "learning_rate": cfg.learning_rate
+            })
+
     # Save the trained model
     torch.save(net.state_dict(), cfg.model.path)
     tqdm.write(f"Model saved as {cfg.model.path}")
     print("Training complete.")
+
+    # Finish wandb run
+    if use_wandb:
+        wandb.finish()
 
 
 if __name__ == "__main__":
diff --git a/alignit/utils/dataset.py b/alignit/utils/dataset.py
index c9f5563..e93fe57 100644
--- a/alignit/utils/dataset.py
+++ b/alignit/utils/dataset.py
@@ -1,12 +1,48 @@
 from datasets import load_from_disk, load_dataset as hf_load_dataset
 
 
-def load_dataset(path: str):
-    """Load a dataset from disk if given a local path, otherwise from the hub.
-
-    This function wraps Hugging Face Datasets loaders while accepting either a
-    filesystem path (./ or /) or a dataset name.
+def load_dataset_smart(path: str):
+    """Load dataset either from local disk or HuggingFace Hub.
+
+    Args:
+        path: Local path (starts with . or /) or HuggingFace dataset name
     """
     if path.startswith(".") or path.startswith("/"):
         return load_from_disk(path)
-    return hf_load_dataset(path)
+
+    # Load from HuggingFace Hub
+    dataset = hf_load_dataset(path)
+
+    # Handle DatasetDict vs Dataset - return the train split if it exists
+    if hasattr(dataset, 'keys') and 'train' in dataset:
+        return dataset['train']
+    elif hasattr(dataset, 'keys') and len(dataset.keys()) == 1:
+        # If there's only one split, return it
+        split_name = list(dataset.keys())[0]
+        return dataset[split_name]
+    else:
+        # Return as-is if it's already a Dataset
+        return dataset
+
+
+# Backward compatibility alias for existing tests
+load_dataset = load_dataset_smart
+
+
+def push_dataset_to_hub(dataset, username: str, dataset_name: str, token: str = None, private: bool = True):
+    """Push dataset to HuggingFace Hub.
+
+    Args:
+        dataset: The dataset to push
+        username: HuggingFace username
+        dataset_name: Name for the dataset on HuggingFace Hub
+        token: HuggingFace token for authentication
+        private: Whether to make the dataset private (default: True)
+    """
+    repo_id = f"{username}/{dataset_name}"
+    dataset.push_to_hub(
+        repo_id=repo_id,
+        token=token,
+        private=private
+    )
+    return repo_id
diff --git a/alignit/visualize.py b/alignit/visualize.py
index 26ee81f..cd672ee 100644
--- a/alignit/visualize.py
+++ b/alignit/visualize.py
@@ -1,7 +1,9 @@
 import gradio as gr
 import draccus
+from PIL import Image
+import numpy as np
 
-from alignit.utils.dataset import load_dataset
+from alignit.utils.dataset import load_dataset_smart
 from alignit.utils.zhou import sixd_se3
 from alignit.utils.tfs import get_pose_str
 from alignit.config import VisualizeConfig
@@ -9,11 +11,19 @@
 
 @draccus.wrap()
 def visualize(cfg: VisualizeConfig):
-    dataset = load_dataset(cfg.dataset.path)
+    # Load the dataset from disk or HuggingFace Hub
+    if cfg.dataset.hf_dataset_name:
+        print(f"Loading dataset from HuggingFace Hub: {cfg.dataset.hf_dataset_name}")
+        dataset_path = cfg.dataset.hf_dataset_name
+    else:
+        print(f"Loading dataset from disk: {cfg.dataset.path}")
+        dataset_path = cfg.dataset.path
+
+    dataset = load_dataset_smart(dataset_path)
 
     def get_data(index):
         item = dataset[index]
-        image = item["images"][0]
+        image = item["images"][0]  # Should now be a PIL Image for both local and Hub datasets
         action_sixd = item["action"]
         action = sixd_se3(action_sixd)
         label = get_pose_str(action, degrees=True)
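
For quick reference, a minimal usage sketch of the new `alignit.utils.dataset` helpers introduced in this diff (not part of the change itself; the local path and Hub repo id below are placeholders):

```python
from alignit.utils.dataset import load_dataset_smart, push_dataset_to_hub

# Paths starting with "." or "/" go through datasets.load_from_disk; anything
# else is treated as a Hub repo id and loaded with datasets.load_dataset,
# returning the "train" split (or the only split) when a DatasetDict comes back.
local_ds = load_dataset_smart("./data/duck")
hub_ds = load_dataset_smart("username/dataset-name")  # placeholder repo id

# Mirrors the record.py flow: the repo is private by default, and token may be
# None if the machine is already authenticated with the Hugging Face Hub.
repo_id = push_dataset_to_hub(local_ds, "username", "dataset-name", token=None, private=True)
print(f"Pushed to https://huggingface.co/datasets/{repo_id}")
```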