diff --git a/docs/tools/dev_system/all.replace_common_files_with_script_links.md b/docs/tools/dev_system/all.replace_common_files_with_script_links.md index d50147a15..32b676040 100644 --- a/docs/tools/dev_system/all.replace_common_files_with_script_links.md +++ b/docs/tools/dev_system/all.replace_common_files_with_script_links.md @@ -1,18 +1,15 @@ - - - [Managing Symbolic Links Between Directories](#managing-symbolic-links-between-directories) - * [Define](#define) + * [Summary](#summary) * [Why Do We Need This Approach?](#why-do-we-need-this-approach) + * [Nomenclature](#nomenclature) * [Workflow and Commands](#workflow-and-commands) + [Step 1: Replace Files with Symbolic Links](#step-1-replace-files-with-symbolic-links) + [Step 2: Stage Files for Modification](#step-2-stage-files-for-modification) + [Step 3: Restore Symbolic Links After Modifications](#step-3-restore-symbolic-links-after-modifications) + [Workflow Summary](#workflow-summary) + [Example Directory Structure](#example-directory-structure) - + [Notes and Best Practices](#notes-and-best-practices) - + [Conclusion](#conclusion) @@ -21,16 +18,16 @@ ## Summary - This document describes two scripts, `create_links.py` and - `stage_linked_file.py` used to manage symbolic links between a - source directory and a destination directory + `stage_linked_file.py` used to manage symbolic links between a source + directory and a destination directory - These tools simplify workflows where you want to create read-only symbolic links for files, stage modifications, and later restore the links ## Why Do We Need This Approach? -- In our codebases, it is common to have duplicate files or files - that are identical between two directories. Maintaining these files manually - can lead to inefficiencies and errors: +- In our codebases, it is common to have files that are identical between two + directories. 
Maintaining these files manually can lead to inefficiencies and + errors: - Synchronization: If changes are made in one location, they may not reflect in the other, leading to inconsistencies - Accidental Modifications: Directly modifying files that should remain @@ -38,12 +35,31 @@ - With our approach: - We avoid file duplication by creating links that point to the original files - - Links in the destination directory remain read-only, reducing the risk of - accidental changes + - Links in the destination directory are marked as read-only, reducing the + risk of accidental changes - If modifications are needed, the "staging process" ensures you can work safely on copies without altering the original source files - - After the code has been developed, one can then convert copies of files, back - to links + - After the code has been developed, one can then convert copies of files + back to links + +## Nomenclature + +- Links are often confusing since it's not clear what is linked to and what is + linked from, e.g., + - `ln -s foo bar` creates a symbolic link named `bar` that points to `foo` + ```bash + bar -> foo + ``` + - The argument order matches `cp foo bar`, where a new file called `bar` is + created with the content of `foo`, but the arrow points back to `foo` + +- Also referring to "source" and "destination" is confusing since it is unclear + if "destination" is the "destination" of the link (i.e., the head of the + arrow) or the "destination" of the copy operation (the tail of the arrow) + +- In the rest of this document we will refer to the file being created as + "destination" + - E.g., `ln -s old_file new_file` creates the destination `new_file` ## Workflow and Commands @@ -55,7 +71,8 @@ links to the corresponding files in `src_dir` Command: - ``` + + ```bash > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` @@ -76,16 +93,20 @@ - If you want to edit the files in `dst_dir` (which are currently symbolic links), use `stage_linked_file.py` to stage them. 
Staging replaces the symbolic links with writable copies of the original files +- At this point, you can just modify the files in `dst_dir` to achieve the + desired goal, without worrying about altering the source files + - Often you don't know in advance which files need to change or how, so all + the files are staged for modification - Command: - ``` + + ```bash > stage_linked_file.py --dst_dir /path/to/dst ``` - What it does: - Finds all the symbolic links in `dst_dir` - - Replaces each symbolic link with a writable copy of the file it points - to + - Replaces each symbolic link with a writable copy of the file it points to - Sets file permissions to `644` (writable) - Why it is important: @@ -95,11 +116,12 @@ ### Step 3: Restore Symbolic Links After Modifications -- Once you’ve finished modifying the files, you can restore the symbolic links +- Once you've finished modifying the files, you can restore the symbolic links by running `create_links.py` again with the `--replace_links` flag - Command: - ``` + + ```bash > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links ``` @@ -115,22 +137,23 @@ ### Workflow Summary -- Set up `symbolic links`: - ``` - > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links - ``` +1. Set up symbolic links: -- Stage `symbolic links` for modification: - ``` - > stage_linked_file.py --dst_dir /path/to/dst - ``` + ```bash + > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links + ``` -- Modify files as required +2. Stage symbolic links for modification: + ``` + > stage_linked_file.py --dst_dir /path/to/dst + ``` -- After modifications, restore the `symbolic links`: - ``` - > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links - ``` +3. Modify files as required + +4. 
After modifications, restore the symbolic links: + ``` + > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links + ``` ### Example Directory Structure diff --git a/helpers/create_links.py b/helpers/create_links.py index 481b0f152..7cf93a503 100644 --- a/helpers/create_links.py +++ b/helpers/create_links.py @@ -1,4 +1,7 @@ +#!/usr/bin/env python + """ + Usage Example: - Using absolute links @@ -20,10 +23,6 @@ > create_links.py --src_dir /path/to/src --dst_dir /path/to/dst --replace_links --use_relative_paths - Other steps remain same. - -Import as: - -import helpers.create_links as hcrelink """ import argparse @@ -35,123 +34,61 @@ from typing import List, Tuple import helpers.hdbg as hdbg -import helpers.hio as hio import helpers.hparser as hparser _LOG = logging.getLogger(__name__) -# ############################################################################# - - -def _main(parser: argparse.ArgumentParser) -> None: - """ - Entry point for the script to manage symbolic links between directories. - - Depending on the command-line arguments, this script either: - - - Replaces matching files in `dst_dir` with symbolic links to `src_dir`. - - Stages all symbolic links in `dst_dir` for modification by replacing them - with writable file copies. 
- - Usage: - - `--replace_links`: Replace files with symbolic links - - `--stage_links`: Replace symbolic links with writable file copies - :return: None - """ - args = parser.parse_args() - hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) - if args.replace_links: - common_files = _find_common_files(args.src_dir, args.dst_dir) - _replace_with_links( - common_files, use_relative_paths=args.use_relative_paths - ) - _LOG.info("Replaced %d files with symbolic links.", len(common_files)) - elif args.stage_links: - symlinks = _find_symlinks(args.dst_dir) - if not symlinks: - _LOG.info("No symbolic links found to stage.") - _stage_links(symlinks) - _LOG.info("Staged %d symbolic links for modification.", len(symlinks)) - else: - _LOG.error("You must specify either --replace_links or --stage_links.") - -def _parse() -> argparse.ArgumentParser: - """ - Parse command-line arguments. - - :return: Argument parser object. - """ - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument("--src_dir", required=True, help="Source directory.") - parser.add_argument("--dst_dir", required=True, help="Destination directory.") - parser.add_argument( - "--replace_links", - action="store_true", - help="Replace files with symbolic links.", - ) - parser.add_argument( - "--stage_links", - action="store_true", - help="Replace symbolic links with writable copies.", - ) - parser.add_argument( - "--use_relative_paths", - action="store_true", - help="Use relative paths for symbolic links instead of absolute paths.", - ) - hparser.add_verbosity_arg(parser) - return parser +# ############################################################################# def _find_common_files(src_dir: str, dst_dir: str) -> List[Tuple[str, str]]: """ Find common files in dst_dir and change to links. 
- If a destination dir is not found, the functions makes a dest dir and copies all files from - source to destination after users approval. All matching files are identified based on their - name and content. The matches are returned as the file paths from both directories. + If a destination dir is not found, the functions makes a dest dir and copies + all files from source to destination after users approval. All matching + files are identified based on their name and content. The matches are + returned as the file paths from both directories. :param src_dir: The source directory containing the original files :param dst_dir: The destination directory to compare files against :return: paths of matching files from `src_dir` and `dst_dir` """ - # Ensure the destination directory exists; create it if it doesn't. - if not os.path.exists(dst_dir): - user_input = input( - "Destination directory %s does not exist. Would you like to create copy all files from source? (y/n): " - ) - if user_input.lower() == "y": - hio.create_dir( - dir_name=dst_dir, - incremental=True, - abort_if_exists=True, - ask_to_delete=False, - backup_dir_if_exists=False, - ) - _LOG.info("Created destination directory: %s", dst_dir) - for root, _, files in os.walk(src_dir): - for file in files: - src_file = os.path.join(root, file) - dst_file = os.path.join( - dst_dir, os.path.relpath(src_file, src_dir) - ) - dst_file_dir = os.path.dirname(dst_file) - # Ensure the destination file directory exists. - if not os.path.exists(dst_file_dir): - os.makedirs(dst_file_dir) - _LOG.info("Created subdirectory: %s", dst_file_dir) - # Copy the file from source to destination. - shutil.copy2(src_file, dst_file) - _LOG.info("Copied file: %s -> %s", src_file, dst_file) - else: - _LOG.error( - "Destination directory %s not created. Exiting function.", - dst_dir, - ) - return [] + # # Ensure the destination directory exists; create it if it doesn't. 
+ # if not os.path.exists(dst_dir): + # user_input = input( + # "Destination directory %s does not exist. Would you like to create copy all files from source? (y/n): " + # ) + # if user_input.lower() == "y": + # hio.create_dir( + # dir_name=dst_dir, + # incremental=True, + # abort_if_exists=True, + # ask_to_delete=False, + # backup_dir_if_exists=False, + # ) + # _LOG.info("Created destination directory: %s", dst_dir) + # for root, _, files in os.walk(src_dir): + # for file in files: + # src_file = os.path.join(root, file) + # dst_file = os.path.join( + # dst_dir, os.path.relpath(src_file, src_dir) + # ) + # dst_file_dir = os.path.dirname(dst_file) + # # Ensure the destination file directory exists. + # if not os.path.exists(dst_file_dir): + # os.makedirs(dst_file_dir) + # _LOG.info("Created subdirectory: %s", dst_file_dir) + # # Copy the file from source to destination. + # shutil.copy2(src_file, dst_file) + # _LOG.info("Copied file: %s -> %s", src_file, dst_file) + # else: + # _LOG.error( + # "Destination directory %s not created. Exiting function.", + # dst_dir, + # ) + # return [] # After copying files, continue with comparing files. common_files = [] for root, _, files in os.walk(src_dir): @@ -159,77 +96,107 @@ def _find_common_files(src_dir: str, dst_dir: str) -> List[Tuple[str, str]]: src_file = os.path.join(root, file) dst_file = os.path.join(dst_dir, os.path.relpath(src_file, src_dir)) # Check if the file exists in the destination folder. - # Certain files do not need to be copied, so we skip them. if not os.path.exists(dst_file): - _LOG.warning( - "Warning: %s is missing in the destination directory.", + _LOG.debug( + "File %s is missing in the destination directory", dst_file, ) continue - # Compare file contents after copying. + # Check if the file is a symbolic link. + if os.path.islink(dst_file): + _LOG.debug( + "File %s is a symbolic link", + dst_file, + ) + continue + # Compare file contents. 
if filecmp.cmp(src_file, dst_file, shallow=False): - _LOG.info( - "Files are the same and will be replaced: %s -> %s", + _LOG.debug( + "Files src_file=%s, dst_file=%s are the same", src_file, dst_file, ) common_files.append((src_file, dst_file)) else: - _LOG.warning( - "Warning: %s and %s have different content.", - dst_file, + _LOG.debug( + "Files src_file=%s, dst_file=%s are not the same", src_file, + dst_file, ) return common_files +def _create_single_link( + src_file: str, + dst_file: str, + use_relative_paths: bool, + abort_on_first_error: bool, +) -> None: + """ + Create a single symbolic link from dst_file to src_file. + + :param src_file: Source file path + :param dst_file: Destination file path where symlink will be created + :param use_relative_paths: If True, create relative symlinks; if + False, use absolute paths + :param abort_on_first_error: If True, abort on the first error; if + False, continue processing + """ + hdbg.dassert_file_exists(src_file) + hdbg.dassert_file_exists(dst_file) + # Remove the destination file. + os.remove(dst_file) + try: + if use_relative_paths: + link_target = os.path.relpath(src_file, os.path.dirname(dst_file)) + else: + link_target = os.path.abspath(src_file) + os.symlink(link_target, dst_file) + # Remove write permissions from the file to prevent accidental + # modifications. 
+ write_bits = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + os.chmod(dst_file, os.stat(dst_file).st_mode & ~write_bits) + _LOG.debug("Created symlink: %s -> %s", dst_file, link_target) + except Exception as e: + msg = "Failed to create symlink %s -> %s with error %s" % ( + dst_file, + link_target, + str(e), + ) + if abort_on_first_error: + raise RuntimeError(msg) from e + else: + _LOG.warning(msg) + + def _replace_with_links( common_files: List[Tuple[str, str]], use_relative_paths: bool, *, abort_on_first_error: bool = False, + dry_run: bool = False, ) -> None: """ Replace matching files in the destination directory with symbolic links. :param common_files: Matching file paths from `src_dir` and `dst_dir` - :param use_relative_paths: If True, create relative symlinks; if False, use absolute paths. - :param abort_on_first_error: If True, abort on the first error; if False, continue processing + :param use_relative_paths: If True, create relative symlinks; if False, use + absolute paths. + :param abort_on_first_error: If True, abort on the first error; if False, + continue processing + :param dry_run: If True, print what will be done without actually doing it. """ for src_file, dst_file in common_files: - try: - hdbg.dassert_file_exists(src_file) - except FileNotFoundError as e: - _LOG.error("Error: %s", str(e)) - if abort_on_first_error: - _LOG.error("Aborting: Source file %s doesn't exist.", src_file) - continue - if os.path.exists(dst_file): - os.remove(dst_file) - try: - if use_relative_paths: - link_target = os.path.relpath(src_file, os.path.dirname(dst_file)) - else: - link_target = os.path.abspath(src_file) - os.symlink(link_target, dst_file) - # Remove write permissions from the file to prevent accidental - # modifications. 
- current_permissions = os.stat(dst_file).st_mode - new_permissions = ( - current_permissions - & ~stat.S_IWUSR - & ~stat.S_IWGRP - & ~stat.S_IWOTH - ) - os.chmod(dst_file, new_permissions) - _LOG.info("Created symlink: %s -> %s", dst_file, link_target) - except Exception as e: - _LOG.error("Error creating symlink for %s: %s", dst_file, e) - if abort_on_first_error: - _LOG.warning( - "Aborting: Failed to create symlink for %s.", dst_file - ) - continue + if dry_run: + # Report only; leave the destination file untouched. + _LOG.info("Dry run: would link %s -> %s", dst_file, src_file) + continue + _create_single_link( + src_file, dst_file, use_relative_paths, abort_on_first_error + ) + + +# ############################################################################# def _find_symlinks(dst_dir: str) -> List[str]: @@ -237,8 +204,10 @@ def _find_symlinks(dst_dir: str) -> List[str]: Find all symbolic links in the destination directory. :param dst_dir: Directory to search for symbolic links - :return: List of paths to symbolic links + :return: List of absolute paths to symbolic links """ + dst_dir = os.path.abspath(dst_dir) + hdbg.dassert_dir_exists(dst_dir) symlinks = [] for root, _, files in os.walk(dst_dir): for file in files: @@ -248,36 +217,128 @@ def _find_symlinks(dst_dir: str) -> List[str]: return symlinks -def _stage_links(symlinks: List[str]) -> None: +def _stage_single_link( + link: str, target_file: str, abort_on_first_error: bool, dry_run: bool +) -> None: + """ + Replace a single symlink with a writable copy of the linked file. + + :param link: The symlink to replace. + :param target_file: The file to copy to the symlink location. + :param abort_on_first_error: If True, abort on the first error; if + False, continue processing + :param dry_run: If True, print what will be done without actually + doing it. + """ + # Resolve the original file the symlink points to. 
+ target_file = os.path.join(os.path.dirname(link), target_file) + if not os.path.exists(target_file): + msg = "Target file does not exist for link %s -> %s" % (link, target_file) + if abort_on_first_error: + raise RuntimeError(msg) + else: + _LOG.warning(msg) + return + if dry_run: + _LOG.info("Dry run: would stage %s -> %s", link, target_file) + return + try: + os.remove(link) + # Copy file to the symlink location. + shutil.copy2(target_file, link) + # Make the file writable to allow for modifications. + write_bits = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + os.chmod(link, os.stat(link).st_mode | write_bits) + _LOG.debug("Staged: %s -> %s", link, target_file) + except Exception as e: + msg = "Error staging link %s: %s" % (link, str(e)) + if abort_on_first_error: + raise RuntimeError(msg) from e + else: + _LOG.warning(msg) + + +def _stage_links( + symlinks: List[str], abort_on_first_error: bool, dry_run: bool +) -> None: """ Replace symbolic links with writable copies of the linked files. :param symlinks: List of symbolic links to replace. """ for link in symlinks: - # Resolve the original file the symlink points to. - target_file = os.readlink(link) - if not os.path.exists(target_file): - _LOG.warning( - "Warning: Target file does not exist for link %s -> %s", - link, - target_file, - ) - continue - # Replace the symlink with a writable copy of the target file. - try: - os.remove(link) - # Copy file to the symlink location. - shutil.copy2(target_file, link) - # Make the file writable to allow for modifications. 
- current_permissions = os.stat(link).st_mode - new_permissions = ( - current_permissions | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH - ) - os.chmod(link, new_permissions) - _LOG.info("Staged: %s -> %s", link, target_file) - except Exception as e: - _LOG.error("Error staging link %s: %s", link, e) + _stage_single_link(link, os.readlink(link), abort_on_first_error, dry_run) + + +# ############################################################################# + + +def _parse() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--src_dir", required=True, help="Source directory.") + parser.add_argument("--dst_dir", required=True, help="Destination directory.") + parser.add_argument( + "--replace_links", + action="store_true", + help="Replace equal files with symbolic links.", + ) + parser.add_argument( + "--stage_links", + action="store_true", + help="Replace symbolic links with writable copies.", + ) + parser.add_argument( + "--compare_files", + action="store_true", + help="Compare files in the directories.", + ) + parser.add_argument( + "--use_relative_paths", + action="store_true", + help="Use relative paths for symbolic links instead of absolute paths.", + ) + parser.add_argument( + "--dry_run", + action="store_true", + help="Print what will be done without actually doing it.", + ) + hparser.add_verbosity_arg(parser) + return parser + + +def _main(parser: argparse.ArgumentParser) -> None: + args = parser.parse_args() + hdbg.init_logger(verbosity=args.log_level, use_exec_path=True) + hdbg.dassert_dir_exists(args.src_dir) + hdbg.dassert_dir_exists(args.dst_dir) + # Exactly one action flag must be given. + hdbg.dassert_eq( + sum([args.replace_links, args.stage_links, args.compare_files]), + 1, + "You must specify exactly one of --replace_links, --stage_links, or --compare_files.", + ) + if args.compare_files: + # Compare files. 
+ common_files = _find_common_files(args.src_dir, args.dst_dir) + _LOG.info("Found %d common files.", len(common_files)) + elif args.replace_links: + # Replace with links. + common_files = _find_common_files(args.src_dir, args.dst_dir) + hdbg.dassert_ne(len(common_files), 0, "No files found to replace.") + _replace_with_links( + common_files, args.use_relative_paths, dry_run=args.dry_run + ) + _LOG.info("Replaced %d files with symbolic links.", len(common_files)) + elif args.stage_links: + # Stage links for modification. + symlinks = _find_symlinks(args.dst_dir) + hdbg.dassert_ne(len(symlinks), 0, "No symbolic links found to stage.") + _stage_links(symlinks, abort_on_first_error=False, dry_run=args.dry_run) + _LOG.info("Staged %d symbolic links for modification.", len(symlinks)) + else: + raise RuntimeError("Internal error") if __name__ == "__main__": diff --git a/helpers/stage_linked_file.py b/helpers/stage_linked_file.py index 43d83e881..5f9945834 100644 --- a/helpers/stage_linked_file.py +++ b/helpers/stage_linked_file.py @@ -1,68 +1,65 @@ -""" -Import as: +#!/usr/bin/env python -import helpers.stage_linked_file as hstlifil +""" +Usage + - python3 stage_linked_file.py --dst_dir /path/to/dst """ import argparse import logging -import os -import shutil -from typing import List _LOG = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) -def find_symlinks(dst_dir: str) -> List[str]: - """ - Find all symbolic links in the destination directory. - - :param dst_dir: Directory to search for symbolic links. - :return: List of paths to symbolic links. - """ - symlinks = [] - for root, _, files in os.walk(dst_dir): - for file in files: - file_path = os.path.join(root, file) - if os.path.islink(file_path): - symlinks.append(file_path) - return symlinks - - -def stage_links(symlinks: List[str]) -> None: - """ - Replace symbolic links with writable copies of the linked files. - - :param symlinks: List of symbolic links to replace. 
- """ - for link in symlinks: - # Resolve the original file the symlink points to. 
- target_file = os.readlink(link) - if not os.path.exists(target_file): - _LOG.warning( - f"Warning: Target file does not exist for link {link} -> {target_file}" - ) - continue - # Replace the symlink with a writable copy of the target file. - try: - os.remove(link) - # Copy file to the symlink location. - shutil.copy2(target_file, link) - # Make the file writable. - os.chmod(link, 0o644) - _LOG.info(f"Staged: {link} -> {target_file}") - except Exception as e: - _LOG.error(f"Error staging link {link}: {e}") - - -def main(): +# def find_symlinks(dst_dir: str) -> List[str]: +# """ +# Find all symbolic links in the destination directory. + +# :param dst_dir: Directory to search for symbolic links. +# :return: List of paths to symbolic links. +# """ +# symlinks = [] +# for root, _, files in os.walk(dst_dir): +# for file in files: +# file_path = os.path.join(root, file) +# if os.path.islink(file_path): +# symlinks.append(file_path) +# return symlinks + + +# def stage_links(symlinks: List[str]) -> None: +# """ +# Replace symbolic links with writable copies of the linked files. + +# :param symlinks: List of symbolic links to replace. +# """ +# for link in symlinks: +# # Resolve the original file the symlink points to. +# target_file = os.readlink(link) +# if not os.path.exists(target_file): +# _LOG.warning( +# f"Warning: Target file does not exist for link {link} -> {target_file}" +# ) +# continue +# # Replace the symlink with a writable copy of the target file. +# try: +# os.remove(link) +# # Copy file to the symlink location. +# shutil.copy2(target_file, link) +# # Make the file writable. +# os.chmod(link, 0o644) +# _LOG.info(f"Staged: {link} -> {target_file}") +# except Exception as e: +# _LOG.error(f"Error staging link {link}: {e}") + + +def main() -> None: parser = argparse.ArgumentParser( description="Stage symbolic links for modification." 
) parser.add_argument("--dst_dir", required=True, help="Destination directory.") args = parser.parse_args() - symlinks = find_symlinks(args.dst_dir) if not symlinks: _LOG.info("No symbolic links found to stage.") @@ -73,10 +70,3 @@ def main(): if __name__ == "__main__": main() - -""" -Usage - - - python3 stage_linked_file.py --dst_dir /path/to/dst - -"""