Skip to content

Azure batch blob container mount caching issue #194

@natbprice

Description

@natbprice

Problem Description

  • Tasks are accessing a cached version of files instead of the latest version from blob storage when using a blob container mount

Steps to Reproduce

  • Create a pool with blob storage mount with no caching
    • --use-adls=true --file-cache-timeout=0 --attr-cache-timeout=0 --attr-timeout=0
  • Create a job with a task to read file from blob storage mount
    • e.g., bash -c 'cat $AZ_BATCH_NODE_MOUNTS_DIR/mycontainer/myfile.txt'
  • Externally, update the file in blob storage
  • Wait a minute and then create a new job on same pool with a task to read file from blob storage mount

Expected Results

  • If the file cache timeout is expired, the latest version of file will be downloaded from blob storage when accessed by the task
    • In this case the timeout is 0 seconds so file should always be downloaded from blob storage
  • The task will read the latest version of file from blob storage

Actual Results

  • Repeated jobs on the same cluster appear to access a cached version of the blob file
  • The file available to the task is not updated despite testing various cache settings

Additional Logs

NA

Additional Comments

  • If browsing the fsmounts directory on the node with Azure Portal, it appears the latest file is removed from the cache directory (${TMP_CACHE_PATH}) and the new version of the file is downloaded when file is accessed again
  • The correct version of the file is displayed when browsing the node in Azure Portal, however, the task still seems to access an outdated cached version of file
  • The documentation of mount options is not very clear
    • Are these Fuse options? Are they blobfuse2 options? Do I need to preface each option with -o? What is the default cache behavior?
  • See /opt/batch/bin/mount_blobfuse.sh for others encountering similar issues
#!/usr/bin/env bash

# Linux distribution independent mount blobfuse script

# shellcheck disable=SC1091

# Exit codes (read-only). Where each one is raised in this script:
#   50 - distribution id or release could not be determined
#   52 - distribution/release has no supported blobfuse package source
#   54 - package installation kept failing after all retries
#   55 - file download kept failing after all retries
#   56 - script was not started as root
#   57 - unsupported OS, or neither curl nor wget is available
#   58 - package installation gave up while apt output still mentioned unattended-upgr
readonly \
    UNKNONW_DISTRIB_ID_OR_RELEASE=50 \
    BLOBFUSE_NOT_SUPPORTED_DISTRIB_ID=52 \
    INSTALL_PACKAGE_FAILURE=54 \
    DOWNLOAD_FILE_FAILURE=55 \
    NOT_RUNNING_AS_ROOT_FAILURE=56 \
    NOT_SUPPORTED_DISTRIB_ID=57 \
    MOUNT_INSTALL_FAILURE_UNATTENDED_UPGRADES=58

# functions
# Print one timestamped log line on stdout.
# Arguments:
#   $1 - severity label (this script passes INFO or ERROR)
#   $2 - message text
# Outputs:
#   "<date -u -Ins timestamp> - <label>: <message>"
log() {
    local severity=$1
    local message=$2
    printf '%s - %s: %s\n' "$(date -u -Ins)" "$severity" "$message"
}

# Log a message with the ERROR label.
# Arguments:
#   $1 - message text
log_error() {
    local message=$1
    log ERROR "$message"
}

# Log a message with the INFO label.
# Arguments:
#   $1 - message text
log_info() {
    local message=$1
    log INFO "$message"
}

# Install a package with the distribution's package manager, retrying on failure.
# This function should only install a single package at once (the yum/tdnf
# "already installed" check joins all arguments into one grep pattern).
# Globals:
#   PACKAGER (read) - "apt", "yum" or "tdnf"; any other value never succeeds
#   INSTALL_PACKAGE_FAILURE, MOUNT_INSTALL_FAILURE_UNATTENDED_UPGRADES (read)
# Arguments:
#   $@ - package name
# Outputs:
#   package manager output and progress messages on stdout
# Returns:
#   returns normally once the package is (or already was) installed; otherwise
#   exits the script after 60 attempts with one of the exit codes above.
# Note: errexit is switched off while the function runs and is switched ON
#   (set -e) before it returns, regardless of the caller's previous setting.
install_packages() {
    set +e
    local retries=60
    # Start as "failed" so an unrecognised PACKAGER counts as a failed attempt
    # instead of feeding an empty value to the numeric tests below.
    local rc=1
    local output=""
    while [ "$retries" -gt 0 ]; do
        if [ "$PACKAGER" == "apt" ]; then
            # shellcheck disable=SC2016
            dpkg-query --show --showformat='${Status}' "$@" | grep -qi 'install ok installed'
            rc=$?
            if [ "$rc" -eq 0 ]; then
                log_info "Already installed packages ($PACKAGER): $*"
                break
            fi
            apt-get update
            rc=$?
            if [ "$rc" -eq 0 ]; then
                log_info "Successfully updated package index"
                output=$(apt-get install -y -q -o Dpkg::Options::="--force-confnew" --no-install-recommends "$@")
                rc=$?
                # The output is captured for the unattended-upgr check; replay it
                # so install problems remain visible in the log.
                if [ -n "$output" ]; then
                    printf '%s\n' "$output"
                fi
                if [[ "$output" == *"(unattended-upgr)"* ]]; then
                    # Give the other package operation extra time before retrying.
                    sleep 30
                fi
            fi
        elif [ "$PACKAGER" == "yum" ]; then
            yum list installed | grep "^$*\\."
            rc=$?
            if [ "$rc" -eq 0 ]; then
                log_info "Already installed packages ($PACKAGER): $*"
                break
            fi
            yum makecache -y
            rc=$?
            if [ "$rc" -eq 0 ]; then
                log_info "Successfully updated package index"
                yum install -y "$@"
                rc=$?
            fi
        elif [ "$PACKAGER" == "tdnf" ]; then
            tdnf list installed | grep "^$*\\."
            rc=$?
            if [ "$rc" -eq 0 ]; then
                log_info "Already installed packages ($PACKAGER): $*"
                break
            fi
            tdnf makecache -y
            rc=$?
            if [ "$rc" -eq 0 ]; then
                log_info "Successfully updated package index"
                tdnf install -y "$@"
                rc=$?
            fi
        fi

        if [ "$rc" -eq 0 ]; then
            log_info "installed packages succeeded."
            break
        fi
        retries=$((retries-1))
        if [ "$retries" -eq 0 ]; then
            if [[ "$output" == *"(unattended-upgr)"* ]]; then
                log ERROR "Could not install packages ($PACKAGER); apt output still reports (unattended-upgr): $*"
                exit "${MOUNT_INSTALL_FAILURE_UNATTENDED_UPGRADES}"
            fi
            log ERROR "Could not install packages ($PACKAGER): $*"
            exit "$INSTALL_PACKAGE_FAILURE"
        fi
        sleep 1
    done
    set -e
}

# Download a URL to a local path, retrying up to 20 times one second apart.
# Globals:
#   DOWNLOADER (read) - "curl" selects curl; any other value uses wget
#   DOWNLOAD_FILE_FAILURE (read) - exit code used when every attempt fails
# Arguments:
#   $1 - source URL
#   $2 - destination file path
# Returns:
#   returns normally after a successful download; exits the script otherwise.
# Note: errexit is switched off during the retries and switched ON (set -e)
#   before returning.
download_file_as() {
    local url=$1
    local target=$2
    local attempts_left=20
    log INFO "Downloading: $url as $target"
    set +e
    while [ $attempts_left -gt 0 ]; do
        case "$DOWNLOADER" in
            curl)
                curl -fSsL -o "$target" "$url" && break
                ;;
            *)
                wget -O "$target" "$url" && break
                ;;
        esac
        attempts_left=$((attempts_left-1))
        if [ $attempts_left -eq 0 ]; then
            log_error "Could not download: $url"
            exit "${DOWNLOAD_FILE_FAILURE}"
        fi
        sleep 1
    done
    set -e
}

# Register the Microsoft package source for the current distribution.
# Globals (read): DISTRIB_ID, DISTRIB_RELEASE, DISTRIB_LIKE, PACKAGER,
#   BLOBFUSE_NOT_SUPPORTED_DISTRIB_ID
# Behaviour:
#   - exits with BLOBFUSE_NOT_SUPPORTED_DISTRIB_ID when the distribution/release
#     combination is not in the supported list below
#   - downloads the Microsoft signing key to ./microsoft.asc
#   - apt: makes sure gpg exists, downloads prod.list and writes the dearmored key
#   - yum (except mariner): downloads prod.repo and imports the key with rpm
# Note: the apt branch leaves errexit switched ON (set -e).
prep_azureblob_mount() {
    local pkg_distro=""
    # check for supported distros
    case "$DISTRIB_ID" in
        ubuntu)
            case "$DISTRIB_RELEASE" in
                20.04|22.04|24.04) pkg_distro="$DISTRIB_ID" ;;
            esac
            ;;
        rhel)
            pkg_distro="$DISTRIB_ID"
            ;;
        centos*)
            # Temporarily restrict CentOS 8 as there is no source repo and RHEL 8 blobfuse is broken on CentOS
            if [ "$DISTRIB_RELEASE" == "7" ]; then
                pkg_distro=centos
            fi
            ;;
        *)
            # RHEL-like distributions are checked before mariner/azurelinux.
            if [[ "$DISTRIB_LIKE" == *rhel* ]]; then
                case "$DISTRIB_RELEASE" in
                    8*|9*) pkg_distro=rhel ;;
                esac
            elif [ "$DISTRIB_ID" == "mariner" ] || [ "$DISTRIB_ID" == "azurelinux" ]; then
                pkg_distro="$DISTRIB_ID"
            fi
            ;;
    esac

    if [ -z "$pkg_distro" ]; then
        log_error "The ${DISTRIB_ID} version ${DISTRIB_RELEASE} is not supported."
        exit "${BLOBFUSE_NOT_SUPPORTED_DISTRIB_ID}"
    fi

    # Download Microsoft GPG public key and install repository configuration
    download_file_as "https://packages.microsoft.com/keys/microsoft.asc" "microsoft.asc"
    if [ "$PACKAGER" == "apt" ]; then
        local keyring
        # ensure gpg exists
        set +e
        command -v gpg > /dev/null 2>&1 || install_packages gpg
        set -e
        download_file_as "https://packages.microsoft.com/config/${pkg_distro}/${DISTRIB_RELEASE}/prod.list" "/etc/apt/sources.list.d/microsoft-prod.list"
        # 24.04 stores the key under /usr/share/keyrings, other releases under trusted.gpg.d
        if [ "${DISTRIB_RELEASE}" == "24.04" ]; then
            keyring=/usr/share/keyrings/microsoft-prod.gpg
        else
            keyring=/etc/apt/trusted.gpg.d/microsoft.gpg
        fi
        # Remove any previous keyring first so gpg can write the output file.
        rm -f "$keyring"
        gpg --batch --no-tty -o "$keyring" --dearmor microsoft.asc
    elif [ "$PACKAGER" == "yum" ] && [ "$DISTRIB_ID" != "mariner" ]; then
        download_file_as "https://packages.microsoft.com/config/${pkg_distro}/${DISTRIB_RELEASE}/prod.repo" "/etc/yum.repos.d/microsoft-prod.repo"
        rpm --import microsoft.asc
    fi
}

# Parse the command-line options into global variables.
# Arguments:
#   $@ - the script's arguments
# Globals written (only for options that are present):
#   -a ACCOUNT_NAME, -k KEY_STRING, -c CONTAINER_NAME, -d MOUNT_POINT,
#   -l MOUNT_LOCATION, -o MOUNT_OPTIONS, -r IDENTITY_RESOURCE_ID, -e BLOB_ENDPOINT
# Behaviour:
#   -h, an unknown option, or a missing option argument prints the usage line
#   and exits with status 1.
parse_args() {
    # Keep the loop variable out of the global namespace.
    local opt
    # getopts resumes from OPTIND; reset it so every call parses from the first argument.
    OPTIND=1
    while getopts "ha:k:c:d:l:o:r:e:" opt; do
        case "${opt}" in
            h|\?)
                echo "Usage: mount_blobfuse.sh -a <acc name> -k <acc or sas key> -c <container name> -d <mount dir> -l <mount location> -o <mount options> -r <identity resource id> -e <blob endpoint>"
                exit 1
                ;;
            a)
                ACCOUNT_NAME="${OPTARG}"
                ;;
            k)
                KEY_STRING="${OPTARG}"
                ;;
            c)
                CONTAINER_NAME="${OPTARG}"
                ;;
            d)
                MOUNT_POINT="${OPTARG}"
                ;;
            l)
                MOUNT_LOCATION="${OPTARG}"
                ;;
            o)
                MOUNT_OPTIONS="${OPTARG}"
                ;;
            r)
                IDENTITY_RESOURCE_ID="${OPTARG}"
                ;;
            e)
                BLOB_ENDPOINT="${OPTARG}"
                ;;
        esac
    done
}

# Only root (effective uid 0) may run this script.
if (( EUID != 0 )); then
    log_error "Please run as root"
    exit "${NOT_RUNNING_AS_ROOT_FAILURE}"
fi

# From here on, abort on any unhandled command failure.
set -e

# script start
# parse script arguments
parse_args "$@"

# Detect the distribution. /etc/os-release is preferred; its ID, VERSION_ID and
# ID_LIKE fields are copied into DISTRIB_ID, DISTRIB_RELEASE and DISTRIB_LIKE.
if [ -e /etc/os-release ]; then
    . /etc/os-release
    DISTRIB_ID=${ID:-}
    DISTRIB_RELEASE=${VERSION_ID:-}
    DISTRIB_LIKE=${ID_LIKE:-}
fi
# Test for EMPTY values, not merely unset ones: the assignments above always set
# the variables, so an os-release file lacking ID or VERSION_ID would otherwise
# skip both the fallback and the error below.
if [ -z "${DISTRIB_ID:-}" ] || [ -z "${DISTRIB_RELEASE:-}" ]; then
    # fallback to /etc/lsb-release
    if [ -e /etc/lsb-release ]; then
        . /etc/lsb-release
        DISTRIB_LIKE=
    fi
fi
if [ -z "${DISTRIB_ID:-}" ] || [ -z "${DISTRIB_RELEASE:-}" ]; then
    log_error "Unknown DISTRIB_ID or DISTRIB_RELEASE."
    exit "${UNKNONW_DISTRIB_ID_OR_RELEASE}"
fi
# Normalise to lower case so later comparisons can use literal lower-case names.
DISTRIB_ID=${DISTRIB_ID,,}
DISTRIB_RELEASE=${DISTRIB_RELEASE,,}
DISTRIB_LIKE=${DISTRIB_LIKE,,}


# set distribution specific vars
# PACKAGER selects the branch used by install_packages/prep_azureblob_mount,
# DOWNLOADER the tool used by download_file_as.
PACKAGER=
DOWNLOADER=

if [ "$DISTRIB_ID" == "ubuntu" ] || [ "$DISTRIB_ID" == "debian" ]; then
    PACKAGER=apt
    # Export DEBIAN_FRONTEND itself (keeping a value already provided by the
    # caller, defaulting to noninteractive). Exporting the *expansion* of
    # ${DEBIAN_FRONTEND=noninteractive} would export a variable named after the
    # value and leave DEBIAN_FRONTEND out of the environment of child processes.
    export DEBIAN_FRONTEND="${DEBIAN_FRONTEND-noninteractive}"
elif [ "$DISTRIB_ID" == "rhel" ] || [[ "$DISTRIB_LIKE" == *rhel* ]] || [ "$DISTRIB_ID" == "mariner" ]; then
    PACKAGER=yum
    # Ensure that DISTRIB_RELEASE is only the major version
    DISTRIB_RELEASE=$(cut -d '.' -f 1 <<< "$DISTRIB_RELEASE")
elif [ "$DISTRIB_ID" == "azurelinux" ]; then
    PACKAGER=tdnf
    # Ensure that DISTRIB_RELEASE is only the major version
    DISTRIB_RELEASE=$(cut -d '.' -f 1 <<< "$DISTRIB_RELEASE")
else
    log_error "Unsupported OS: $DISTRIB_ID $DISTRIB_RELEASE."
    exit "${NOT_SUPPORTED_DISTRIB_ID}"
fi

# Pick the download tool: curl is preferred, wget is the fallback.
set +e
if command -v curl > /dev/null 2>&1; then
    DOWNLOADER=curl
elif command -v wget > /dev/null 2>&1; then
    DOWNLOADER=wget
else
    log_error "No available downloader program for $DISTRIB_ID $DISTRIB_RELEASE."
    exit "${NOT_SUPPORTED_DISTRIB_ID}"
fi
set -e

# Paths derived from the -l (mount location) and -d (mount dir) arguments.
BLOBFUSE_CONFIG_DIR="${MOUNT_LOCATION}/bfuseconnect"

mkdir -p "${BLOBFUSE_CONFIG_DIR}"

BLOBFUSE_CONFIG_FILE="${BLOBFUSE_CONFIG_DIR}/${MOUNT_POINT}connection.yaml"
MOUNT_POINT_PATH="${MOUNT_LOCATION}/${MOUNT_POINT}"
TMP_CACHE_PATH="${MOUNT_LOCATION}/${MOUNT_POINT}blobfusetmp"

# The config file is about to receive credentials (account key / SAS). Remove it
# on every exit path so a failure in a later step cannot leave it on disk.
# rm -f is a no-op when the file is already gone.
trap 'rm -f -- "${BLOBFUSE_CONFIG_FILE}"' EXIT

# Write the azstorage section of the blobfuse2 config file
log_info "Setting account name, key and SAS URL for ${BLOBFUSE_CONFIG_FILE}"
echo "azstorage:" > "${BLOBFUSE_CONFIG_FILE}"
echo "  account-name: $ACCOUNT_NAME" >> "${BLOBFUSE_CONFIG_FILE}"
# Restrict access before any secret is appended.
chmod 600 "${BLOBFUSE_CONFIG_FILE}"

# Either KEY_STRING or IDENTITY_RESOURCE_ID will be defined
if [[  -n ${KEY_STRING} ]]; then
    # A key string starting with "?" is treated as a SAS token, anything else as an account key.
    if [[ ${KEY_STRING} == \?* ]]; then
        echo "  mode: SAS" >> "${BLOBFUSE_CONFIG_FILE}"
        echo "  sas: ${KEY_STRING}" >> "${BLOBFUSE_CONFIG_FILE}"
    else
        echo "  mode: Key" >> "${BLOBFUSE_CONFIG_FILE}"
        echo "  account-key: ${KEY_STRING}" >> "${BLOBFUSE_CONFIG_FILE}"
    fi
else
    echo "  mode: MSI" >> "${BLOBFUSE_CONFIG_FILE}"
    echo "  resid: ${IDENTITY_RESOURCE_ID}" >> "${BLOBFUSE_CONFIG_FILE}"
fi
echo "  container: ${CONTAINER_NAME}" >> "${BLOBFUSE_CONFIG_FILE}"
if [[  -n ${BLOB_ENDPOINT} ]]; then
    echo "  endpoint: ${BLOB_ENDPOINT}" >> "${BLOBFUSE_CONFIG_FILE}"
fi

# prepare blobfuse mount
prep_azureblob_mount

# Install blobfuse
install_packages blobfuse2

# Configuring and Running.
mkdir -p "${TMP_CACHE_PATH}"
log_info "${TMP_CACHE_PATH} temp cache path is created."

# This is folder naming with the filegroup name.
mkdir -p "${MOUNT_POINT_PATH}"
chmod 755 "${MOUNT_POINT_PATH}"
log_info "${MOUNT_POINT_PATH} mount point path is created."

# Below is the config file approach
# Note: we want to make sure that the arguments supplied in the mount options are not grouped,
# hence MOUNT_OPTIONS is left unquoted and shellcheck SC2086 is disabled for the command below.
# The exit status is captured instead of letting errexit abort here, so that the
# config file (which contains credentials) is removed even when the mount fails.
mount_rc=0
# shellcheck disable=SC2086
blobfuse2 mount "${MOUNT_POINT_PATH}" --config-file="${BLOBFUSE_CONFIG_FILE}" --tmp-path="${TMP_CACHE_PATH}" ${MOUNT_OPTIONS} || mount_rc=$?

# remove connection config file
rm -f "${BLOBFUSE_CONFIG_FILE}"
log_info "Config file is removed successfully."

# Propagate a mount failure with the exit status blobfuse2 returned.
if [ "${mount_rc}" -ne 0 ]; then
    log_error "blobfuse2 mount failed with exit code ${mount_rc}."
    exit "${mount_rc}"
fi

# Best-effort diagnostics: a df failure must not fail the script.
set +e
log_info "Dumping mount data"
df -h "${MOUNT_POINT_PATH}"
set -e

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions