Globus CLI tutorial

This one-hour tutorial shows participants how to use the Globus command-line interface (CLI) both interactively and within scripts as part of larger workflows. The Globus CLI enables programmatic access to NCAR and external data endpoints, including the new NCAR Campaign Storage system.

Topics include:

  • Initiating, monitoring, and modifying transfers between endpoints
  • Incorporating CLI commands into data mover scripts
  • Best practices for storing data at NCAR
  • Strategies for using the Campaign Storage platform

Review text from the tutorial.

Go to example scripts below.

Example scripts

preserve.sh

#!/bin/bash

# ---------------------------------------------------------------------
# User settings (EDIT THESE BEFORE RUNNING!)
# ---------------------------------------------------------------------

# Case identifier (used as a path component) and its descriptive name
CASE="BF2013-ens"
CASENAME="Sept 2013 WRF Ensemble"

# Directories derived from the user name and the case identifier
SRCDIR="/glade/scratch/${USER}/WRF/${CASE}"
DESTDIR="/gpfs/csfs1/cisl/csg/vanderwb/${CASE}"
LOGDIR="/glade/work/${USER}/WRF/${CASE}/logs"

# Time stamp of this run, formatted as yymmdd-HHMMSS
TDATE="$(date '+%y%m%d-%H%M%S')"

# Mail message to send for endpoint failure
#
# Globals:   USER (read) - passed to mail as the sender (-r)
# Arguments: $1 - passed to mail as the recipient address
# Returns:   exit status of mail, or 2 when $1 is missing
#
# NOTE(review): $1 ends up in mail's recipient position, so callers
# must pass an e-mail address (or local user name) here.
function errormail {
if [[ -z "${1:-}" ]]; then
    echo "errormail: missing recipient argument" >&2
    return 2
fi

# The message body is supplied through a here-document; without it
# mail would read the caller's stdin and the terminator line would be
# executed as a command.
mail -s "ENDPOINT INACTIVE - transfer to CS failed"         \
     -r "${USER}"  "$1" << EOM
The NCAR Globus endpoint is not activated, so the transfer to
Campaign Storage was aborted. Reactivate the endpoint and rerun
the transfer.
EOM
}

# Mail message to send when the Globus credential is close to expiry
#
# Globals:   USER (read) - passed to mail as the sender (-r)
# Arguments: $1 - passed to mail as the recipient address
# Returns:   exit status of mail, or 2 when $1 is missing
#
# NOTE(review): $1 ends up in mail's recipient position, so callers
# must pass an e-mail address (or local user name) here.
function warnmail {
if [[ -z "${1:-}" ]]; then
    echo "warnmail: missing recipient argument" >&2
    return 2
fi

# The message body is supplied through a here-document; without it
# mail would read the caller's stdin and the terminator line would be
# executed as a command.
mail -s "NCAR CISL Globus credential expires soon"          \
     -r "${USER}"  "$1" << EOM
The credential that activates the NCAR Globus endpoint expires
soon. Reactivate the endpoint to keep scheduled transfers working.
EOM
}

# Load Python to get the CLI
module load python
ncar_pylib 20181024

# Every log file below is written with a relative name, so stop here
# if the log directory cannot be created or entered; otherwise the logs
# would silently land in whatever directory the script was started from.
if ! mkdir -p "$LOGDIR" || ! cd "$LOGDIR"; then
    echo "Fatal: cannot use log directory '$LOGDIR'" >&2
    exit 1
fi

# Print the ID of the first endpoint returned by a Globus endpoint
# search for the name given in $1, restricted to endpoints owned by
# ncar@globusid.org.
function endpoint_id {
    globus endpoint search "$1"                             \
        --filter-owner-id ncar@globusid.org                 \
        --jq 'DATA[0].id' --format UNIX
}

# Retrieve endpoint IDs and store them as variables
EPGLADE=$(endpoint_id 'NCAR GLADE')
EPSTORE=$(endpoint_id 'NCAR Campaign Storage')

# An empty ID means the search produced no output; every later globus
# command needs both IDs, so record the failure and stop.
if [[ -z "$EPGLADE" || -z "$EPSTORE" ]]; then
    echo "Fatal: could not look up NCAR endpoint IDs."      \
        | tee "log.$TDATE" >&2
    exit 1
fi

# Check if endpoint is activated
# (we don't care about output, only return code)
if ! globus endpoint is-activated "$EPGLADE" > /dev/null 2>&1; then
    echo "Fatal: NCAR endpoints aren't activated." > "log.$TDATE"
    echo "Aborting transfer..." >> "log.$TDATE"
    # errormail hands its argument to mail as the recipient, so pass
    # the user rather than the endpoint ID.
    errormail "${USER}"
    exit 1
fi

EXPIRE=$(globus endpoint is-activated                       \
            --jq "expire_time" -F unix "$EPGLADE")
echo "NCAR endpoints active until $EXPIRE" > "log.$TDATE"

# If credential has less than five days (432000 seconds) until expiry,
# send a warning email
TIMELEFT=$(globus endpoint is-activated                     \
            --jq "expires_in" -F unix "$EPGLADE")

# Compare only when the value is a non-negative integer. An empty or
# negative value would otherwise be treated as "about to expire" and
# trigger a spurious warning.
# NOTE(review): the Globus Transfer API documents expires_in = -1 for
# endpoints that never expire - confirm for the installed CLI.
if [[ "$TIMELEFT" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value with leading zeros is not octal
    if (( 10#$TIMELEFT <= 432000 )); then
        warnmail "${USER}"
    fi
else
    echo "Note: no usable expiry interval reported ('$TIMELEFT')" \
        >> "log.$TDATE"
fi

# Start copy of GLADE data holdings to CS
# Use modification time to determine which files to copy
TID=$(globus transfer --recursive --sync-level mtime        \
        --label "$CASENAME - $TDATE backup"                 \
        "${EPGLADE}:${SRCDIR}" "${EPSTORE}:${DESTDIR}"      \
        --jq task_id --format UNIX)

# Without a task ID none of the commands below can work
if [[ -z "$TID" ]]; then
    echo "Fatal: transfer was not submitted (no task ID)." >> "log.$TDATE"
    exit 1
fi

# Wait for task to complete so that we can log what happened
# (make sure we don't wait forever: 21600 seconds = 6 hours)
if ! globus task wait "$TID" --timeout 21600; then
    # Keep going so the task state is still captured in the logs
    echo "Warning: 'globus task wait' returned non-zero for $TID" \
        >> "log.$TDATE"
fi

# Output information about transfer
globus task show "$TID" >> "log.$TDATE"
globus task show -t "$TID" > "files.$TDATE"
globus task event-list "$TID" > "events.$TDATE"
globus ls "${EPSTORE}:${DESTDIR}" > "ls.$TDATE"

workflow.csh

#!/bin/tcsh

# In this example, we run a hypothetical CFD model to produce daily
# forecasts. Analysis data is stored on Campaign Storage after it is
# produced from the raw output.

# Use input forecast start time, or use yesterday
if ( $#argv == 1 ) then
    set TIMESTR="$1"
else
    set TIMESTR="yesterday"
endif

# Resolve the date with a single call so year, month and day always
# belong to the same instant. Quoting TIMESTR keeps multi-word date
# strings (e.g. "2 days ago") together as one argument.
set DATEPARTS=( `date -d "$TIMESTR" '+%Y %m %d'` )

# date prints nothing usable when it cannot parse the string
if ( $#DATEPARTS != 3 ) then
    echo "Fatal: cannot interpret forecast date '$TIMESTR'"
    exit 1
endif

set FY=$DATEPARTS[1]
set FM=$DATEPARTS[2]
set FD=$DATEPARTS[3]

# Forecast identifier: year, month and day joined together
set FCST = ${FY}${FM}${FD}

# Directories used by this workflow (EDIT THESE BEFORE RUNNING!)
set SRCDIR = /glade/work/${USER}/FCSTMOD
set RUNDIR = /glade/scratch/${USER}/FCSTMOD/${FCST}
set RDADIR = /glade/collections/rda/data/ds083.3
set CSDIR  = /gpfs/csfs1/cisl/csg/vanderwb/fcst_archive

# Load Python to get the CLI
module load python
ncar_pylib 20181024

# Create and populate run directory
mkdir -p "$RUNDIR"
cd "$RUNDIR"
ln -s ${SRCDIR}/*.exe .

# Link the static grid data
ln -s ${SRCDIR}/data/grid.dat .

# Gather initial data from RDA
ln -s ${RDADIR}/${FY}/${FY}${FM}/*${FY}${FM}${FD}00.f00* init.dat

# Run our model; stop if it fails so that incomplete output is
# neither post-processed nor archived
./model.exe
if ( $status != 0 ) then
    echo "Fatal: model.exe failed for ${FCST}; nothing was archived."
    exit 1
endif

# Run post-processing to generate analysis; stop on failure for the
# same reason
./analysis.exe
if ( $status != 0 ) then
    echo "Fatal: analysis.exe failed for ${FCST}; nothing was archived."
    exit 1
endif

# Retrieve endpoint IDs and store them as variables
set EPGLADE=`globus endpoint search 'NCAR GLADE'            \
                --filter-owner-id ncar@globusid.org         \
                --jq 'DATA[0].id' --format UNIX`
set EPSTORE=`globus endpoint search 'NCAR Campaign Storage' \
                --filter-owner-id ncar@globusid.org         \
                --jq 'DATA[0].id' --format UNIX`

# An empty ID means the search printed nothing; every later globus
# command needs both IDs, so record the failure and stop.
if ( "$EPGLADE" == "" || "$EPSTORE" == "" ) then
    echo "Fatal: could not look up NCAR endpoint IDs." > globus.log
    echo "Failed: $FCST to Campaign Storage!" > ~/GLOBUS-ERROR.$FCST
    exit 1
endif

# Find out whether the GLADE endpoint is activated.
# (we don't care about output, only return code)
globus endpoint is-activated ${EPGLADE} >& /dev/null

if ( $status == 0 ) then
    # Activated: start globus.log with the reported expiry time
    set EXPIRE = `globus endpoint is-activated --jq expire_time -F unix ${EPGLADE}`
    echo "NCAR endpoints active until $EXPIRE" > globus.log
else
    # Not activated: log the reason, leave an error marker file in the
    # home directory, and stop
    echo "Fatal: NCAR endpoints aren't activated." > globus.log
    echo "Aborting transfer..." >> globus.log
    echo "Failed: $FCST to Campaign Storage!" > ~/GLOBUS-ERROR.$FCST
    exit 1
endif

# Directory for this forecast, located beneath the archive directory
set DESTDIR = ${CSDIR}/${FCST}

# List the archive directory on the storage endpoint; the output is
# discarded because only the exit status is needed
globus ls ${EPSTORE}:${CSDIR} >& /dev/null

# A non-zero status from the listing means the archive directory has
# to be created first
if ( $status ) then
    globus mkdir ${EPSTORE}:${CSDIR} >>& globus.log
endif

# Create the directory for this forecast
globus mkdir ${EPSTORE}:${DESTDIR} >>& globus.log

# Start copy of GLADE data holdings to CS
# Build a batch file with one "source-path destination-path" pair per
# forecast file (fcst*.nc) found in the current directory
set BATCHFMT="${RUNDIR}/\1 ${DESTDIR}/\1"
ls -1 fcst*.nc | sed "s|\(.*\)|${BATCHFMT}|" > globus-batch.txt

# --batch takes the source/destination pairs from standard input, so
# the batch file must be redirected into the command. Output is
# appended (>>&) so the earlier entries in globus.log are kept.
# NOTE(review): newer Globus CLI releases may expect the batch file as
# an argument to --batch - confirm against the installed version.
globus transfer $EPGLADE $EPSTORE                           \
    --label "$FCST - copy forecast to CS"                   \
    --batch < globus-batch.txt >>& globus.log