#!/bin/bash
#
# Prints, for every task of two srun job steps, its rank, the GPUs visible to
# it and its CPU affinity, so that two ways of binding tasks to CPUs can be
# compared. Must be run by a user who already holds a Slurm allocation.
#

# Enabled here rather than on the shebang line so that it also applies when
# the script is started as "bash <script>". pipefail makes a failing srun fail
# the srun pipelines further down instead of being hidden by the exit status
# of the last command in the pipeline.
set -e -o pipefail

# Directory the script was started from.
wd=$(pwd)

# Job the srun steps are attached to: the enclosing allocation when the script
# runs inside one, otherwise the first running job squeue lists for this user.
# --noheader matters: with the header line present and no job in the queue,
# the column title "JOBID" would end up being used as the job id.
jobid=${SLURM_JOB_ID:-}
if [[ -z "$jobid" ]]; then
    # awk reads all of squeue's output, so squeue is never cut off by a
    # closed pipe (which pipefail would report as a failure).
    jobid=$(squeue --me --noheader --states=RUNNING --format='%i' \
        | awk 'NR == 1 { print $1 }')
fi
if [[ -z "$jobid" ]]; then
    echo "No running Slurm job found; create an allocation with salloc first." >&2
    exit 1
fi


#
# This example assumes an allocation has already been created, e.g.:
# N=2 ; salloc -p standard-g  --threads-per-core 1 --exclusive -N $N --gpus $((N*8)) -t 4:00:00 --mem 0
#

# Orders "Rank <n> -- ..." lines numerically by <n>, the second field.
# Sorting numerically on field 1 does not work: that field is the word "Rank",
# every key then compares equal and sort falls back to comparing whole lines
# as text, which puts rank 10 before rank 2 as soon as more than 10 tasks run.
sort_by_rank() {
    sort -k2,2n
}

# Command executed by every task: prints its rank, the GPUs visible to it and
# the CPU affinity of its own shell as reported by taskset.
# Single quotes are deliberate: the variables and $(...) must be expanded by
# the shell of each task, not by this script.
report_cmd='echo "Rank $SLURM_PROCID -- GPUS $ROCR_VISIBLE_DEVICES -- $(taskset -p $$)"'

# Trace the commands so the exact srun invocation is shown next to its output.
set -x
# Number of nodes to use; 8 tasks and 8 GPUs are requested per node.
N=1

#
# Using 7 cores per L3.
# Only the number of CPUs per task is requested here (-c 7); the binding
# printed by this step is the one labelled WRONG below.
#
srun \
    --jobid="$jobid" \
    -c 7 \
    -N "$N" \
    -n $((N * 8)) \
    --gpus $((N * 8)) \
    bash -c "$report_cmd" | sort_by_rank

set +x
echo " ^^^^^^^^^ "
echo "  WRONG!!! "
echo ""
echo ""
echo ""


#
# Using 7 cores per L3 but ordered by the correct NUMA domain.
#
# Each mask is 0xfe (bits 1-7 set: 7 of 8 CPUs, the lowest-numbered one left
# out) shifted to a different group of 8 CPUs. The list is in task order: the
# first mask is used for the first task on a node, the second for the next...
# NOTE(review): the order of the masks encodes the CPU/GPU layout of the nodes
# this example was written for -- confirm before reusing it on other hardware.
#

c=fe
MYMASKS="0x${c}000000000000,0x${c}00000000000000,0x${c}0000,0x${c}000000,0x${c},0x${c}00,0x${c}00000000,0x${c}0000000000"

set -x
srun \
    --jobid="$jobid" \
    --cpu-bind=mask_cpu:"$MYMASKS" \
    -N "$N" \
    -n $((N * 8)) \
    --gpus $((N * 8)) \
    bash -c "$report_cmd" | sort_by_rank

set +x
echo " ^^^^^^^^^ "
echo "  CORRECT! "
