Using MareNostrum

Login

There are 8 login nodes and 3 gateways that redirect to any of the login nodes in a load-balanced way:

Hostname

Node type

glogin1.bsc.es

GPP login node

glogin2.bsc.es

GPP login node

alogin1.bsc.es

ACC login node

alogin2.bsc.es

ACC login node

transfer1.bsc.es

Storage 5 login node and password management node

transfer2.bsc.es

Storage 5 login node

transfer3.bsc.es

Storage 5 login node

transfer4.bsc.es

Storage 5 login node

Host key fingerprint for compute (GPP and ACC) login nodes:

Algorithm

Fingerprint (SHA256)

RSA

SHA256:OHo850hfR0cZ3beKasVXdS3fIrtS015DOeHGFFviYqw

ECDSA

SHA256:b+qUhVkpCKVZuU07vCg8H7pELs5RTfxbziCah+bixGo

ED25519

SHA256:c3d+hIywoqNHYQlYsmXFisK3wPXiPoGNS/V5NYFsvlQ

Host key fingerprint for storage login nodes:

Algorithm

Fingerprint (SHA256)

RSA

SHA256:DhOYLjO0ysbnflg7ooS+hoipmainGf5BM6AcHT0tVxI

ECDSA

SHA256:QWHnMLQqk4+lg3qnEx6gA/6AF2s5AXI4WgUNFaQGTrY

ED25519

SHA256:ZlZZ5J+iKWyGdSXttz6cksPV6M5OekLMDNCoO4qY9vA

More details can be found in the wiki page MareNostrum / Logging in.

Building dependencies

Boost

Send source code:

# Download the Boost 1.88.0 release tarball locally, then upload it to
# the cluster home directory via the dedicated storage/transfer node.
curl -Lo boost_1_88_0.tar.bz2 "https://archives.boost.io/release/1.88.0/source/boost_1_88_0.tar.bz2"
scp boost_1_88_0.tar.bz2 username@transfer1.bsc.es:.

Build on the ACC partition:

# last update: February 2026
mkdir -p ~/bin
# ACC toolchain: NVHPC compilers with a matching GCC and CMake.
module load bsc/1.0 \
            nvidia-hpc-sdk/25.7 \
            gcc/13.2.0-nvidia-hpc-sdk \
            cmake/3.30.5
mkdir boost-build
cd boost-build
tar xfj ~/boost_1_88_0.tar.bz2
cd boost_1_88_0
# Enable the Boost.MPI library (not built unless declared in user-config.jam).
echo 'using mpi ;' > tools/build/src/user-config.jam
./bootstrap.sh --with-libraries=filesystem,system,mpi,serialization,test
# Install into a versioned prefix under ~/bin; 'time' reports build duration.
time ./b2 -j 4 install --prefix="${HOME}/bin/boost-1.88.0-nvhpcx25.7"
cd ../..
# Remove the scratch build tree; only the installed prefix is kept.
rm -r boost-build

Python

Send source code:

# Download the CPython source tarball locally.
curl -Lo cpython-3.14.2.tar.gz https://github.com/python/cpython/archive/refs/tags/v3.14.2.tar.gz
# Pre-fetch binary wheels for the target interpreter version — presumably
# because cluster nodes have no outbound internet access (matches the
# upload-everything workflow used throughout this document).
pip download cython numpy scipy h5py vtk pint pandas setuptools packaging \
    --progress-bar=on --python-version 3.14.2 --only-binary=":all:" -d "./wheels/"
tar cfz wheels.tar.gz "./wheels/"
scp cpython-3.14.2.tar.gz wheels.tar.gz username@transfer1.bsc.es:.

Build on the ACC partition:

# last update: February 2026
mkdir -p ~/bin
# Same ACC toolchain as used for the Boost build above.
module load bsc/1.0 \
            nvidia-hpc-sdk/25.7 \
            gcc/13.2.0-nvidia-hpc-sdk \
            cmake/3.30.5
mkdir python-build
cd python-build
tar xfz ~/cpython-3.14.2.tar.gz
cd cpython-3.14.2
# Profile-guided/optimized interpreter installed into a versioned prefix.
./configure --prefix="${HOME}/bin/cpython-3.14.2" --enable-optimizations
make -j4
make -j4 install
cd ../..
rm -r python-build
# Create a virtual environment from the freshly built interpreter.
~/bin/cpython-3.14.2/bin/python3 -m venv venv-acc
source venv-acc/bin/activate
mkdir pip-build
cd pip-build
tar xfz ~/wheels.tar.gz
# Install from the pre-downloaded wheels (offline install).
python -m pip install ./wheels/*
cd ..
rm -r pip-build

EESSI

See MareNostrum 5 / Package Managers / EESSI.

# last update: March 2026
# Enable the EESSI software stack on MareNostrum 5.
module load eessi_mn5/1
module load EESSI/2025.06

Building software

ESPResSo

Send the source code of release 5.0 and its dependencies:

# Download a pinned ESPResSo snapshot plus the exact dependency revisions
# that its CMakeLists.txt would otherwise fetch from git at build time.
curl -Lo espresso.tar.gz "https://github.com/espressomd/espresso/archive/c3e75358d5aa9cbf8a5ca658457d649a8b0a23ca.tar.gz"
curl -Lo kokkos.tar.gz "https://github.com/kokkos/kokkos/archive/refs/tags/5.0.2.tar.gz"
curl -Lo heffte.tar.gz "https://github.com/icl-utk-edu/heffte/archive/refs/tags/v2.4.1.tar.gz"
curl -Lo walberla.tar.gz "https://i10git.cs.fau.de/api/v4/projects/walberla%2Fwalberla/repository/archive?sha=3247aa73"
curl -Lo Cabana.tar.gz "https://github.com/ECP-copa/Cabana/archive/e76c1a1.tar.gz"
curl -Lo highfive.tar.gz "https://github.com/highfive-devs/highfive/archive/v3.3.0.tar.gz"
# Bundle all tarballs into one archive and upload it once through the
# dedicated storage/transfer node (removed a leftover duplicate scp to
# glogin1 that also used a different username placeholder).
tar cf tarball.tar *.tar.gz
scp tarball.tar username@transfer1.bsc.es:.

Unpack dependencies:

# Unpack the bundle, then every dependency tarball inside it.
tar xf tarball.tar
# Quote "$f" so a filename with unusual characters cannot word-split.
for f in *.tar.gz; do tar xfz "$f"; done
# Rewrite ESPResSo's FetchContent declarations to point at the local,
# pre-downloaded dependency trees instead of the upstream git URLs.
sed -ri "s|GIT_REPOSITORY +https://github.com/icl-utk-edu/heffte.git|URL $(realpath heffte-*/)|" espresso-*/CMakeLists.txt
sed -ri "s|GIT_REPOSITORY +https://github.com/kokkos/kokkos.git|URL $(realpath kokkos-*/)|" espresso-*/CMakeLists.txt
sed -ri "s|GIT_REPOSITORY +https://github.com/ECP-copa/Cabana.git|URL $(realpath Cabana-*/)|" espresso-*/CMakeLists.txt
sed -ri "s|GIT_REPOSITORY +https://github.com/highfive-devs/highfive.git|URL $(realpath highfive-*/)|" espresso-*/CMakeLists.txt
sed -ri "s|GIT_REPOSITORY +https://i10git.cs.fau.de/walberla/walberla.git|URL $(realpath walberla-*/)|" espresso-*/CMakeLists.txt
# Drop the GIT_TAG lines: they are meaningless for local URL sources.
sed -ri "/GIT_TAG/d" espresso-*/CMakeLists.txt

Build on the GPP partition:

# last update: December 2025
# Intel toolchain and numerical libraries for the GPP partition.
module load bsc/1.0 \
            gcc/12.3.0 \
            impi/2021.10.0 \
            mkl/2023.2.0 \
            fftw/3.3.10 \
            ucx/1.15.0 \
            boost/1.88.0-intel-impi \
            hdf5/1.10.11 \
            gsl/2.7.1 \
            cmake/3.30.5
# EasyBuild-provided Python stack (interpreter, Cython, NumPy/SciPy).
module load EB/apps \
            Python/3.13.5-GCCcore-14.3.0 \
            Cython/3.1.2-GCCcore-14.3.0 \
            SciPy-bundle/2025.07-gfbf-2025b
cd espresso-*
mkdir build
cd build
# Start from the maximal feature configuration, dropping the
# ADDITIONAL_CHECKS flag (extra runtime verification — presumably too
# costly for benchmarking; confirm against ESPResSo docs).
cp ../maintainer/configs/maxset.hpp myconfig.hpp
sed -i "/ADDITIONAL_CHECKS/d" myconfig.hpp
# Intel MPI compiler wrappers; FFTW3_ROOT forwarded from the fftw module.
CC=mpiicx CXX=mpiicpx FFTW3_ROOT=$FFTW_ROOT cmake .. \
    -D CMAKE_BUILD_TYPE=Release \
    -D ESPRESSO_BUILD_WITH_CCACHE=OFF \
    -D ESPRESSO_BUILD_WITH_CUDA=OFF \
    -D ESPRESSO_BUILD_WITH_FFTW=ON \
    -D ESPRESSO_BUILD_WITH_HDF5=ON \
    -D ESPRESSO_BUILD_WITH_WALBERLA=ON \
    -D ESPRESSO_BUILD_WITH_WALBERLA_AVX=ON \
    -D ESPRESSO_BUILD_WITH_SHARED_MEMORY_PARALLELISM=ON \
    -D ESPRESSO_BUILD_WITH_GSL=ON
make -j 4
module purge

Build on the ACC partition:

# last update: February 2026
# NVHPC toolchain for the ACC (GPU) partition.
module load bsc/1.0 \
            nvidia-hpc-sdk/25.7 \
            fftw/3.3.10-gcc-nvhpcx \
            gcc/13.2.0-nvidia-hpc-sdk \
            hdf5/1.14.4.2-nvidia-nvhpcx \
            cmake/3.30.5
# Reuse the venv from the Python section; the relative path implies this
# snippet is run from inside the unpacked espresso source directory.
source ../venv-acc/bin/activate
mkdir build-cuda
cd build-cuda
cp ../maintainer/configs/maxset.hpp myconfig.hpp
sed -i "/ADDITIONAL_CHECKS/d" myconfig.hpp
# CUDAARCHS=90 targets Hopper (SM90) — required, see note below.
# Boost comes from the self-built NVHPC prefix (see Boost section).
# NOTE(review): CMAKE_CXX_FLAGS references ${EBROOTGCCCORE}, which is an
# EasyBuild variable — confirm it is set in this module environment.
CC=mpicc CXX=nvc++ CUDACXX=nvcc CUDAARCHS=90 FFTW3_ROOT="${FFTW_HOME}" cmake .. \
    -D CMAKE_BUILD_TYPE=Release \
    -D ESPRESSO_BUILD_WITH_CCACHE=OFF \
    -D BOOST_ROOT="${HOME}/bin/boost-1.88.0-nvhpcx25.7" \
    -D CMAKE_CXX_FLAGS="--gcc-toolchain=gcc -L${EBROOTGCCCORE}/lib64" \
    -D CMAKE_CUDA_FLAGS="-I${NVHPC_ROOT}/comm_libs/12.9/hpcx/hpcx-2.22.1/ompi/include/" \
    -D ESPRESSO_BUILD_WITH_CUDA=ON \
    -D ESPRESSO_BUILD_WITH_FFTW=ON \
    -D ESPRESSO_BUILD_WITH_HDF5=ON \
    -D ESPRESSO_BUILD_WITH_WALBERLA=ON \
    -D ESPRESSO_BUILD_WITH_WALBERLA_AVX=ON \
    -D ESPRESSO_BUILD_WITH_SHARED_MEMORY_PARALLELISM=ON \
    -D ESPRESSO_BUILD_WITH_GSL=OFF

The application must be built using CUDA architecture SM90, otherwise runtime exceptions such as “GPU Error: 222 cudaErrorUnsupportedPtxVersion: the provided PTX was compiled with an unsupported toolchain” or “thrust::THRUST_200302_750_860_890_NS::system::system_error: This program was not compiled for SM 90” will be thrown. Version 25.7 of the NVHPC is required here, because the default 24.7 is affected by bug id 4717351. The warning about “Boost toolset is unknown (compiler NVHPC 25.7.0)” is harmless, Boost doesn’t implement checks for NVHPC (boost-1.88.0/BoostDetectToolset.cmake).

The following environment variable is needed to avoid the error message “error code -125 hwloc_set_cpubind returned "Error" for bitmap "20,100"”:

# disable OpenMPI CPU pinning
# (avoids the hwloc_set_cpubind error code -125; must be exported in the
# environment before mpirun is invoked)
# https://github.com/lexming/easybuild-easyblocks/commit/0bdc18b5754c227a405a7322ec355718a6406758
export OMPI_MCA_hwloc_base_binding_policy=none

To get the full git history, initialize an empty git repository on the cluster, then run these commands from your client workstation:

# Run from the client workstation: register the cluster clone as a
# remote over SSH and push the branch, transferring the full history.
git remote add marenostrum username@glogin1.bsc.es:/gpfs/home/usergroup/username/espresso
git push marenostrum branch_name

Submitting jobs

Jobs need to be allocated for a specific project and for a specific queue (MareNostrum / Standard queues) in that project.

To list projects for the current user:

# Show the projects the current user belongs to.
bsc_project list

To list the queues in each project:

# Show the queues available in every project of the current user.
bsc_queues

To list queues in a specific project ehpc123:

# Restrict the queue listing to project ehpc123.
bsc_queues ehpc123

Batch command for a benchmark job:

# Scaling study: submit one exclusive job per MPI rank count.
# Below 128 ranks a single node holds all ranks; from 128 up, nodes are
# filled with 128 ranks each (presumably one rank per physical core —
# confirm the GPP node core count).
for n in 32 64 128 256 512 1024 2048 4096 ; do
  sbatch --account=ehpc123 --qos=gp_ehpc \
         --ntasks=${n} --hint=nomultithread --exclusive \
         --ntasks-per-node=$((n<128 ? n : 128)) job.sh
done

Job script:

#!/bin/bash
#SBATCH --job-name=multixscale
#SBATCH --time=00:10:00
#SBATCH --output %j.stdout
#SBATCH --error  %j.stderr

# last update: December 2025
# Same module environment as the GPP build of ESPResSo above.
module load bsc/1.0 \
            gcc/12.3.0 \
            impi/2021.10.0 \
            mkl/2023.2.0 \
            fftw/3.3.10 \
            ucx/1.15.0 \
            boost/1.88.0-intel-impi \
            hdf5/1.10.11 \
            gsl/2.7.1 \
            cmake/3.30.5
module load EB/apps \
            Python/3.13.5-GCCcore-14.3.0 \
            Cython/3.1.2-GCCcore-14.3.0 \
            SciPy-bundle/2025.07-gfbf-2025b
# Bind each rank at hardware-thread granularity and print the masks
# (see the "Output" section below); distribution is cyclic across
# nodes, block within a node.
srun --cpu-bind=threads,verbose --distribution=cyclic:block ./pypresso ../maintainer/benchmarks/lb.py

Run an interactive job:

# Interactive 10-minute shell with 4 tasks in the interactive QOS.
srun --account=ehpc123 --qos=gp_interactive --ntasks=4 --job-name=test \
     --time=00:10:00 --pty /usr/bin/bash

Run a job with extra debug information via I_MPI_DEBUG:

# Export I_MPI_DEBUG=4 into the job environment for verbose Intel MPI
# startup and pinning diagnostics (sample output below).
sbatch --account=ehpc123 --qos=gp_debug --ntasks=4 --job-name=test \
       --export="ALL,I_MPI_DEBUG=4" --time=00:10:00 job.sh

Output:

cpu-bind=MASK - gs05r2b01, task  0  0 [1387936]: mask 0x40000000000000000000000000004000000000 set
cpu-bind=MASK - gs05r2b01, task  1  1 [1387937]: mask 0x80000000000000000000000000008000000000 set
cpu-bind=MASK - gs05r2b01, task  2  2 [1387938]: mask 0x10000000000000000000000000001000000000 set
cpu-bind=MASK - gs05r2b01, task  3  3 [1387939]: mask 0x40000000000000000000000000004000000000 set
[0] MPI startup(): Intel(R) MPI Library, Version 2021.10  Build 20230619 (id: c2e19c2f3e)
[0] MPI startup(): Copyright (C) 2003-2023 Intel Corporation.  All rights reserved.
[0] MPI startup(): library kind: release
[0] MPI startup(): libfabric version: 1.18.0-impi
[0] MPI startup(): libfabric provider: mlx
[0] MPI startup(): Load tuning file: "/gpfs/apps/MN5/GPP/ONEAPI/2023.2.0/mpi/2021.10.0/etc/tuning_spr_shm-ofi.dat"
[0] MPI startup(): Rank    Pid      Node name  Pin cpu
[0] MPI startup(): 0       1387936  gs05r2b01  6,118
[0] MPI startup(): 1       1387937  gs05r2b01  59,171
[0] MPI startup(): 2       1387938  gs05r2b01  8,120
[0] MPI startup(): 3       1387939  gs05r2b01  78,190
...

To run an MPI+OpenMP process with Intel MPI, use the following syntax:

# Hybrid MPI+OpenMP launch with Intel MPI: 6 ranks x 7 threads, one
# pinning domain per rank (compact ordering), threads bound to cores.
I_MPI_PIN_ORDER=compact I_MPI_PIN_DOMAIN=omp I_MPI_DEBUG=4 OMP_PROC_BIND=close OMP_PLACES=cores OMP_NUM_THREADS=7 mpiexec -n 6 -print-rank-map ./a.out

Note: while the default python/3.12.1 module comes with NumPy, SciPy and Cython, the following runtime error may be generated when importing certain modules in SciPy:

Traceback (most recent call last):
  File "/home/user/espresso/build/testsuite/python/thermalized_bond.py", line 22, in <module>
    import thermostats_common
  File "/gpfs/home/user/espresso/build/testsuite/python/thermostats_common.py", line 20, in <module>
    import scipy.integrate
  File "/apps/GPP/PYTHON/3.12.1/INTEL/lib/python3.12/site-packages/scipy/integrate/__init__.py", line 94, in <module>
    from ._quadrature import *
  File "/apps/GPP/PYTHON/3.12.1/INTEL/lib/python3.12/site-packages/scipy/integrate/_quadrature.py", line 9, in <module>
    from scipy.special import roots_legendre
  File "/apps/GPP/PYTHON/3.12.1/INTEL/lib/python3.12/site-packages/scipy/special/__init__.py", line 777, in <module>
    from . import _ufuncs
ImportError: libhdf5.so.310: cannot open shared object file: No such file or directory

On the accelerator partition, srun cannot be used; instead, use mpirun and disable Slurm binding with SLURM_CPU_BIND=none:

#!/bin/bash
#SBATCH --output %j.stdout
#SBATCH --error  %j.stderr
set -x
# Disable OpenMPI CPU pinning (avoids the hwloc_set_cpubind error).
# https://github.com/lexming/easybuild-easyblocks/commit/0bdc18b5754c227a405a7322ec355718a6406758
export OMPI_MCA_hwloc_base_binding_policy=none
export OMPI_MCA_hwloc_base_report_bindings=true
# disable slurm binding
# https://www.bsc.es/supportkc/docs/MareNostrum5/slurm/#special-considerations
export SLURM_CPU_BIND=none

# Same toolchain as the ACC build of ESPResSo.
module load bsc/1.0 \
            nvidia-hpc-sdk/25.7 \
            fftw/3.3.10-gcc-nvhpcx \
            gcc/13.2.0-nvidia-hpc-sdk \
            hdf5/1.14.4.2-nvidia-nvhpcx \
            cmake/3.30.5
source "${HOME}/2026-02-16-benchmarks/venv-acc/bin/activate"
# Point Python at the ESPResSo ACC build tree. Fixed to "build-cuda",
# the directory actually created in the ACC build instructions
# (previously "build-cuda2", which is never created in this document).
export PYTHONPATH=$(realpath "${HOME}/2026-02-16-benchmarks/espresso/build-cuda/src/python")

echo "SLURM_NTASKS=${SLURM_NTASKS}"
echo "SLURM_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK}"
echo "SLURM_JOB_CPUS_PER_NODE=${SLURM_JOB_CPUS_PER_NODE}"
# Print the affinity mask of this script — sanity check that Slurm
# binding is indeed disabled.
taskset -p $$

# Launch with mpirun (srun is not usable on the ACC partition); bind
# each rank to a NUMA domain and give it SLURM_CPUS_PER_TASK OpenMP
# threads pinned to cores.
OMP_PROC_BIND=close OMP_PLACES=cores OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} mpirun \
    -n ${SLURM_NTASKS} --bind-to numa --map-by numa --report-bindings \
    python ~/2026-02-16-benchmarks/lb/job.py --output="${SLURM_JOB_ID}.results.csv" \
    --gpu --single_precision --particles_per_core=0 \
    --unit_cell 256 256 224 --shape=rectangle

Submit the job with:

# 2 nodes x 4 GPUs each, one rank per GPU, 20 cores per rank.
# Do NOT replace this with --ntasks-per-gpu=1 (see warning below).
sbatch --account=ehpc123 --qos=acc_ehpc --nodes=2 \
    --gres=gpu:4 --ntasks-per-node=4 --cpus-per-task=20 \
    --job-name=lb --time=24:00:00 --exclusive job.sh

Important: don’t use --ntasks-per-gpu=1, because it’s not compatible with the --cpus-per-task and --gres options! Mixing them together generates the following error when invoking mpirun:

An ORTE daemon has unexpectedly failed after launch and before
communicating back to mpirun. This could be caused by a number
of factors, including an inability to create a connection back
to mpirun due to a lack of common network interfaces and/or no
route found between them. Please check network connectivity
(including firewalls and network routing requirements).