diff --git a/.tekton/odh-base-image-cpu-py312-c9s-pull-request.yaml b/.tekton/odh-base-image-cpu-py312-c9s-pull-request.yaml
index 46f97fe8ee..2c0de0bcbd 100644
--- a/.tekton/odh-base-image-cpu-py312-c9s-pull-request.yaml
+++ b/.tekton/odh-base-image-cpu-py312-c9s-pull-request.yaml
@@ -39,8 +39,7 @@ spec:
     - linux/x86_64
     - linux/arm64
     - linux/ppc64le
-    # TODO(jdanek): Enable s390x once it is supported in the base image.
-    #- linux/s390x
+    - linux/s390x
   pipelineRef:
     name: multiarch-pull-request-pipeline
   taskRunTemplate:
diff --git a/.tekton/odh-base-image-cpu-py312-c9s-push.yaml b/.tekton/odh-base-image-cpu-py312-c9s-push.yaml
index e9cb519386..936ca25533 100644
--- a/.tekton/odh-base-image-cpu-py312-c9s-push.yaml
+++ b/.tekton/odh-base-image-cpu-py312-c9s-push.yaml
@@ -37,8 +37,7 @@ spec:
     - linux/x86_64
     - linux/arm64
     - linux/ppc64le
-    # TODO(jdanek): Enable s390x once it is supported in the base image.
-    #- linux/s390x
+    - linux/s390x
   pipelineRef:
     name: multiarch-push-pipeline
   taskRunTemplate:
diff --git a/base-images/cpu/c9s-python-3.12/Dockerfile.cpu b/base-images/cpu/c9s-python-3.12/Dockerfile.cpu
index 6b97639989..3c0172e814 100644
--- a/base-images/cpu/c9s-python-3.12/Dockerfile.cpu
+++ b/base-images/cpu/c9s-python-3.12/Dockerfile.cpu
@@ -1,22 +1,78 @@
 ARG TARGETARCH
 
-FROM quay.io/sclorg/python-312-c9s:c9s AS buildscripts
+FROM quay.io/centos/centos:stream9 AS buildscripts
 COPY base-images/utils/aipcc.sh /mnt/aipcc.sh
+COPY base-images/utils/fix-permissions base-images/utils/rpm-file-permissions /mnt/usr/bin/
 
 ####################
 # base             #
 ####################
-FROM quay.io/sclorg/python-312-c9s:c9s AS base
+FROM quay.io/centos/centos:stream9 AS base
+
+ARG PYTHON_VERSION=3.12
+ENV PYTHON=python${PYTHON_VERSION}
+
+ARG VARIANT=cpu
+ARG NAME=odh-base-image-cpu-py312-c9s
+ARG SUMMARY="Open Data Hub Notebooks Base Image for ${VARIANT} with Python ${PYTHON_VERSION}"
+# https://github.com/projectatomic/ContainerApplicationGenericLabels
+ARG DESCRIPTION="${SUMMARY} on CentOS Stream 9"
+
+LABEL summary="${SUMMARY}" \
+      description="${DESCRIPTION}" \
+      io.k8s.display-name="${SUMMARY}" \
+      io.k8s.description="${DESCRIPTION}"
 
 USER 0
 
+ARG TARGETARCH
+ENV TARGETARCH=${TARGETARCH}
+
+# MPI implementation (default: OpenMPI)
+ENV MPI_HOME=/usr/lib64/openmpi
+
+ENV APP_ROOT=/opt/app-root
+ENV HOME=${APP_ROOT}/src
+ENV PATH=${HOME}/bin:${HOME}/.local/bin:${APP_ROOT}/bin:/usr/local/sbin:/usr/local/bin:${MPI_HOME}/bin:/usr/sbin:/usr/bin:/sbin:/bin
+
+# Python and virtual env settings
+ENV VIRTUAL_ENV=${APP_ROOT} \
+    PIP_NO_CACHE_DIR=off \
+    UV_NO_CACHE=true \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONIOENCODING=utf-8 \
+    LANG=en_US.UTF-8 \
+    LC_ALL=en_US.UTF-8 \
+    PS1="(app-root) \w\$ "
+
+# OpenShift s2i / Cloud Native Buildpack user
+ENV CNB_USER_ID=1001 \
+    CNB_GROUP_ID=0
+
+# Create home directory and XDG cache directory group-writable. $HOME
+# is owned by 1001:0. All directories are writable by gid 0.
+# hadolint ignore=DL3046
+RUN useradd -u ${CNB_USER_ID} -g ${CNB_GROUP_ID} -d ${HOME} -K HOME_MODE=0770 -K UMASK=0007 -m -s /bin/bash -c "Default Application User" default \
+    && mkdir -m 770 "${HOME}/.cache"
+
+# permission fixer from github.com/sclorg
+COPY --from=buildscripts /mnt/usr/bin/ /usr/bin/
 RUN \
 --mount=from=buildscripts,source=/mnt,target=/mnt \
 --mount=type=cache,sharing=locked,id=dnf-c9s,target=/var/cache/dnf \
 /bin/bash <<'EOF'
+set -Eeuxo pipefail
 /mnt/aipcc.sh
+fix-permissions ${APP_ROOT} -P
 EOF
 
+RUN rpm-file-permissions
+
 # Restore user workspace
-USER 1001
-WORKDIR /opt/app-root/src
+WORKDIR ${APP_ROOT}
+USER ${CNB_USER_ID}:${CNB_GROUP_ID}
+
+# RHELAI-2417, RHELAI-1720: workaround for PyArrow
+# libjemalloc.so.2: cannot allocate memory in static TLS block
+ENV LD_PRELOAD=/usr/lib64/libjemalloc.so.2
diff --git a/base-images/cpu/c9s-python-3.12/README.md b/base-images/cpu/c9s-python-3.12/README.md
new file mode 100644
index 0000000000..bc28862a04
--- /dev/null
+++ b/base-images/cpu/c9s-python-3.12/README.md
@@ -0,0 +1,37 @@
+# AIPCC-like Python 3.12 base image
+
+## Generating the list of additional packages
+
+```commandline
+podman run --rm --pull=always quay.io/sclorg/python-312-c9s:c9s rpm -qa '*' | sort > /tmp/scl_packages.txt
+podman run --rm --pull=always quay.io/aipcc/base-images/cpu:3.1 rpm -qa '*' | sort > /tmp/aipcc_packages.txt
+```
+
+```python
+def get_packages_from_file(filename: str) -> set[str]:
+    """Reads a file and returns a set of package names."""
+    with open(filename, 'r') as f:
+        # We strip the version info from the package names (e.g., 'bash-5.1.8-6.el9_1.x86_64')
+        # to get just the package name ('bash').
+        return {line.strip().rsplit('-', 2)[0] for line in f if line.strip()}
+
+def main():
+    """
+    Compares package lists from two files and prints the difference,
+    formatted for inclusion in a bash script.
+    """
+    scl_packages = get_packages_from_file('/tmp/scl_packages.txt')
+    aipcc_packages = get_packages_from_file('/tmp/aipcc_packages.txt')
+
+    # Find packages in scl but not in aipcc
+    difference = sorted(list(scl_packages - aipcc_packages))
+
+    # Format for bash array
+    print("SCL_PACKAGES=(")
+    for pkg in difference:
+        print(f' "{pkg}"')
+    print(")")
+
+if __name__ == "__main__":
+    main()
+```
diff --git a/base-images/utils/aipcc.sh b/base-images/utils/aipcc.sh
index 91e703e3b6..a4468bdf50 100755
--- a/base-images/utils/aipcc.sh
+++ b/base-images/utils/aipcc.sh
@@ -1,7 +1,331 @@
 #!/usr/bin/env bash
 set -Eeuxo pipefail
 
-DNF_OPTS=(-y --nodocs --setopt=install_weak_deps=False --setopt=keepcache=True)
+ARCH=${TARGETARCH}
+
+DNF_OPTS=(-y --nodocs --setopt=install_weak_deps=False --setopt=keepcache=True --setopt=max_parallel_downloads=10)
+
+function install_packages() {
+    PKGS=()
+
+    # common tools
+    PKGS+=("git-core" "wget" "numactl" "file")
+    # additional tools
+    PKGS+=("skopeo" "jq" "nvtop")
+    # additional developer tools
+    PKGS+=("make" "ninja-build" "gdb")
+    # PKGS+=("vim")
+
+    # for LANG / LC_ALL=en_US.UTF-8
+    PKGS+=("glibc-langpack-en")
+
+    # compiler for Torch Dynamo JIT and Triton
+    PKGS+=("gcc")
+
+    # font and image libraries
+    PKGS+=("freetype" "lcms2" "libjpeg" "libpng" "libtiff" "libwebp" "openjpeg2")
+
+    # compression libraries and tools
+    PKGS+=("bzip2" "cpio" "lz4" "libzstd" "gzip" "snappy" "xz" "xz-libs" "zlib" "zstd")
+
+    # Mathematics libraries used by various packages
+    PKGS+=("fftw" "gmp" "mpfr" "libmpc" "openblas" "libomp")
+
+    # additional math libraries
+    # TODO: check if we need all variants
+    PKGS+=(
+        "openblas-openmp" "openblas-serial"
+        "openblas-openmp64" "openblas-serial64" "openblas-threads" "openblas-threads64"
+    )
+
+    # XML bindings for lxml
+    PKGS+=("libxml2" "libxslt")
+
+    # OpenMPI depends on openmpi-devel (Perl, GCC, glibc-devel)
+    PKGS+=("openmpi")
+
+    # async io for DeepSpeed
+    PKGS+=("libaio")
+
+    # PyArrow
+    PKGS+=(
+        "utf8proc"
+        # RHELAI
+        "re2" "thrift"
+    )
+
+    # PyTorch threading building blocks
+    PKGS+=("tbb")
+
+    # For opencv-python-headless
+    # libva depends on libX11 and MESA
+    PKGS+=("libva")
+
+    # For soundfile
+    PKGS+=("libsndfile")
+
+    # docling
+    PKGS+=(
+        "qpdf"
+        # tesserocr
+        "tesseract"
+        # RHELAI: loguru
+        "loguru"
+    )
+
+    # RHELAI: pyzmq for vLLM
+    PKGS+=("zeromq")
+
+    # RHELAI: for h5py
+    PKGS+=("hdf5")
+
+    # RHELAI: faster memory allocator / PyArrow
+    PKGS+=("jemalloc")
+
+    # RHELAI: for shapely
+    PKGS+=("geos")
+
+    # RHELAI: for rtree
+    PKGS+=("spatialindex")
+
+    # For pyodbc
+    PKGS+=("unixODBC")
+
+    # For psycopg2-binary Postgres driver
+    PKGS+=("libpq")
+
+    # For matplotlib
+    PKGS+=("libqhull_r")
+
+    PKGS+=(
+        "${PYTHON:?}"
+        "${PYTHON}-devel"
+    )
+
+    dnf install "${DNF_OPTS[@]}" "${PKGS[@]}"
+}
+
+# This is a hack, AIPCC bases lack many packages that the python-3.12 scl image provides
+# so we install them temporarily, to avoid breaking the build.
+# The list is obtained as explained in c9s-python-3.12/README.md
+function install_scl_packages() {
+    SCL_PACKAGES=(
+        "annobin"
+        "apr"
+        "apr-devel"
+        "apr-util"
+        "apr-util-bdb"
+        "apr-util-devel"
+        "apr-util-ldap"
+        "apr-util-openssl"
+        "atlas"
+        "atlas-devel"
+        "autoconf"
+        "automake"
+        "brotli"
+        "brotli-devel"
+        "bsdtar"
+        "bzip2-devel"
+        "centos-gpg-keys"
+        "centos-logos-httpd"
+        "centos-stream-release"
+        "centos-stream-repos"
+        "cmake-filesystem"
+        "cyrus-sasl"
+        "cyrus-sasl-devel"
+        "dwz"
+        "ed"
+        "efi-srpm-macros"
+        "enchant"
+        "expat-devel"
+        "fontconfig-devel"
+        "fonts-srpm-macros"
+        "freetype-devel"
+        "gcc-c++"
+        "gcc-gfortran"
+        "gcc-plugin-annobin"
+        "gd"
+        "gd-devel"
+        "gettext"
+        "gettext-libs"
+        "ghc-srpm-macros"
+        "git"
+        "git-core-doc"
+        "glib2-devel"
+        "glibc-gconv-extra"
+        "glibc-locale-source"
+        "go-srpm-macros"
+        "graphite2-devel"
+        "harfbuzz-devel"
+        "harfbuzz-icu"
+        "hostname"
+        "httpd"
+        "httpd-core"
+        "httpd-devel"
+        "httpd-filesystem"
+        "httpd-tools"
+        "hunspell"
+        "hunspell-en"
+        "hunspell-en-GB"
+        "hunspell-en-US"
+        "hunspell-filesystem"
+        "info"
+        "kernel-srpm-macros"
+        "keyutils-libs-devel"
+        "krb5-devel"
+        "libICE"
+        "libSM"
+        "libX11-devel"
+        "libXau-devel"
+        "libXpm"
+        "libXpm-devel"
+        "libXt"
+        "libblkid-devel"
+        "libcom_err-devel"
+        "libcurl-devel"
+        "libdb-devel"
+        "libffi-devel"
+        "libgpg-error-devel"
+        "libicu-devel"
+        "libjpeg-turbo-devel"
+        "libkadm5"
+        "libmount-devel"
+        "libpath_utils"
+        "libpng-devel"
+        "libpq-devel"
+        "libselinux-devel"
+        "libsepol-devel"
+        "libstdc++-devel"
+        "libtalloc"
+        "libtiff-devel"
+        "libverto-devel"
+        "libwebp-devel"
+        "libxcb-devel"
+        "libxml2-devel"
+        "libxslt-devel"
+        "llvm-filesystem"
+        "lsof"
+        "lua-srpm-macros"
+        "m4"
+        "mailcap"
+        "mariadb-connector-c"
+        "mariadb-connector-c-config"
+        "mariadb-connector-c-devel"
+        "mod_auth_gssapi"
+        "mod_http2"
+        "mod_ldap"
+        "mod_lua"
+        "mod_session"
+        "mod_ssl"
+        "ncurses"
+        "nodejs"
+        "nodejs-docs"
+        "nodejs-full-i18n"
+        "nodejs-libs"
+        "npm"
+        "nss_wrapper-libs"
+        "ocaml-srpm-macros"
+        "openblas-srpm-macros"
+        "openldap-devel"
+        "openssl-devel"
+        "patch"
+        "pcre-cpp"
+        "pcre-devel"
+        "pcre-utf16"
+        "pcre-utf32"
+        "pcre2-devel"
+        "pcre2-utf16"
+        "pcre2-utf32"
+        "perl-AutoLoader"
+        "perl-B"
+        "perl-Carp"
+        "perl-Class-Struct"
+        "perl-Data-Dumper"
+        "perl-Digest"
+        "perl-Digest-MD5"
+        "perl-DynaLoader"
+        "perl-Encode"
+        "perl-Errno"
+        "perl-Error"
+        "perl-Exporter"
+        "perl-Fcntl"
+        "perl-File-Basename"
+        "perl-File-Compare"
+        "perl-File-Copy"
+        "perl-File-Find"
+        "perl-File-Path"
+        "perl-File-Temp"
+        "perl-File-stat"
+        "perl-FileHandle"
+        "perl-Getopt-Long"
+        "perl-Getopt-Std"
+        "perl-Git"
+        "perl-HTTP-Tiny"
+        "perl-IO"
+        "perl-IO-Socket-IP"
+        "perl-IO-Socket-SSL"
+        "perl-IPC-Open3"
+        "perl-MIME-Base64"
+        "perl-Mozilla-CA"
+        "perl-NDBM_File"
+        "perl-Net-SSLeay"
+        "perl-POSIX"
+        "perl-PathTools"
+        "perl-Pod-Escapes"
+        "perl-Pod-Perldoc"
+        "perl-Pod-Simple"
+        "perl-Pod-Usage"
+        "perl-Scalar-List-Utils"
+        "perl-SelectSaver"
+        "perl-Socket"
+        "perl-Storable"
+        "perl-Symbol"
+        "perl-Term-ANSIColor"
+        "perl-Term-Cap"
+        "perl-TermReadKey"
+        "perl-Text-ParseWords"
+        "perl-Text-Tabs+Wrap"
+        "perl-Thread-Queue"
+        "perl-Time-Local"
+        "perl-URI"
+        "perl-base"
+        "perl-constant"
+        "perl-if"
+        "perl-interpreter"
+        "perl-lib"
+        "perl-libnet"
+        "perl-libs"
+        "perl-mro"
+        "perl-overload"
+        "perl-overloading"
+        "perl-parent"
+        "perl-podlators"
+        "perl-srpm-macros"
+        "perl-subs"
+        "perl-threads"
+        "perl-threads-shared"
+        "perl-vars"
+        "pyproject-srpm-macros"
+        "python-srpm-macros"
+        "python3.12-pip"
+        "python3.12-setuptools"
+        "qt5-srpm-macros"
+        "redhat-rpm-config"
+        "rsync"
+        "rust-srpm-macros"
+        "scl-utils"
+        "sqlite"
+        "sqlite-devel"
+        "sscg"
+        "sysprof-capture-devel"
+        "unzip"
+        "xorg-x11-proto-devel"
+        "xz-devel"
+        "zip"
+        "zlib-devel"
+    )
+    dnf install "${DNF_OPTS[@]}" "${SCL_PACKAGES[@]}"
+}
 
 function install_epel() {
     dnf install "${DNF_OPTS[@]}" https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
@@ -11,15 +335,46 @@ function uninstall_epel() {
     dnf remove "${DNF_OPTS[@]}" epel-release
 }
 
+# AIPCC bases enable codeready-builder, so we need to do the CentOS equivalent (the crb repository)
+# In RHEL this is codeready-builder-for-rhel-${RELEASEVER_MAJOR}-${ARCH}-eus-rpms
+# or codeready-builder-for-rhel-${RELEASEVER_MAJOR}-${ARCH}-rpms
+function install_csb() {
+    dnf install "${DNF_OPTS[@]}" dnf-plugins-core
+    dnf config-manager --set-enabled crb
+}
+
+# create Python virtual env and update pip inside the venv
+function install_python_venv() {
+    # install venv with bundled pip (no --upgrade-deps)
+    "${PYTHON}" -m venv "${VIRTUAL_ENV}"
+    # upgrade with the venv's own interpreter: bash has already cached the system path of ${PYTHON}
+    "${VIRTUAL_ENV}/bin/python" -m pip install --force-reinstall --upgrade \
+        --index-url https://pypi.org/simple/ \
+        pip setuptools wheel
+}
+
 function main() {
+    install_csb
+
     install_epel
-    trap uninstall_epel EXIT
-    dnf install "${DNF_OPTS[@]}" zeromq
+    # install security updates
+    dnf update "${DNF_OPTS[@]}" --security
+
+    install_packages
 
     if ! test -f /usr/lib64/libzmq.so.5; then
         echo "Error: libzmq.so.5 was not found after installation"
         exit 1
     fi
+
+    install_python_venv
+
+    # TODO(jdanek): we want to eventually remove this
+    install_scl_packages
+    # Makefile: REQUIRED_RUNTIME_IMAGE_COMMANDS="curl python3"
+    dnf install "${DNF_OPTS[@]}" which
+
+    uninstall_epel
 }
 
 if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
diff --git a/base-images/utils/fix-permissions b/base-images/utils/fix-permissions
new file mode 100755
index 0000000000..58956c426d
--- /dev/null
+++ b/base-images/utils/fix-permissions
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+# based on https://github.com/sclorg/container-common-scripts/blob/4f4f13847fd372c304503bc0bf8e11fae6ce9e06/shared-scripts/core/usr/bin/fix-permissions
+
+# Allow this script to fail without failing a build
+set +e
+
+SYMLINK_OPT=${2:--L}
+
+# Fix permissions on the given directory or file to allow group read/write of
+# regular files and execute of directories.
+
+[ $(id -u) -ne 0 ] && CHECK_OWNER=" -uid $(id -u)"
+
+# If argument does not exist, script will still exit with 0,
+# but at least we'll see something went wrong in the log
+if ! [ -e "$1" ] ; then
+  echo "ERROR: File or directory $1 does not exist." >&2
+  # We still want to end successfully
+  exit 0
+fi
+
+# added line
+find $SYMLINK_OPT "$1" ${CHECK_OWNER} \! -uid ${CNB_USER_ID:?} -exec chown ${CNB_USER_ID:?} {} +
+# modified line
+find $SYMLINK_OPT "$1" ${CHECK_OWNER} \! -gid ${CNB_GROUP_ID:?} -exec chgrp ${CNB_GROUP_ID:?} {} +
+find $SYMLINK_OPT "$1" ${CHECK_OWNER} \! -perm -g+rw -exec chmod g+rw {} +
+find $SYMLINK_OPT "$1" ${CHECK_OWNER} -perm /u+x -a \! -perm /g+x -exec chmod g+x {} +
+find $SYMLINK_OPT "$1" ${CHECK_OWNER} -type d \! -perm /g+x -exec chmod g+x {} +
+
+# Always end successfully
+exit 0
diff --git a/base-images/utils/rpm-file-permissions b/base-images/utils/rpm-file-permissions
new file mode 100755
index 0000000000..522bb8dbb2
--- /dev/null
+++ b/base-images/utils/rpm-file-permissions
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+# copied from https://github.com/sclorg/container-common-scripts/blob/4f4f13847fd372c304503bc0bf8e11fae6ce9e06/shared-scripts/core/usr/bin/rpm-file-permissions
+
+CHECK_DIRS="/ /opt /etc /usr /usr/bin /usr/lib /usr/lib64 /usr/share /usr/libexec"
+
+rpm_format="[%{FILESTATES:fstate} %7{FILEMODES:octal} %{FILENAMES:shescape}\n]"
+
+rpm -q --qf "$rpm_format" filesystem | while read line
+do
+    eval "set -- $line"
+
+    case $1 in
+        normal) ;;
+        *) continue ;;
+    esac
+
+    case " $CHECK_DIRS " in
+        *" $3 "*)
+            chmod "${2: -4}" "$3"
+            ;;
+    esac
+done