{"host": "an-test-client1002.eqiad.wmnet", "state": "core_diff", "description": "Differences to core resources", "diff": {"full": {"total": 4896, "only_in_self": [], "only_in_other": ["Class[Profile::Hadoop::Spark35]", "Ferm::Service[spark35_driver]", "Ferm::Service[spark35_driver_blockmanager]", "Ferm::Service[spark35_ui_port]", "File[/etc/ferm/conf.d/10_spark35_driver]", "File[/etc/ferm/conf.d/10_spark35_driver_blockmanager]", "File[/etc/ferm/conf.d/10_spark35_ui_port]", "File[/etc/spark35/conf/hive-site.xml]", "File[/etc/spark35/conf/log4j2.properties]", "File[/etc/spark35/conf/spark-defaults.conf]", "File[/etc/spark35/conf/spark-env.sh]", "File[/etc/spark35/conf]", "File[/etc/spark35]", "File[/usr/bin/spark35-class]", "File[/usr/bin/spark35-pyspark]", "File[/usr/bin/spark35-shell]", "File[/usr/bin/spark35-sql]", "File[/usr/bin/spark35-submit]", "File[/usr/lib/spark35]", "Firewall::Service[spark35-driver-blockmanager]", "Firewall::Service[spark35-driver]", "Firewall::Service[spark35-ui-port]"], "resource_diffs": [{"resource": "File[/usr/bin/spark35-pyspark]", "content": "--- /usr/bin/spark35-pyspark.orig\n+++ /usr/bin/spark35-pyspark\n@@ -0,0 +1,4 @@\n+#!/usr/bin/env bash\n+# Managed by Puppet\n+export SPARK_CONF_DIR=/etc/spark35/conf\n+exec /usr/lib/spark35/bin/pyspark \"$@\"", "parameters": "--- File[/usr/bin/spark35-pyspark].orig\n+++ File[/usr/bin/spark35-pyspark]\n\n+    owner  => root\n+    group  => root\n+    ensure => file\n+    mode   => 0755\n"}, {"resource": "File[/usr/bin/spark35-submit]", "content": "--- /usr/bin/spark35-submit.orig\n+++ /usr/bin/spark35-submit\n@@ -0,0 +1,4 @@\n+#!/usr/bin/env bash\n+# Managed by Puppet\n+export SPARK_CONF_DIR=/etc/spark35/conf\n+exec /usr/lib/spark35/bin/spark-submit \"$@\"", "parameters": "--- File[/usr/bin/spark35-submit].orig\n+++ File[/usr/bin/spark35-submit]\n\n+    owner  => root\n+    group  => root\n+    ensure => file\n+    mode   => 0755\n"}, {"resource": "File[/usr/lib/spark35]", "parameters": "--- File[/usr/lib/spark35].orig\n+++ File[/usr/lib/spark35]\n\n+    owner  => root\n+    group  => root\n+    ensure => link\n"}, {"resource": "Class[Profile::Analytics::Cluster::Client]", "parameters": "--- Class[Profile::Analytics::Cluster::Client].orig\n+++ Class[Profile::Analytics::Cluster::Client]\n\n@@\n-    require => ['Class[Profile::Analytics::Cluster::Packages::Common]', 'Class[Profile::Hadoop::Common]', 'Class[Profile::Hive::Client]', 'Class[Profile::Hadoop::Spark3]']\n+    require => ['Class[Profile::Analytics::Cluster::Packages::Common]', 'Class[Profile::Hadoop::Common]', 'Class[Profile::Hive::Client]']\n"}, {"resource": "File[/etc/ferm/conf.d/10_spark35_driver]", "content": "--- /etc/ferm/conf.d/10_spark35_driver.orig\n+++ /etc/ferm/conf.d/10_spark35_driver\n@@ -0,0 +1,6 @@\n+# Autogenerated by puppet. DO NOT EDIT BY HAND!\n+#\n+# \n+&R_SERVICE(tcp, 12000:12100, $ANALYTICS_NETWORKS);\n+\n+", "parameters": "--- File[/etc/ferm/conf.d/10_spark35_driver].orig\n+++ File[/etc/ferm/conf.d/10_spark35_driver]\n\n+    require => File[/etc/ferm/conf.d]\n+    notify  => Service[ferm]\n+    ensure  => present\n+    owner   => root\n+    group   => root\n+    tag     => ferm\n+    mode    => 0400\n"}, {"resource": "File[/etc/ferm/conf.d/10_spark35_driver_blockmanager]", "content": "--- /etc/ferm/conf.d/10_spark35_driver_blockmanager.orig\n+++ /etc/ferm/conf.d/10_spark35_driver_blockmanager\n@@ -0,0 +1,6 @@\n+# Autogenerated by puppet. DO NOT EDIT BY HAND!\n+#\n+# \n+&R_SERVICE(tcp, 13000:13100, $ANALYTICS_NETWORKS);\n+\n+", "parameters": "--- File[/etc/ferm/conf.d/10_spark35_driver_blockmanager].orig\n+++ File[/etc/ferm/conf.d/10_spark35_driver_blockmanager]\n\n+    require => File[/etc/ferm/conf.d]\n+    notify  => Service[ferm]\n+    ensure  => present\n+    owner   => root\n+    group   => root\n+    tag     => ferm\n+    mode    => 0400\n"}, {"resource": "File[/etc/spark35]", "parameters": "--- File[/etc/spark35].orig\n+++ File[/etc/spark35]\n\n+    owner  => root\n+    group  => root\n+    ensure => directory\n+    mode   => 0755\n"}, {"resource": "File[/usr/bin/spark35-sql]", "content": "--- /usr/bin/spark35-sql.orig\n+++ /usr/bin/spark35-sql\n@@ -0,0 +1,4 @@\n+#!/usr/bin/env bash\n+# Managed by Puppet\n+export SPARK_CONF_DIR=/etc/spark35/conf\n+exec /usr/lib/spark35/bin/spark-sql \"$@\"", "parameters": "--- File[/usr/bin/spark35-sql].orig\n+++ File[/usr/bin/spark35-sql]\n\n+    owner  => root\n+    group  => root\n+    ensure => file\n+    mode   => 0755\n"}, {"resource": "Ferm::Service[spark35_driver_blockmanager]", "parameters": "--- Ferm::Service[spark35_driver_blockmanager].orig\n+++ Ferm::Service[spark35_driver_blockmanager]\n\n+    notrack             => False\n+    proto               => tcp\n+    prio                => 10\n+    port_range          => [13000, 13100]\n+    unrestricted_access => False\n+    src_sets            => ['ANALYTICS_NETWORKS']\n+    ensure              => present\n+    desc                => \n"}, {"resource": "File[/etc/spark35/conf]", "parameters": "--- File[/etc/spark35/conf].orig\n+++ File[/etc/spark35/conf]\n\n+    owner  => root\n+    group  => root\n+    ensure => directory\n+    mode   => 0755\n"}, {"resource": "Ferm::Service[spark35_driver]", "parameters": "--- Ferm::Service[spark35_driver].orig\n+++ Ferm::Service[spark35_driver]\n\n+    notrack             => False\n+    proto               => tcp\n+    prio                => 10\n+    port_range          => [12000, 12100]\n+    unrestricted_access => False\n+    src_sets            => ['ANALYTICS_NETWORKS']\n+    ensure              => present\n+    desc                => \n"}, {"resource": "File[/etc/spark35/conf/hive-site.xml]", "parameters": "--- File[/etc/spark35/conf/hive-site.xml].orig\n+++ File[/etc/spark35/conf/hive-site.xml]\n\n+    target => /etc/hive/conf.analytics-test-hadoop/hive-site.xml\n+    owner  => root\n+    group  => root\n+    ensure => link\n"}, {"resource": "File[/etc/ferm/conf.d/10_spark35_ui_port]", "content": "--- /etc/ferm/conf.d/10_spark35_ui_port.orig\n+++ /etc/ferm/conf.d/10_spark35_ui_port\n@@ -0,0 +1,6 @@\n+# Autogenerated by puppet. DO NOT EDIT BY HAND!\n+#\n+# \n+&R_SERVICE(tcp, 4040:4140, $ANALYTICS_NETWORKS);\n+\n+", "parameters": "--- File[/etc/ferm/conf.d/10_spark35_ui_port].orig\n+++ File[/etc/ferm/conf.d/10_spark35_ui_port]\n\n+    require => File[/etc/ferm/conf.d]\n+    notify  => Service[ferm]\n+    ensure  => present\n+    owner   => root\n+    group   => root\n+    tag     => ferm\n+    mode    => 0400\n"}, {"resource": "Firewall::Service[spark35-driver]", "parameters": "--- Firewall::Service[spark35-driver].orig\n+++ Firewall::Service[spark35-driver]\n\n+    notrack             => False\n+    proto               => tcp\n+    prio                => 10\n+    port_range          => [12000, 12100]\n+    unrestricted_access => False\n+    src_sets            => ['ANALYTICS_NETWORKS']\n+    ensure              => present\n+    desc                => \n"}, {"resource": "Firewall::Service[spark35-ui-port]", "parameters": "--- Firewall::Service[spark35-ui-port].orig\n+++ Firewall::Service[spark35-ui-port]\n\n+    notrack             => False\n+    proto               => tcp\n+    prio                => 10\n+    port_range          => [4040, 4140]\n+    unrestricted_access => False\n+    src_sets            => ['ANALYTICS_NETWORKS']\n+    ensure              => present\n+    desc                => \n"}, {"resource": "Ferm::Service[spark35_ui_port]", "parameters": "--- Ferm::Service[spark35_ui_port].orig\n+++ Ferm::Service[spark35_ui_port]\n\n+    notrack             => False\n+    proto               => tcp\n+    prio                => 10\n+    port_range          => [4040, 4140]\n+    unrestricted_access => False\n+    src_sets            => ['ANALYTICS_NETWORKS']\n+    ensure              => present\n+    desc                => \n"}, {"resource": "File[/etc/spark35/conf/spark-defaults.conf]", "content": "--- /etc/spark35/conf/spark-defaults.conf.orig\n+++ /etc/spark35/conf/spark-defaults.conf\n@@ -0,0 +1,74 @@\n+# NOTE: This file is managed by Puppet.\n+\n+# Default system properties included when running spark-submit.\n+# This is useful for setting default environmental settings.\n+\n+# Example:\n+# spark.master                     spark://master:7077\n+# spark.eventLog.enabled           true\n+# spark.eventLog.dir               hdfs://namenode:8021/directory\n+# spark.serializer                 org.apache.spark.serializer.KryoSerializer\n+# spark.driver.memory              5g\n+# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers=\"one two three\"\n+\n+# We configure the default value of spark.mater to be yarn since this is by far the most common\n+# type of usage in our environment. It can be overridden on the command-line to use 'local' and to\n+# limit the number of cores allocated. Please see #T393181 and the following docs for more info.\n+# https://wikitech.wikimedia.org/wiki/Data_Platform/Systems/Spark#Application_Master\n+spark.master                                        yarn\n+# Dynamic allocation allows Spark to dynamically scale the cluster resources\n+# allocated for an application based on the workload. Only available in YARN mode.\n+# More info: https://spark.apache.org/docs/2.1.2/configuration.html#dynamic-allocation\n+spark.dynamicAllocation.enabled                     true\n+spark.shuffle.service.enabled                       true\n+spark.dynamicAllocation.executorIdleTimeout         60s\n+spark.dynamicAllocation.cachedExecutorIdleTimeout   3600s\n+spark.shuffle.io.maxRetries                         10\n+spark.shuffle.io.retryWait                          10s\n+# The following two settings are only set if the cluster is configured\n+# to have multiple yarn shufflers available and the version is greater than 3.1\n+spark.shuffle.service.name = spark_shuffle_3_5\n+spark.shuffle.service.port = 7340\n+spark.executorEnv.LD_LIBRARY_PATH                   /usr/lib/hadoop/lib/native\n+spark.sql.catalogImplementation                     hive\n+# This setting adds support for Iceberg SQL extensions like CALL for stored procedures or ALTER TABLE ... WRITE ORDERED BY.\n+# See https://iceberg.apache.org/docs/1.2.1/spark-configuration/#sql-extensions\n+spark.sql.extensions                                org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions\n+# These 'spark.sql.catalog.spark_catalog*' settings add support for Iceberg to the default Spark Catalog by wrapping it.\n+# See https://iceberg.apache.org/docs/1.2.1/spark-configuration/#replacing-the-session-catalog\n+spark.sql.catalog.spark_catalog                     org.apache.iceberg.spark.SparkSessionCatalog\n+spark.sql.catalog.spark_catalog.type                hive\n+spark.driver.port                                   12000\n+spark.port.maxRetries                               100\n+spark.ui.port                                       4040\n+spark.driver.blockManager.port                      13000\n+spark.sql.files.maxPartitionBytes                   268435456\n+spark.sql.warehouse.dir                             hdfs:///user/hive/warehouse\n+spark.yarn.archive                                  hdfs:///user/spark/share/lib/spark-3.5.8-assembly.jar\n+\n+# JVMs should use system proxy settings.\n+# The system proxy settings are configured via the env vars http_proxy, https_proxy, and no_proxy.\n+spark.driver.defaultJavaOptions                     -Djava.net.useSystemProxies=True\n+spark.executor.defaultJavaOptions                   -Djava.net.useSystemProxies=True\n+\n+\n+spark.authenticate                                  true\n+# Spark IO encryption settings are not enabled (but listed anyway)\n+# since in some use cases (like Refine) they caused exceptions like\n+# 'java.io.IOException: Stream is corrupted' when shuffle files were\n+# compressed with lz4.\n+# spark.io.encryption.enabled                         true\n+# spark.io.encryption.keySizeBits                     256\n+# spark.io.encryption.keygen.algorithm                HmacSHA256\n+spark.network.crypto.enabled                        true\n+spark.network.crypto.keyFactoryAlgorithm            PBKDF2WithHmacSHA256\n+spark.network.crypto.keyLength                      256\n+spark.network.crypto.saslFallback                   false\n+\n+# Ensure that Python requests lib always use system CA certificates.\n+spark.yarn.appMasterEnv.REQUESTS_CA_BUNDLE          /etc/ssl/certs/ca-certificates.crt\n+spark.executorEnv.REQUESTS_CA_BUNDLE                /etc/ssl/certs/ca-certificates.crt\n+spark.eventLog.enabled                              true\n+spark.eventLog.dir                                  hdfs:///var/log/spark\n+spark.eventLog.compress                             true\n+spark.yarn.historyServer.address                    spark-history-test.svc.eqiad.wmnet:30443", "parameters": "--- File[/etc/spark35/conf/spark-defaults.conf].orig\n+++ File[/etc/spark35/conf/spark-defaults.conf]\n\n+    owner => root\n+    group => root\n+    mode  => 0644\n"}, {"resource": "Firewall::Service[spark35-driver-blockmanager]", "parameters": "--- Firewall::Service[spark35-driver-blockmanager].orig\n+++ Firewall::Service[spark35-driver-blockmanager]\n\n+    notrack             => False\n+    proto               => tcp\n+    prio                => 10\n+    port_range          => [13000, 13100]\n+    unrestricted_access => False\n+    src_sets            => ['ANALYTICS_NETWORKS']\n+    ensure              => present\n+    desc                => \n"}, {"resource": "Class[Profile::Hadoop::Spark35]", "parameters": "--- Class[Profile::Hadoop::Spark35].orig\n+++ Class[Profile::Hadoop::Spark35]\n\n+    spark_yarn_history_address => spark-history-test.svc.eqiad.wmnet:30443\n+    extra_settings             => {}\n+    encryption_enabled         => True\n+    driver_blockmanager_port   => 13000\n+    port_max_retries           => 100\n+    event_log_compress         => True\n+    default_shuffler_version   => 3.5\n+    require                    => ['Class[Profile::Hadoop::Common]', 'Class[Profile::Analytics::Conda_analytics]']\n+    driver_port                => 12000\n+    executor_env_ld_lib_path   => /usr/lib/hadoop/lib/native\n+    event_log_dir              => hdfs:///var/log/spark\n+    ui_port                    => 4040\n+    spark_version              => 3.5.8\n"}, {"resource": "File[/etc/spark35/conf/spark-env.sh]", "content": "--- /etc/spark35/conf/spark-env.sh.orig\n+++ /etc/spark35/conf/spark-env.sh\n@@ -0,0 +1,179 @@\n+#!/usr/bin/env bash\n+\n+# NOTE: This file is managed by Puppet.\n+\n+# This file is sourced when running various Spark programs.\n+# Copy it as spark-env.sh and edit that to configure Spark for your site.\n+\n+# Options read when launching programs locally with\n+# ./bin/run-example or ./bin/spark-submit\n+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files\n+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node\n+# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program\n+# - SPARK_CLASSPATH, default classpath entries to append\n+\n+# Options read by executors and drivers running inside the cluster\n+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node\n+# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program\n+# - SPARK_CLASSPATH, default classpath entries to append\n+# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data\n+# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos\n+\n+# Options read in YARN client mode\n+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files\n+# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2)\n+# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).\n+# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)\n+# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)\n+\n+# Options for the daemons used in the standalone deploy mode\n+# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname\n+# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master\n+# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. \"-Dx=y\")\n+# - SPARK_WORKER_CORES, to set the number of cores to use on this machine\n+# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)\n+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker\n+# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node\n+# - SPARK_WORKER_DIR, to set the working directory of worker processes\n+# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. \"-Dx=y\")\n+# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).\n+# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. \"-Dx=y\")\n+# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. \"-Dx=y\")\n+# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. \"-Dx=y\")\n+# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers\n+\n+# Generic options for the daemons used in the standalone deploy mode\n+# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)\n+# - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)\n+# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)\n+# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)\n+# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)\n+# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.\n+\n+# = WMF customizations below =\n+#\n+# If /etc/hadoop/conf exists, use it as HADOOP_CONF_DIR\n+if [ -z \"${HADOOP_CONF_DIR}\" -a -e \"/etc/hadoop/conf\" ]; then\n+  export HADOOP_CONF_DIR=/etc/hadoop/conf\n+fi\n+\n+# If /usr/lib/hadoop/native exists, use Hadoop native libs from there\n+if [ -z \"${LD_LIBRARY_PATH}\" -a -e /usr/lib/hadoop/lib/native ]; then\n+    export LD_LIBRARY_PATH=/usr/lib/hadoop/lib/native\n+fi\n+\n+if [ -z \"${SPARK_HOME}\" ]; then\n+    echo \"ERROR: SPARK_HOME must be set before loading ${0}\"\n+    exit 1\n+else\n+    echo \"SPARK_HOME: ${SPARK_HOME}\"\n+fi\n+\n+# Select Hadoop library\n+#\n+# if [ \"${SPARK_HOME}\" != /usr/lib/airflow* -a -x \"$(command -v hadoop)\" ]; then\n+#    SPARK_DIST_CLASSPATH=$(hadoop classpath)\n+# fi\n+HADOOP_VERSION=\"$(ls $SPARK_HOME/jars/ \\\n+  | grep hadoop-client \\\n+  |  sed 's/^hadoop-client-\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*$/\\1/')\"\n+if [ -z \"${HADOOP_VERSION}\" ]; then\n+    echo \"ERROR: unknown HADOOP_VERSION\"\n+    exit 1\n+fi\n+# 2022-09 We are using Hadoop 3 client lib to connect to an Hadoop 2.10 cluster.\n+# Hadoop 2.10 version is a backport of version 3 api. That means we can use\n+# a client lib at version 3 to access it.\n+# This echo is just to remember the explicit version we are using.\n+echo \"Using Hadoop client lib jars at ${HADOOP_VERSION}, provided by Spark.\"\n+\n+# == Determine useful default values for PYSPARK_DRIVER_PYTHON and PYSPARK_PYTHON ==\n+#\n+# PYSPARK_DRIVER_PYTHON:\n+#\n+# - User's python env.  If CONDA_PREFIX is set, or if SPARK_HOME is a directory\n+#   site-packages/pyspark directory, assume we want to use ipython (or python)\n+#   from the user's python env.\n+#\n+# - Else, use $PYTHON_PREFIX_GLOBAL/bin/ipython3\n+#\n+# NOTE: If we don't end up setting PYSPARK_DRIVER_PYTHON, then built in spark scripts will do\n+# what they usually do to set it (use system python). I.e. it will default to the\n+# value of PYSPARK_PYTHON, or the system 'python'.\n+#\n+# PYSPARK_PYTHON:\n+#\n+# This must be available on worker nodes.\n+#\n+# - If we are not running in YARN, then we can safely use the user's python env.\n+#\n+# - Else if we are running in YARN, we have no way of using a user's local python env.\n+#   as the default value here.  It is possible to for users to use a custom python env,\n+#   but the user will have ship their packed python env to workers, and handle setting\n+#   PYSPARK_PYTHON to the correct value themselves,\n+#\n+#   So, we default PYSPARK_PYTHON to $PYTHON_PREFIX_GLOBAL/bin/python3\n+\n+\n+# This python env prefix is expected to exist on all potential spark nodes, both masters and workers.\n+# This will be used for default PYSPARK_*_PYTHON unless a conda env is currently active.\n+PYTHON_PREFIX_GLOBAL=$(realpath ${PYTHON_PREFIX_GLOBAL:-\"/opt/conda-analytics-next\"})\n+\n+# If a conda environment is active (CONDA_PREFIX is set),\n+# then assume this is the user's python environment\n+# If it isn't then check to see if $SPARK_HOME is currently in a pip installed pyspark site-packages\n+# inside a python environment.  If it is, then assume that is the user's python environment.\n+# NOTE: At this point SPARK_HOME should be set, either by the user or by the find-spark-home\n+# script that comes with spark.\n+PYTHON_PREFIX_USER=$(realpath \"${CONDA_PREFIX:-$(dirname $(dirname $(dirname $(dirname $SPARK_HOME))))}\")\n+\n+# Default PYSPARK_DRIVER_PYTHON to ipython3 if running pyspark CLI directly.\n+if [[ -z \"${PYSPARK_DRIVER_PYTHON}\" && \"${0}\" == *pyspark ]]; then\n+\n+    # If a user's python environment exists and has ipython3, use it\n+    if [ -n \"${PYTHON_PREFIX_USER}\" -a -e \"${PYTHON_PREFIX_USER}/bin/ipython3\" ]; then\n+        export PYSPARK_DRIVER_PYTHON=\"${PYTHON_PREFIX_USER}/bin/ipython3\"\n+\n+    # in case the user's python environment exists without ipython3, but does have python3, use it.\n+    elif [ -n \"${PYTHON_PREFIX_USER}\" -a -e \"${PYTHON_PREFIX_USER}/bin/python3\" ]; then\n+        export PYSPARK_DRIVER_PYTHON=\"${PYTHON_PREFIX_USER}/bin/python3\"\n+\n+    # Else use PYTHON_PREFIX_GLOBAL's ipython3.\n+    else\n+        export PYSPARK_DRIVER_PYTHON=\"${PYTHON_PREFIX_GLOBAL}/bin/ipython3\"\n+\n+    fi\n+fi\n+\n+if [ -z \"${PYSPARK_PYTHON}\" ]; then\n+    # Search the CLI opts to find the master option, if it is given.\n+    # PYSPARK_PYTHON needs to be set to something that will work on remote executors\n+    # if a conda environment is active.  This will only work if\n+    # the SparkSession master is being set via the CLI.\n+    spark_master=''\n+    for ((i = 1; i <= $#; i++ )); do\n+       arg=\"${!i}\"\n+       if [[ \"${arg}\" == --master ]]; then\n+           master_index=$((i+1))\n+           spark_master=\"${!master_index}\"\n+       elif [[ \"${arg}\" == --master=* ]]; then\n+           spark_master=\"$(echo ${arg} | cut -f2 -d=)\"\n+       fi\n+    done\n+\n+    # If not YARN and user env's python exists, we can safely use the user env's python.\n+    if [[ \"${spark_master}\" != yarn* && -n \"${PYTHON_PREFIX_USER}\" && -e \"${PYTHON_PREFIX_USER}/bin/python3\" ]]; then\n+        export PYSPARK_PYTHON=\"${PYTHON_PREFIX_USER}/bin/python3\"\n+\n+    # else use the PYTHON_PREFIX_GLOBAL's python.\n+    # This MUST exist on all worker nodes.\n+    else\n+        export PYSPARK_PYTHON=\"${PYTHON_PREFIX_GLOBAL}/bin/python3\"\n+\n+    fi\n+fi\n+\n+# Note: If PYSPARK_DRIVER_PYTHON and PYSPARK_PYTHON are not set at this point,\n+# the pyspark script will just use system 'python'.\n+test -n \"${PYSPARK_DRIVER_PYTHON}\" && echo \"PYSPARK_DRIVER_PYTHON=${PYSPARK_DRIVER_PYTHON}\"\n+test -n \"${PYSPARK_PYTHON}\" && echo \"PYSPARK_PYTHON=${PYSPARK_PYTHON}\"", "parameters": "--- File[/etc/spark35/conf/spark-env.sh].orig\n+++ File[/etc/spark35/conf/spark-env.sh]\n\n+    owner => root\n+    group => root\n+    mode  => 0644\n"}, {"resource": "File[/etc/spark35/conf/log4j2.properties]", "parameters": "--- File[/etc/spark35/conf/log4j2.properties].orig\n+++ File[/etc/spark35/conf/log4j2.properties]\n\n+    source => puppet:///modules/profile/hadoop/spark3/spark35_log4j2.properties\n+    owner  => root\n+    group  => root\n+    mode   => 0644\n"}, {"resource": "File[/usr/bin/spark35-shell]", "content": "--- /usr/bin/spark35-shell.orig\n+++ /usr/bin/spark35-shell\n@@ -0,0 +1,4 @@\n+#!/usr/bin/env bash\n+# Managed by Puppet\n+export SPARK_CONF_DIR=/etc/spark35/conf\n+exec /usr/lib/spark35/bin/spark-shell \"$@\"", "parameters": "--- File[/usr/bin/spark35-shell].orig\n+++ File[/usr/bin/spark35-shell]\n\n+    owner  => root\n+    group  => root\n+    ensure => file\n+    mode   => 0755\n"}, {"resource": "File[/usr/bin/spark35-class]", "content": "--- /usr/bin/spark35-class.orig\n+++ /usr/bin/spark35-class\n@@ -0,0 +1,4 @@\n+#!/usr/bin/env bash\n+# Managed by Puppet\n+export SPARK_CONF_DIR=/etc/spark35/conf\n+exec /usr/lib/spark35/bin/spark-class \"$@\"", "parameters": "--- File[/usr/bin/spark35-class].orig\n+++ File[/usr/bin/spark35-class]\n\n+    owner  => root\n+    group  => root\n+    ensure => file\n+    mode   => 0755\n"}], "perc_changed": "0.92%"}, "core": {"total": 4896, "only_in_self": [], "only_in_other": ["File[/etc/ferm/conf.d/10_spark35_driver]", "File[/etc/ferm/conf.d/10_spark35_driver_blockmanager]", "File[/etc/ferm/conf.d/10_spark35_ui_port]", "File[/etc/spark35/conf/hive-site.xml]", "File[/etc/spark35/conf/log4j2.properties]", "File[/etc/spark35/conf/spark-defaults.conf]", "File[/etc/spark35/conf/spark-env.sh]", "File[/etc/spark35/conf]", "File[/etc/spark35]", "File[/usr/bin/spark35-class]", "File[/usr/bin/spark35-pyspark]", "File[/usr/bin/spark35-shell]", "File[/usr/bin/spark35-sql]", "File[/usr/bin/spark35-submit]", "File[/usr/lib/spark35]"], "resource_diffs": [], "perc_changed": "0.31%"}, "main": {"total": 4896, "only_in_self": [], "only_in_other": ["Class[Profile::Hadoop::Spark35]", "Ferm::Service[spark35_driver]", "Ferm::Service[spark35_driver_blockmanager]", "Ferm::Service[spark35_ui_port]", "File[/etc/ferm/conf.d/10_spark35_driver]", "File[/etc/ferm/conf.d/10_spark35_driver_blockmanager]", "File[/etc/ferm/conf.d/10_spark35_ui_port]", "File[/etc/spark35/conf/hive-site.xml]", "File[/etc/spark35/conf/log4j2.properties]", "File[/etc/spark35/conf/spark-defaults.conf]", "File[/etc/spark35/conf/spark-env.sh]", "File[/etc/spark35/conf]", "File[/etc/spark35]", "File[/usr/bin/spark35-class]", "File[/usr/bin/spark35-pyspark]", "File[/usr/bin/spark35-shell]", "File[/usr/bin/spark35-sql]", "File[/usr/bin/spark35-submit]", "File[/usr/lib/spark35]", "Firewall::Service[spark35-driver-blockmanager]", "Firewall::Service[spark35-driver]", "Firewall::Service[spark35-ui-port]"], "resource_diffs": [{"resource": "Class[Profile::Analytics::Cluster::Client]", "parameters": "--- Class[Profile::Analytics::Cluster::Client].orig\n+++ Class[Profile::Analytics::Cluster::Client]\n\n@@\n-    require => ['Class[Profile::Analytics::Cluster::Packages::Common]', 'Class[Profile::Hadoop::Common]', 'Class[Profile::Hive::Client]', 'Class[Profile::Hadoop::Spark3]']\n+    require => ['Class[Profile::Analytics::Cluster::Packages::Common]', 'Class[Profile::Hadoop::Common]', 'Class[Profile::Hive::Client]']\n"}], "perc_changed": "0.47%"}}}