diff --git a/docs/modules/spark-k8s/examples/example-history-app.yaml b/docs/modules/spark-k8s/examples/example-history-app.yaml index 19642f76..30cd0e96 100644 --- a/docs/modules/spark-k8s/examples/example-history-app.yaml +++ b/docs/modules/spark-k8s/examples/example-history-app.yaml @@ -5,7 +5,7 @@ metadata: name: spark-pi-s3-1 spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.0 pullPolicy: IfNotPresent mode: cluster mainClass: org.apache.spark.examples.SparkPi diff --git a/docs/modules/spark-k8s/examples/example-history-server.yaml b/docs/modules/spark-k8s/examples/example-history-server.yaml index 48325019..75f8df27 100644 --- a/docs/modules/spark-k8s/examples/example-history-server.yaml +++ b/docs/modules/spark-k8s/examples/example-history-server.yaml @@ -5,7 +5,7 @@ metadata: name: spark-history spec: image: - productVersion: 3.5.6 + productVersion: 4.0.0 logFileDirectory: # <1> s3: prefix: eventlogs/ # <2> diff --git a/docs/modules/spark-k8s/examples/example-pvc.yaml b/docs/modules/spark-k8s/examples/example-pvc.yaml index 31c9a2ef..e0d35358 100644 --- a/docs/modules/spark-k8s/examples/example-pvc.yaml +++ b/docs/modules/spark-k8s/examples/example-pvc.yaml @@ -1,8 +1,8 @@ --- apiVersion: v1 -kind: PersistentVolume +kind: PersistentVolume # <1> metadata: - name: pv-ksv # <1> + name: pv-ksv spec: storageClassName: standard accessModes: @@ -13,11 +13,11 @@ spec: path: /some-host-location --- apiVersion: v1 -kind: PersistentVolumeClaim +kind: PersistentVolumeClaim # <2> metadata: - name: pvc-ksv # <2> + name: pvc-ksv spec: - volumeName: pv-ksv # <1> + volumeName: pv-ksv # <3> accessModes: - ReadWriteOnce resources: @@ -25,7 +25,7 @@ spec: storage: 1Gi --- apiVersion: batch/v1 -kind: Job +kind: Job # <4> metadata: name: aws-deps spec: @@ -33,11 +33,11 @@ spec: spec: restartPolicy: Never volumes: - - name: job-deps # <3> + - name: job-deps persistentVolumeClaim: - claimName: pvc-ksv # <2> + claimName: pvc-ksv # <5> containers: - name: aws-deps volumeMounts: - - name: job-deps # <4> + - name: job-deps mountPath: /stackable/spark/dependencies diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-configmap.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-configmap.yaml index 3f16e07e..824ab97b 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-configmap.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-configmap.yaml @@ -6,7 +6,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.0 mode: cluster mainApplicationFile: s3a://stackable-spark-k8s-jars/jobs/ny-tlc-report-1.1.0.jar # <3> mainClass: tech.stackable.demo.spark.NYTLCReport diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-image.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-image.yaml index 58a8348f..d24b2055 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-image.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-image.yaml @@ -7,7 +7,7 @@ metadata: spec: image: oci.stackable.tech/stackable/ny-tlc-report:0.2.0 # <1> sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.0 mode: cluster mainApplicationFile: local:///stackable/spark/jobs/ny_tlc_report.py # <2> args: diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml index 1b44a541..edd66888 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-pvc.yaml @@ -6,28 +6,30 @@ metadata: namespace: default 
spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.0 mode: cluster - mainApplicationFile: s3a://stackable-spark-k8s-jars/jobs/ny-tlc-report-1.0-SNAPSHOT.jar # <1> + mainApplicationFile: s3a://my-bucket/app.jar # <1> mainClass: org.example.App # <2> - args: - - "'s3a://nyc-tlc/trip data/yellow_tripdata_2021-07.csv'" sparkConf: # <3> - "spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider" "spark.driver.extraClassPath": "/dependencies/jars/*" "spark.executor.extraClassPath": "/dependencies/jars/*" volumes: - name: job-deps # <4> persistentVolumeClaim: claimName: pvc-ksv - driver: + job: config: volumeMounts: - name: job-deps mountPath: /dependencies # <5> + driver: + config: + volumeMounts: + - name: job-deps + mountPath: /dependencies # <6> executor: replicas: 3 config: volumeMounts: - name: job-deps - mountPath: /dependencies # <5> + mountPath: /dependencies # <7> diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-s3-private.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-s3-private.yaml index a06ae5e5..de8c510a 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-s3-private.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-s3-private.yaml @@ -5,7 +5,7 @@ metadata: name: example-sparkapp-s3-private spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.0 mode: cluster mainApplicationFile: s3a://my-bucket/spark-examples.jar # <1> mainClass: org.apache.spark.examples.SparkPi # <2> diff --git a/docs/modules/spark-k8s/examples/example-sparkapp-streaming.yaml b/docs/modules/spark-k8s/examples/example-sparkapp-streaming.yaml index fc67a9fb..83a230eb 100644 --- a/docs/modules/spark-k8s/examples/example-sparkapp-streaming.yaml +++ b/docs/modules/spark-k8s/examples/example-sparkapp-streaming.yaml @@ -6,7 +6,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.0 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/streaming/hdfs_wordcount.py args: diff --git a/docs/modules/spark-k8s/examples/getting_started/getting_started.sh b/docs/modules/spark-k8s/examples/getting_started/getting_started.sh index aad89c62..36639bc3 100755 --- a/docs/modules/spark-k8s/examples/getting_started/getting_started.sh +++ b/docs/modules/spark-k8s/examples/getting_started/getting_started.sh @@ -56,7 +56,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.0 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py driver: diff --git a/docs/modules/spark-k8s/examples/getting_started/getting_started.sh.j2 b/docs/modules/spark-k8s/examples/getting_started/getting_started.sh.j2 index 90d6783f..71c0255d 100755 --- a/docs/modules/spark-k8s/examples/getting_started/getting_started.sh.j2 +++ b/docs/modules/spark-k8s/examples/getting_started/getting_started.sh.j2 @@ -56,7 +56,7 @@ metadata: namespace: default spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.0 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py driver: diff --git a/docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc b/docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc index a5bbd918..69fd4127 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/job-dependencies.adoc @@ -4,9 +4,6 @@ == Overview -IMPORTANT: With the platform release 23.4.1 and Apache Spark 
3.3.x (and all previous releases), dynamic provisioning of dependencies using the Spark `packages` field doesn't work. -This is a known problem with Spark and is tracked https://github.com/stackabletech/spark-k8s-operator/issues/141[here]. - The container images provided by Stackable include Apache Spark and PySpark applications and libraries. In addition, they include commonly used libraries to connect to storage systems supporting the `hdfs://`, `s3a://` and `abfs://` protocols. These systems are commonly used to store data processed by Spark applications. @@ -24,46 +21,29 @@ To provision job dependencies in Spark workloads, you construct the `SparkApplic * Maven/Java packages * Python packages -The following table provides a high level overview of the relevant aspects of each method. - -|=== -|Dependency specification |Job image size |Reproduciblity |Dev-op cost - -|Custom Spark images -|Large -|Guaranteed -|Medium to High - -|Dependency volumes -|Small -|Guaranteed -|Small to Medium - -|Maven/Java packages -|Small -|Not guaranteed -|Small - -|Python packages -|Small -|Not guaranteed -|Small -|=== - === Custom Spark images -With this method, you submit a `SparkApplication` for which the `sparkImage` refers to the full custom image name. It is recommended to start the custom image from one of the Stackable images to ensure compatibility with the Stackable operator. +With this method, you submit a `SparkApplication` for which the `sparkImage` refers to the full custom image name. It is recommended to start the custom image from one of the Stackable Spark images to ensure compatibility with the operator. Below is an example of a custom image that includes a JDBC driver: [source, Dockerfile] ---- -FROM oci.stackable.tech/sdp/spark-k8s:3.5.6-stackable25.3.0 # <1> +FROM oci.stackable.tech/sdp/spark-k8s:4.0.0-stackable0.0.0-dev # <1> -RUN curl --fail -o /stackable/spark/jars/postgresql-42.6.0.jar "https://jdbc.postgresql.org/download/postgresql-42.6.0.jar" +RUN curl --fail -o /stackable/spark/jars/postgresql-42.6.0.jar "https://jdbc.postgresql.org/download/postgresql-42.6.0.jar" # <2> ---- <1> Start from an existing Stackable image. +<2> Download the JDBC driver and place it in the Spark JARs directory. + +Build your custom image and push it to your container registry. + +[source, bash] +---- +docker build -t my-registry/spark-k8s:4.0.0-psql . +docker push my-registry/spark-k8s:4.0.0-psql +---- And the following snippet showcases an application that uses the custom image: @@ -75,14 +55,13 @@ metadata: name: spark-jdbc spec: sparkImage: - custom: "oci.stackable.tech/sandbox/spark-k8s:3.5.6-stackable0.0.0-dev" # <1> - productVersion: "3.5.6" # <2> - pullPolicy: IfNotPresent # <3> + custom: "my-registry/spark-k8s:4.0.0-psql" # <1> + productVersion: "4.0.0" # <2> ... ---- -<1> Name of the custom image. -<2> Apache Spark version. Needed for the operator to take the correct actions. -<3> Optional. Defaults to `Always`. + +<1> Reference to your custom image. +<2> Apache Spark version bundled in your custom image. === Dependency volumes @@ -93,28 +72,34 @@ With this method, the job dependencies are provisioned from a `PersistentVolume` include::example$example-sparkapp-pvc.yaml[] ---- <1> Job artifact located on S3. 
-<2> Job main class -<3> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in s3, accessed without credentials) -<4> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing -<5> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors +<2> Name of the main class to run. +<3> The job dependencies provisioned from the volume below are added to the class path of the driver and executors. +<4> A `PersistentVolumeClaim` created by the user prior to submitting the Spark job. +<5> The volume containing the dependencies is mounted in the job pod. +<6> The volume containing the dependencies is mounted in the driver pod. +<7> The volume containing the dependencies is mounted in the executor pods. NOTE: The Spark operator has no control over the contents of the dependency volume. It is your responsibility to make sure all required dependencies are installed in the correct versions. -A `PersistentVolumeClaim` and the associated `PersistentVolume` can be defined like this: +A `PersistentVolumeClaim` and the associated `PersistentVolume` can be defined and provisioned like this: [source,yaml] ---- include::example$example-pvc.yaml[] ---- -<1> Reference to a `PersistentVolume`, defining some cluster-reachable storage -<2> The name of the `PersistentVolumeClaim` that references the PV -<3> Defines a `Volume` backed by the PVC, local to the Custom Resource -<4> Defines the `VolumeMount` that is used by the Custom Resource - +<1> Create a volume. The definition, size and type of the volume are highly dependent on the type of cluster you are using. +<2> Create a persistent volume claim. This allows the volume to be populated with the necessary dependencies and later referenced by the Spark job. +<3> The volume name is referenced by the `PersistentVolumeClaim`. +<4> Create a job that mounts the volume and populates it with the necessary dependencies. This job must be run before submitting the Spark job. +<5> The job references the `PersistentVolumeClaim` created above. === Maven packages The last and most flexible way to provision dependencies is to use the built-in `spark-submit` support for Maven package coordinates. +The downside of this method is that job dependencies are downloaded every time the job is submitted, which has several implications you must be aware of. +For example, the job submission time will be longer than with the other methods. +Network connectivity problems may lead to job submission failures. +Finally, not all types of dependencies can be provisioned this way. Most notably, JDBC drivers cannot, since the JVM only looks for them at startup time. The snippet below showcases how to add Apache Iceberg support to a Spark (version 3.4.x) application. @@ -138,11 +123,12 @@ spec: ... ---- -<1> Maven package coordinates for Apache Iceberg. This is downloaded from the Manven repository and made available to the Spark application. +<1> Maven package coordinates for Apache Iceberg. This is downloaded from the central Maven repository and made available to the Spark application. -IMPORTANT: Currently it's not possible to provision dependencies that are loaded by the JVM's https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/ClassLoader.html#getSystemClassLoader()[system class loader]. 
-Such dependencies include JDBC drivers. -If you need access to JDBC sources from your Spark application, consider building your own custom Spark image as shown above. +As mentioned above, not all dependencies can be provisioned this way. +JDBC drivers are notorious for not being supported by this method, but other types of dependencies may also not work. +Whether a JAR file can be provisioned using its Maven coordinates depends largely on how it is loaded by the JVM. +In such cases, consider building your own custom Spark image as shown above. === Python packages diff --git a/docs/modules/spark-k8s/partials/supported-versions.adoc b/docs/modules/spark-k8s/partials/supported-versions.adoc index a132ec2d..28c59c2b 100644 --- a/docs/modules/spark-k8s/partials/supported-versions.adoc +++ b/docs/modules/spark-k8s/partials/supported-versions.adoc @@ -3,5 +3,6 @@ // Stackable Platform documentation. // Please sort the versions in descending order (newest first) +- 4.0.0 (Hadoop 3.4.1, Scala 2.13, Python 3.11, Java 17) (Experimental) - 3.5.5 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (Deprecated) - 3.5.6 (Hadoop 3.3.4, Scala 2.12, Python 3.11, Java 17) (LTS) diff --git a/rust/operator-binary/src/connect/server.rs b/rust/operator-binary/src/connect/server.rs index 9beaac8c..d469dedb 100644 --- a/rust/operator-binary/src/connect/server.rs +++ b/rust/operator-binary/src/connect/server.rs @@ -550,20 +550,21 @@ pub(crate) fn server_properties( ), ( "spark.driver.extraClassPath".to_string(), - Some(format!("/stackable/spark/extra-jars/*:/stackable/spark/connect/spark-connect_2.12-{spark_version}.jar")), + Some(format!("/stackable/spark/extra-jars/*:/stackable/spark/connect/spark-connect-{spark_version}.jar")), ), ( "spark.metrics.conf".to_string(), - Some(format!("{VOLUME_MOUNT_PATH_CONFIG}/{METRICS_PROPERTIES_FILE}")), + Some(format!( + "{VOLUME_MOUNT_PATH_CONFIG}/{METRICS_PROPERTIES_FILE}" + )), ), // This enables the "/metrics/executors/prometheus" endpoint on the server pod. // The driver collects metrics from the executors and makes them available here. - // The "/metrics/prometheus" endpoint delievers the driver metrics. + // The "/metrics/prometheus" endpoint delivers the driver metrics. 
( "spark.ui.prometheus.enabled".to_string(), Some("true".to_string()), ), - ] .into(); diff --git a/tests/templates/kuttl/hbase-connector/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/hbase-connector/10-deploy-spark-app.yaml.j2 index efffe14e..17099750 100644 --- a/tests/templates/kuttl/hbase-connector/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/hbase-connector/10-deploy-spark-app.yaml.j2 @@ -8,11 +8,11 @@ spec: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} sparkImage: -{% if test_scenario['values']['spark'].find(",") > 0 %} - custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% if test_scenario['values']['spark-hbase-connector'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-hbase-connector'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-hbase-connector'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['spark'] }}" + productVersion: "{{ test_scenario['values']['spark-hbase-connector'] }}" {% endif %} # pullPolicy: IfNotPresent pullPolicy: Always diff --git a/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 index 84315812..de4ffec2 100644 --- a/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 @@ -8,11 +8,11 @@ spec: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} sparkImage: -{% if test_scenario['values']['spark'].find(",") > 0 %} - custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% if test_scenario['values']['spark-iceberg'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-iceberg'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-iceberg'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['spark'] }}" + productVersion: "{{ test_scenario['values']['spark-iceberg'] }}" {% endif %} pullPolicy: IfNotPresent mode: cluster @@ -48,7 +48,7 @@ spec: # # We extract the spark parts from the test scenario value. 
# - - org.apache.iceberg:iceberg-spark-runtime-{{ ".".join(test_scenario['values']['spark'].split('.')[:2]) }}_2.12:1.8.1 + - org.apache.iceberg:iceberg-spark-runtime-{{ ".".join(test_scenario['values']['spark-iceberg'].split('.')[:2]) }}_2.12:1.8.1 volumes: - name: script configMap: diff --git a/tests/templates/kuttl/logging/05-deploy-history-server.yaml.j2 b/tests/templates/kuttl/logging/05-deploy-history-server.yaml.j2 index 65b5917a..a8f17475 100644 --- a/tests/templates/kuttl/logging/05-deploy-history-server.yaml.j2 +++ b/tests/templates/kuttl/logging/05-deploy-history-server.yaml.j2 @@ -22,11 +22,11 @@ metadata: name: spark-history spec: image: -{% if test_scenario['values']['spark'].find(",") > 0 %} - custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% if test_scenario['values']['spark-logging'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-logging'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-logging'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['spark'] }}" + productVersion: "{{ test_scenario['values']['spark-logging'] }}" {% endif %} pullPolicy: IfNotPresent vectorAggregatorConfigMapName: spark-vector-aggregator-discovery diff --git a/tests/templates/kuttl/logging/06-deploy-automatic-log-config-spark-app.yaml.j2 b/tests/templates/kuttl/logging/06-deploy-automatic-log-config-spark-app.yaml.j2 index 9804e8c1..981f7a0b 100644 --- a/tests/templates/kuttl/logging/06-deploy-automatic-log-config-spark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/06-deploy-automatic-log-config-spark-app.yaml.j2 @@ -5,11 +5,11 @@ metadata: name: spark-automatic-log-config spec: sparkImage: -{% if test_scenario['values']['spark'].find(",") > 0 %} - custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% if test_scenario['values']['spark-logging'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-logging'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-logging'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['spark'] }}" + productVersion: "{{ test_scenario['values']['spark-logging'] }}" {% endif %} pullPolicy: IfNotPresent image: oci.stackable.tech/stackable/ny-tlc-report:{{ test_scenario['values']['ny-tlc-report'] }} diff --git a/tests/templates/kuttl/logging/07-deploy-custom-log-config-spark-app.yaml.j2 b/tests/templates/kuttl/logging/07-deploy-custom-log-config-spark-app.yaml.j2 index 70d64f0c..95de1283 100644 --- a/tests/templates/kuttl/logging/07-deploy-custom-log-config-spark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/07-deploy-custom-log-config-spark-app.yaml.j2 @@ -22,11 +22,11 @@ metadata: name: spark-custom-log-config spec: sparkImage: -{% if test_scenario['values']['spark'].find(",") > 0 %} - custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% if test_scenario['values']['spark-logging'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-logging'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-logging'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['spark'] }}" + productVersion: "{{ test_scenario['values']['spark-logging'] }}" {% endif %} pullPolicy: IfNotPresent image: oci.stackable.tech/stackable/ny-tlc-report:{{ 
test_scenario['values']['ny-tlc-report'] }} diff --git a/tests/templates/kuttl/logging/08-deploy-automatic-log-config-pyspark-app.yaml.j2 b/tests/templates/kuttl/logging/08-deploy-automatic-log-config-pyspark-app.yaml.j2 index d1e70604..ca375989 100644 --- a/tests/templates/kuttl/logging/08-deploy-automatic-log-config-pyspark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/08-deploy-automatic-log-config-pyspark-app.yaml.j2 @@ -5,11 +5,11 @@ metadata: name: pyspark-automatic-log-config spec: sparkImage: -{% if test_scenario['values']['spark'].find(",") > 0 %} - custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% if test_scenario['values']['spark-logging'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-logging'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-logging'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['spark'] }}" + productVersion: "{{ test_scenario['values']['spark-logging'] }}" {% endif %} pullPolicy: IfNotPresent vectorAggregatorConfigMapName: spark-vector-aggregator-discovery diff --git a/tests/templates/kuttl/logging/09-deploy-custom-log-config-pyspark-app.yaml.j2 b/tests/templates/kuttl/logging/09-deploy-custom-log-config-pyspark-app.yaml.j2 index 3899b0e4..247d12d8 100644 --- a/tests/templates/kuttl/logging/09-deploy-custom-log-config-pyspark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/09-deploy-custom-log-config-pyspark-app.yaml.j2 @@ -22,11 +22,11 @@ metadata: name: pyspark-custom-log-config spec: sparkImage: -{% if test_scenario['values']['spark'].find(",") > 0 %} - custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% if test_scenario['values']['spark-logging'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-logging'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-logging'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['spark'] }}" + productVersion: "{{ test_scenario['values']['spark-logging'] }}" {% endif %} pullPolicy: IfNotPresent vectorAggregatorConfigMapName: spark-vector-aggregator-discovery diff --git a/tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2 b/tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2 index 85aff8e7..89feba22 100644 --- a/tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2 +++ b/tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2 @@ -57,10 +57,10 @@ spec: activeDeadlineSeconds: 600 containers: - name: simple-connect-app -{% if test_scenario['values']['spark-connect-client'].find(",") > 0 %} - image: "{{ test_scenario['values']['spark-connect-client'].split(',')[1] }}" +{% if test_scenario['values']['spark-connect'].find(",") > 0 %} + image: "{{ test_scenario['values']['spark-connect'].split(',')[1] }}" {% else %} - image: oci.stackable.tech/stackable/spark-connect-client:{{ test_scenario['values']['spark-connect-client'] }}-stackable0.0.0-dev + image: oci.stackable.tech/sdp/spark-connect-client:{{ test_scenario['values']['spark-connect'] }}-stackable0.0.0-dev {% endif %} imagePullPolicy: IfNotPresent command: diff --git a/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.0.jar b/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.0.jar new file mode 100644 index 00000000..965cedd9 Binary files /dev/null and 
b/tests/templates/kuttl/spark-ny-public-s3/ny-tlc-report-1.1.0-4.0.0.jar differ diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 8db6043a..37693cf6 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -7,20 +7,29 @@ dimensions: values: - 3.5.5 - 3.5.6 + - 4.0.0 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. # - 3.5.6,oci.stackable.tech/sandbox/spark-k8s:3.5.6-stackable0.0.0-dev + - name: spark-logging + values: + - 3.5.5 + - 3.5.6 + - name: spark-hbase-connector + values: + - 3.5.5 + - 3.5.6 - name: spark-delta-lake values: - 3.5.6 # - 3.5.6,oci.stackable.tech/sandbox/spark-k8s:3.5.6-stackable0.0.0-dev - - name: spark-connect + - name: spark-iceberg values: - 3.5.6 - # - 3.5.6,oci.stackable.tech/sandbox/spark-k8s:3.5.6-stackable0.0.0-dev - - name: spark-connect-client + - name: spark-connect values: - 3.5.6 + - 4.0.0 # - 3.5.6,oci.stackable.tech/sandbox/spark-k8s:3.5.6-stackable0.0.0-dev - name: hbase values: @@ -85,12 +94,12 @@ tests: - openshift - name: logging dimensions: - - spark + - spark-logging - ny-tlc-report - openshift - name: iceberg dimensions: - - spark + - spark-iceberg - openshift - name: delta-lake dimensions: @@ -99,7 +108,7 @@ tests: - openshift - name: hbase-connector dimensions: - - spark + - spark-hbase-connector - hbase - hdfs-latest - zookeeper-latest @@ -113,7 +122,6 @@ tests: - name: spark-connect dimensions: - spark-connect - - spark-connect-client - openshift suites: