Issue after change to 3.0.2


Issue after change to 3.0.2

Bode, Meikel, NMA-CFD

Hi All,

 

After upgrading to 3.0.2 I am facing the following issue. Thanks for any hints.

 

Best,

Meikel

 

   df = self.spark.read.json(path_in)
  File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 300, in json
  File "/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
  File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 128, in deco
  File "/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 326, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o76.json.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 14, 192.168.1.6, executor 0): java.io.InvalidClassException: org.apache.spark.broadcast.TorrentBroadcast; local class incompatible: stream classdesc serialVersionUID = 4804550167553929379, local class serialVersionUID = 3291767831129286585
        at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:699)
        at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2003)
        at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1850)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2160)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
        at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
        at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:407)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2059)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2008)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2007)
        at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
        at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2007)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:973)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:973)
        at scala.Option.foreach(Option.scala:407)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:973)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2239)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2188)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2177)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:775)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2209)
        at org.apache.spark.sql.catalyst.json.JsonInferSchema.infer(JsonInferSchema.scala:94)
        at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.$anonfun$inferFromDataset$5(JsonDataSource.scala:110)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
        at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.inferFromDataset(JsonDataSource.scala:110)
        at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.infer(JsonDataSource.scala:99)
        at org.apache.spark.sql.execution.datasources.json.JsonDataSource.inferSchema(JsonDataSource.scala:65)
        at org.apache.spark.sql.execution.datasources.json.JsonFileFormat.inferSchema(JsonFileFormat.scala:61)
        at org.apache.spark.sql.execution.datasources.DataSource.$anonfun$getOrInferFileFormatSchema$11(DataSource.scala:208)
        at scala.Option.orElse(Option.scala:447)
        at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:205)
        at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:418)
        at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:297)
        at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:286)
        at scala.Option.getOrElse(Option.scala:189)
        at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:286)
        at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:477)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
        at py4j.Gateway.invoke(Gateway.java:282)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:238)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.InvalidClassException: org.apache.spark.broadcast.TorrentBroadcast; local class incompatible: stream classdesc serialVersionUID = 4804550167553929379, local class serialVersionUID = 3291767831129286585
        at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:699)
        at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2003)
        at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1850)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2160)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
        at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
        at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:407)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        ... 1 more


Re: Issue after change to 3.0.2

Mich Talebzadeh
So you have upgraded to Spark 3.0.2?

How are you running your PySpark job? Is this through a Python virtual env or spark-submit? It sounds like it cannot create an executor.

Can you run it in local mode?

spark-submit --master local[1] --deploy-mode client <your python.py file>

Also check the values of PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON.
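For example, a quick way to print what those variables are set to in the environment that launches the driver (a minimal sketch; SPARK_HOME is included here only on the assumption it is also set on your machines):

    import os

    # Print the interpreter-related variables Spark reads at launch time.
    for var in ("PYSPARK_PYTHON", "PYSPARK_DRIVER_PYTHON", "SPARK_HOME"):
        print(var, "=", os.environ.get(var, "<not set>"))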


HTH



LinkedIn  https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw

 



Disclaimer: Use it at your own risk. Any and all responsibility for any loss, damage or destruction of data or any other property which may arise from relying on this email's technical content is explicitly disclaimed. The author will in no case be liable for any monetary damages arising from such loss, damage or destruction.

 





Re: Issue after change to 3.0.2

srowen
In reply to this post by Bode, Meikel, NMA-CFD
That looks to me like you have two different versions of Spark in use somewhere here, as if the cluster and driver versions aren't quite the same. Check your classpaths?
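As a hedged illustration of how one might check for the kind of mismatch described here (a sketch, not a confirmed diagnosis of this particular cluster), compare the driver's Spark version with what the executors' Python workers load:

    from pyspark.sql import SparkSession
    import pyspark

    spark = SparkSession.builder.getOrCreate()

    # Version of the Spark jars the driver is talking to.
    print("driver:", spark.version)

    # Ask each executor's Python worker which pyspark version it imports; in a
    # mixed deployment this can differ from the driver's version. Note that if
    # even this trivial job dies with the same InvalidClassException, that by
    # itself points to different Spark versions between driver and executors.
    versions = (
        spark.sparkContext
        .parallelize(range(4), 4)
        .map(lambda _: pyspark.__version__)
        .distinct()
        .collect()
    )
    print("executors:", versions)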



Re: Issue after change to 3.0.2

Bode, Meikel, NMA-CFD

Hi Sean,

You are right. We are using Docker images for our Spark cluster. The build of the worker image did not succeed, so the old 3.0.1 image was still in use.
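For anyone hitting the same problem: a hedged sketch of how a rebuilt worker image could be verified before deploying it again (the image tag my-spark-worker:3.0.2 is hypothetical; /opt/spark matches the paths in the trace above):

    import subprocess

    # spark-submit prints a version banner without needing a running cluster;
    # the banner typically goes to stderr, so capture both streams.
    result = subprocess.run(
        ["docker", "run", "--rm", "my-spark-worker:3.0.2",
         "/opt/spark/bin/spark-submit", "--version"],
        capture_output=True, text=True,
    )
    print(result.stdout or result.stderr)  # expect "version 3.0.2" in the banner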

 

Thanks,

Best,

Meikel

 

From: Sean Owen <[hidden email]>
Sent: Friday, 26 February 2021 10:29
To: Bode, Meikel, NMA-CFD <[hidden email]>
Cc: user @spark <[hidden email]>
Subject: Re: Issue after change to 3.0.2

 
