In Spark 2.4 with Arrow enabled, the error log is not returned; in Spark 2.3 there is no such problem.
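For context, a minimal sketch of the kind of job that produces the tracebacks below. The sample data, app name, and exact body of mapfunc are assumptions (the original python1.py is not part of this thread); only the Arrow setting and the float conversion failure come from the logs.

# Hypothetical repro sketch: a map function that fails on a bad row, collected
# with toPandas() while spark.sql.execution.arrow.enabled is toggled.
from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.appName("arrow-error-log-test").getOrCreate()
spark.conf.set("spark.sql.execution.arrow.enabled", "true")  # case 1 below; "false" for case 2

def mapfunc(line):
    # Raises "ValueError: could not convert string to float: 'a'" on the bad row.
    return Row(value=float(line))

df = spark.sparkContext.parallelize(["1.0", "2.0", "a"], 2).map(mapfunc).toDF()
df.toPandas()  # the resulting task failures appear in the YARN executor logs below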
1. spark.sql.execution.arrow.enabled=true
YARN log:
18/12/15 14:35:52 INFO CodeGenerator: Code generated in 1030.698785 ms 18/12/15 14:35:54 INFO PythonRunner: Times: total = 1985, boot = 1892, init = 92, finish = 1 18/12/15 14:35:54 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1799 bytes result sent to driver 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 1 18/12/15 14:35:55 INFO Executor: Running task 0.0 in stage 1.0 (TID 1) 18/12/15 14:35:55 INFO TorrentBroadcast: Started reading broadcast variable 1 18/12/15 14:35:55 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 8.3 KB, free 1048.8 MB) 18/12/15 14:35:55 INFO TorrentBroadcast: Reading broadcast variable 1 took 18 ms 18/12/15 14:35:55 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 14.0 KB, free 1048.8 MB) 18/12/15 14:35:55 INFO CodeGenerator: Code generated in 30.269745 ms 18/12/15 14:35:55 INFO PythonRunner: Times: total = 13, boot = 5, init = 7, finish = 1 18/12/15 14:35:55 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1893 bytes result sent to driver 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 2 18/12/15 14:35:55 INFO Executor: Running task 1.0 in stage 1.0 (TID 2) 18/12/15 14:35:55 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 2) org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main process() File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper return f(*args, **kwargs) File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97) at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314) at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 3 18/12/15 14:35:55 INFO Executor: Running task 1.1 in stage 1.0 (TID 3) 18/12/15 14:35:55 ERROR Executor: Exception in task 1.1 in stage 1.0 (TID 3) org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main process() File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper return f(*args, **kwargs) File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97) at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314) at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 4 18/12/15 14:35:55 INFO Executor: Running task 1.2 in stage 1.0 (TID 4) 18/12/15 14:35:55 ERROR Executor: Exception in task 1.2 in stage 1.0 (TID 4) org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main process() File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper return f(*args, **kwargs) File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97) at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314) at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 5 18/12/15 14:35:55 INFO Executor: Running task 1.3 in stage 1.0 (TID 5) 18/12/15 14:35:56 ERROR Executor: Exception in task 1.3 in stage 1.0 (TID 5) org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main process() File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper return f(*args, **kwargs) File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97) at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314) at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 18/12/15 14:37:25 ERROR CoarseGrainedExecutorBackend: Executor self-exiting due to : Driver dp199:37391 disassociated! Shutting down. 18/12/15 14:37:25 INFO DiskBlockManager: Shutdown hook called 18/12/15 14:37:25 INFO ShutdownHookManager: Shutdown hook called
2. spark.sql.execution.arrow.enabled=false
YARN log:
18/12/15 14:24:51 INFO CodeGenerator: Code generated in 1850.239094 ms 18/12/15 14:24:51 INFO PythonRunner: Times: total = 2391, boot = 2312, init = 79, finish = 0 18/12/15 14:24:51 INFO PythonRunner: Times: total = 105, boot = 4, init = 100, finish = 1 18/12/15 14:24:51 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1756 bytes result sent to driver 18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 1 18/12/15 14:24:52 INFO Executor: Running task 0.0 in stage 1.0 (TID 1) 18/12/15 14:24:52 INFO TorrentBroadcast: Started reading broadcast variable 1 18/12/15 14:24:52 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 8.7 KB, free 1048.8 MB) 18/12/15 14:24:52 INFO TorrentBroadcast: Reading broadcast variable 1 took 11 ms 18/12/15 14:24:52 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 15.4 KB, free 1048.8 MB) 18/12/15 14:24:52 INFO PythonRunner: Times: total = 3, boot = -3617, init = 3620, finish = 0 18/12/15 14:24:52 INFO CodeGenerator: Code generated in 50.625969 ms 18/12/15 14:24:52 INFO PythonRunner: Times: total = 16, boot = 5, init = 10, finish = 1 18/12/15 14:24:52 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1973 bytes result sent to driver 18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 2 18/12/15 14:24:52 INFO Executor: Running task 1.0 in stage 1.0 (TID 2) 18/12/15 14:24:52 INFO PythonRunner: Times: total = 48, boot = -72, init = 120, finish = 0 18/12/15 14:24:52 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 2) org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main process() File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper return f(*args, **kwargs) File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc ValueError: could not convert string to float: 'a' at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 3 18/12/15 14:24:52 INFO Executor: Running task 1.1 in stage 1.0 (TID 3) 18/12/15 14:24:52 INFO PythonRunner: Times: total = 46, boot = -57, init = 102, finish = 1 18/12/15 14:24:52 ERROR Executor: Exception in task 1.1 in stage 1.0 (TID 3) org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main process() File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper return f(*args, **kwargs) File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc ValueError: could not convert string to float: 'a' at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at 
java.lang.Thread.run(Thread.java:748) 18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 4 18/12/15 14:24:52 INFO Executor: Running task 1.2 in stage 1.0 (TID 4) 18/12/15 14:24:53 INFO PythonRunner: Times: total = 709, boot = -12, init = 721, finish = 0 18/12/15 14:24:53 ERROR Executor: Exception in task 1.2 in stage 1.0 (TID 4) org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main process() File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process serializer.dump_stream(func(split_index, iterator), outfile) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper return f(*args, **kwargs) File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc ValueError: could not convert string to float: 'a' at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 18/12/15 14:24:53 INFO CoarseGrainedExecutorBackend: Got assigned task 5 18/12/15 14:24:53 INFO Executor: Running task 1.3 in stage 1.0 (TID 5) 18/12/15 14:24:53 INFO PythonRunner: Times: total = 2, boot = -24, init = 26, finish = 0 18/12/15 14:24:53 ERROR Executor: Exception in task 1.3 in stage 1.0 (TID 5) org.apache.spark.api.python.PythonException: Traceback (most recent call last): File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main process() File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process 
serializer.dump_stream(func(split_index, iterator), outfile) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream vs = list(itertools.islice(iterator, batch)) File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper return f(*args, **kwargs) File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc ValueError: could not convert string to float: 'a' at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588) at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571) at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 18/12/15 14:26:39 ERROR CoarseGrainedExecutorBackend: Executor self-exiting due to : Driver dp253:7665 disassociated! Shutting down. 18/12/15 14:26:39 INFO DiskBlockManager: Shutdown hook called 18/12/15 14:26:39 INFO ShutdownHookManager: Shutdown hook called
Hi,
Our project includes this dependency:
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-streaming-kafka_2.11</artifactId>
  <version>1.6.3</version>
</dependency>
From the dependency tree, we can see that it depends on kafka_2.11 version 0.8.2.1.
But when we move this dependency to the parent POM file, the version of the kafka_2.11 dependency changes automatically. Why?
You can see the kafka_2.11 version is changed to 1.0.2. It's very strange: in our project we DO NOT depend on this jar anywhere else.
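One common cause (an assumption here, since the parent POM is not shown): if the parent POM, or a BOM it imports, carries a dependencyManagement entry for kafka_2.11, Maven applies that managed version to the transitive dependency regardless of which version spark-streaming-kafka_2.11 declares. A hypothetical parent pom.xml fragment that would produce exactly this effect:

<!-- hypothetical parent pom.xml: a managed kafka_2.11 version overrides the
     0.8.2.1 pulled in transitively by spark-streaming-kafka_2.11 -->
<dependencyManagement>
  <dependencies>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.11</artifactId>
      <version>1.0.2</version>
    </dependency>
  </dependencies>
</dependencyManagement>

Running mvn dependency:tree -Dverbose -Dincludes=org.apache.kafka in the module should show the kafka_2.11 node annotated with something like "version managed from 0.8.2.1", which points to the POM that is managing it.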
Hi, could you please clarify if you are running a YARN cluster when you see this problem? I tried on Spark standalone and could not reproduce. If it's on a YARN cluster, please file a JIRA and I can try to investigate further.
Thanks, Bryan
Do you mean you ran the same code on YARN and on standalone? Can you check whether they are running the same Python versions?
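A quick way to compare them is sketched below (assumes a SparkSession is already available; the executor check just samples one Python worker per partition and deduplicates the results):

    import sys
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # Python version used by the driver
    print("driver:", sys.version)

    # Python version used by the executor-side workers
    # (one sample per partition, deduplicated)
    versions = (spark.sparkContext
                .parallelize(range(4), 4)
                .map(lambda _: sys.version)
                .distinct()
                .collect())
    print("executors:", versions)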
From: Bryan Cutler <[hidden email]>
Sent: Thursday, January 10, 2019 5:29 PM
To: [hidden email]
Cc: zlist Spark
Subject: Re: spark2.4 arrow enabled true,error log not returned
Hi, could you please clarify whether you are running on a YARN cluster when you see this problem? I tried on Spark standalone and could not reproduce it. If it's on a YARN cluster, please file a JIRA and I can try to investigate further.
Thanks,
Bryan
With Spark 2.4 and spark.sql.execution.arrow.enabled=true, the Python error log is not returned to the driver; in Spark 2.3 there is no such problem.
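For reference, a minimal sketch of the kind of job that produces the traces below. The column names, sample data and the mapfunc body are illustrative stand-ins, since the original python1.py is not shown:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # toggle Arrow-based collection; "false" corresponds to case 2 below
    spark.conf.set("spark.sql.execution.arrow.enabled", "true")

    def mapfunc(row):
        # illustrative stand-in for the original mapfunc; raises
        # ValueError: could not convert string to float: 'a'
        # when it hits the non-numeric value
        return (row[0], float(row[1]))

    df = spark.createDataFrame([("x", "1.0"), ("y", "a")], ["id", "val"])
    out = spark.createDataFrame(df.rdd.map(mapfunc), ["id", "val"])

    # with Arrow enabled, the reported problem is that this Python
    # traceback is not surfaced to the driver on YARN
    out.toPandas()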
1. spark.sql.execution.arrow.enabled=true
YARN log:
18/12/15 14:35:52 INFO CodeGenerator: Code generated in 1030.698785 ms
18/12/15 14:35:54 INFO PythonRunner: Times: total = 1985, boot = 1892, init = 92, finish = 1
18/12/15 14:35:54 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1799 bytes result sent to driver
18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 1
18/12/15 14:35:55 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
18/12/15 14:35:55 INFO TorrentBroadcast: Started reading broadcast variable 1
18/12/15 14:35:55 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 8.3 KB, free 1048.8 MB)
18/12/15 14:35:55 INFO TorrentBroadcast: Reading broadcast variable 1 took 18 ms
18/12/15 14:35:55 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 14.0 KB, free 1048.8 MB)
18/12/15 14:35:55 INFO CodeGenerator: Code generated in 30.269745 ms
18/12/15 14:35:55 INFO PythonRunner: Times: total = 13, boot = 5, init = 7, finish = 1
18/12/15 14:35:55 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1893 bytes result sent to driver
18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 2
18/12/15 14:35:55 INFO Executor: Running task 1.0 in stage 1.0 (TID 2)
18/12/15 14:35:55 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 2)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc
ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 3
18/12/15 14:35:55 INFO Executor: Running task 1.1 in stage 1.0 (TID 3)
18/12/15 14:35:55 ERROR Executor: Exception in task 1.1 in stage 1.0 (TID 3)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc
ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 4
18/12/15 14:35:55 INFO Executor: Running task 1.2 in stage 1.0 (TID 4)
18/12/15 14:35:55 ERROR Executor: Exception in task 1.2 in stage 1.0 (TID 4)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc
ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 5
18/12/15 14:35:55 INFO Executor: Running task 1.3 in stage 1.0 (TID 5)
18/12/15 14:35:56 ERROR Executor: Exception in task 1.3 in stage 1.0 (TID 5)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc
ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
18/12/15 14:37:25 ERROR CoarseGrainedExecutorBackend: Executor self-exiting due to : Driver dp199:37391 disassociated! Shutting down.
18/12/15 14:37:25 INFO DiskBlockManager: Shutdown hook called
18/12/15 14:37:25 INFO ShutdownHookManager: Shutdown hook called
2. spark.sql.execution.arrow.enabled=false
YARN log:
18/12/15 14:24:51 INFO CodeGenerator: Code generated in 1850.239094 ms
18/12/15 14:24:51 INFO PythonRunner: Times: total = 2391, boot = 2312, init = 79, finish = 0
18/12/15 14:24:51 INFO PythonRunner: Times: total = 105, boot = 4, init = 100, finish = 1
18/12/15 14:24:51 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1756 bytes result sent to driver
18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 1
18/12/15 14:24:52 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
18/12/15 14:24:52 INFO TorrentBroadcast: Started reading broadcast variable 1
18/12/15 14:24:52 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 8.7 KB, free 1048.8 MB)
18/12/15 14:24:52 INFO TorrentBroadcast: Reading broadcast variable 1 took 11 ms
18/12/15 14:24:52 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 15.4 KB, free 1048.8 MB)
18/12/15 14:24:52 INFO PythonRunner: Times: total = 3, boot = -3617, init = 3620, finish = 0
18/12/15 14:24:52 INFO CodeGenerator: Code generated in 50.625969 ms
18/12/15 14:24:52 INFO PythonRunner: Times: total = 16, boot = 5, init = 10, finish = 1
18/12/15 14:24:52 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1973 bytes result sent to driver
18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 2
18/12/15 14:24:52 INFO Executor: Running task 1.0 in stage 1.0 (TID 2)
18/12/15 14:24:52 INFO PythonRunner: Times: total = 48, boot = -72, init = 120, finish = 0
18/12/15 14:24:52 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 2)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc
ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 3
18/12/15 14:24:52 INFO Executor: Running task 1.1 in stage 1.0 (TID 3)
18/12/15 14:24:52 INFO PythonRunner: Times: total = 46, boot = -57, init = 102, finish = 1
18/12/15 14:24:52 ERROR Executor: Exception in task 1.1 in stage 1.0 (TID 3)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc
ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 4
18/12/15 14:24:52 INFO Executor: Running task 1.2 in stage 1.0 (TID 4)
18/12/15 14:24:53 INFO PythonRunner: Times: total = 709, boot = -12, init = 721, finish = 0
18/12/15 14:24:53 ERROR Executor: Exception in task 1.2 in stage 1.0 (TID 4)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc
ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
18/12/15 14:24:53 INFO CoarseGrainedExecutorBackend: Got assigned task 5
18/12/15 14:24:53 INFO Executor: Running task 1.3 in stage 1.0 (TID 5)
18/12/15 14:24:53 INFO PythonRunner: Times: total = 2, boot = -24, init = 26, finish = 0
18/12/15 14:24:53 ERROR Executor: Exception in task 1.3 in stage 1.0 (TID 5)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc
ValueError: could not convert string to float: 'a'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
18/12/15 14:26:39 ERROR CoarseGrainedExecutorBackend: Executor self-exiting due to : Driver dp253:7665 disassociated! Shutting down.
18/12/15 14:26:39 INFO DiskBlockManager: Shutdown hook called
18/12/15 14:26:39 INFO ShutdownHookManager: Shutdown hook called