In Windows 10, accessing Hive from PySpark with PyCharm throws an error


In Windows 10, accessing Hive from PySpark with PyCharm throws an error

Mich Talebzadeh
Hi,

I have a simple piece of code that tries to create a Hive Derby database as follows:

from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import HiveContext
from pyspark.sql import SparkSession
from pyspark.sql import Row
from pyspark.sql.types import StringType, ArrayType
from pyspark.sql.functions import udf, col, max as max, to_date, date_add, \
    add_months
from datetime import datetime, timedelta
import os
from os.path import join, abspath
from typing import Optional
import logging
import random
import string
import math
warehouseLocation = 'c:\\Users\\admin\\PycharmProjects\\pythonProject\\spark-warehouse'
local_scrtatchdir = 'c:\\Users\\admin\\PycharmProjects\\pythonProject\\hive-localscratchdir'
scrtatchdir = 'c:\\Users\\admin\\PycharmProjects\\pythonProject\\hive-scratchdir'
tmp_dir = 'd:\\temp\\hive'
metastore_db = 'jdbc:derby:C:\\Users\\admin\\PycharmProjects\\pythonProject\\metastore_db;create=true'
ConnectionDriverName = 'org.apache.derby.EmbeddedDriver'
spark = SparkSession \
    .builder \
    .appName("App1") \
    .config("hive.exec.local.scratchdir", local_scrtatchdir) \
    .config("hive.exec.scratchdir", scrtatchdir) \
    .config("spark.sql.warehouse.dir", warehouseLocation) \
    .config("hadoop.tmp.dir", tmp_dir) \
    .config("javax.jdo.option.ConnectionURL", metastore_db) \
    .config("javax.jdo.option.ConnectionDriverName", ConnectionDriverName) \
    .enableHiveSupport() \
    .getOrCreate()
print(os.listdir(warehouseLocation))
print(os.listdir(local_scrtatchdir))
print(os.listdir(scrtatchdir))
print(os.listdir(tmp_dir))
sc = SparkContext.getOrCreate()
sqlContext = SQLContext(sc)
HiveContext = HiveContext(sc)
spark.sql("CREATE DATABASE IF NOT EXISTS test")

Now this comes back with the following:


C:\Users\admin\PycharmProjects\pythonProject\venv\Scripts\python.exe C:/Users/admin/PycharmProjects/pythonProject/main.py

Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties

Setting default log level to "WARN".

To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).

[]

[]

[]

['hive-localscratchdir', 'hive-scratchdir', 'hive-warehouse']

Traceback (most recent call last):

  File "C:/Users/admin/PycharmProjects/pythonProject/main.py", line 76, in <module>

    spark.sql("CREATE DATABASE IF NOT EXISTS test")

  File "D:\temp\spark\python\pyspark\sql\session.py", line 649, in sql

    return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)

  File "D:\temp\spark\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1305, in __call__

  File "D:\temp\spark\python\pyspark\sql\utils.py", line 134, in deco

    raise_from(converted)

  File "<string>", line 3, in raise_from

pyspark.sql.utils.AnalysisException: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.createDirectoryWithMode0(Ljava/lang/String;I)V;


Process finished with exit code 1


I also have a hive-site.xml file under %SPARK_HOME%/conf. It is not obvious to me why it is throwing this error.

Thanks


LinkedIn  https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw

 



Disclaimer: Use it at your own risk. Any and all responsibility for any loss, damage or destruction of data or any other property which may arise from relying on this email's technical content is explicitly disclaimed. The author will in no case be liable for any monetary damages arising from such loss, damage or destruction.

 


Re: In Windows 10, accessing Hive from PySpark with PyCharm throws an error

Artemis User

Apparently this is an OS dynamic library link error. Make sure you have LD_LIBRARY_PATH (on Linux) or PATH (on Windows) set up properly to point at the right .so or .dll file...
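
If the script is launched from PyCharm, the run configuration may not inherit the shell environment, so one way to apply this advice is to export the variables from inside the script before the SparkSession is built. A minimal sketch, assuming the Hadoop native bits (winutils.exe and hadoop.dll) live under a hypothetical D:\hadoop\bin:

import os

hadoop_home = 'D:\\hadoop'  # assumed location; adjust to your own install
os.environ['HADOOP_HOME'] = hadoop_home
# prepend %HADOOP_HOME%\bin to PATH so the driver JVM spawned by PySpark can load hadoop.dll
os.environ['PATH'] = os.path.join(hadoop_home, 'bin') + os.pathsep + os.environ['PATH']

# ...then build the SparkSession exactly as in the original code.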



Re: In Windows 10, accessing Hive from PySpark with PyCharm throws an error

Mich Talebzadeh
This is becoming a serious pain.

Using PowerShell, I am running spark-submit as follows:

PS C:\Users\admin> spark-submit.cmd C:\Users\admin\PycharmProjects\pythonProject\main.py

WARNING: An illegal reflective access operation has occurred

WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/D:/temp/spark/jars/spark-unsafe_2.12-3.0.1.jar) to constructor java.nio.DirectByteBuffer(long,int)

WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform

WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations

WARNING: All illegal access operations will be denied in a future release

Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties

20/12/03 23:13:59 INFO SparkContext: Running Spark version 3.0.1

20/12/03 23:13:59 INFO ResourceUtils: ==============================================================

20/12/03 23:13:59 INFO ResourceUtils: Resources for spark.driver:


20/12/03 23:13:59 INFO ResourceUtils: ==============================================================

20/12/03 23:13:59 INFO SparkContext: Submitted application: App1

20/12/03 23:13:59 INFO SecurityManager: Changing view acls to: admin

20/12/03 23:13:59 INFO SecurityManager: Changing modify acls to: admin

20/12/03 23:13:59 INFO SecurityManager: Changing view acls groups to:

20/12/03 23:13:59 INFO SecurityManager: Changing modify acls groups to:

20/12/03 23:13:59 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users  with view permissions: Set(admin); groups with view permissions: Set(); users  with modify permissions: Set(admin); groups with modify permissions: Set()

20/12/03 23:14:00 INFO Utils: Successfully started service 'sparkDriver' on port 62327.

20/12/03 23:14:00 INFO SparkEnv: Registering MapOutputTracker

20/12/03 23:14:00 INFO SparkEnv: Registering BlockManagerMaster

20/12/03 23:14:01 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information

20/12/03 23:14:01 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up

20/12/03 23:14:01 INFO SparkEnv: Registering BlockManagerMasterHeartbeat

20/12/03 23:14:01 INFO DiskBlockManager: Created local directory at C:\Users\admin\AppData\Local\Temp\blockmgr-30e2019a-af60-44da-86e7-8a162d1e29da

20/12/03 23:14:01 INFO MemoryStore: MemoryStore started with capacity 434.4 MiB

20/12/03 23:14:01 INFO SparkEnv: Registering OutputCommitCoordinator

20/12/03 23:14:01 INFO Utils: Successfully started service 'SparkUI' on port 4040.

20/12/03 23:14:01 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://w7:4040

20/12/03 23:14:01 INFO Executor: Starting executor ID driver on host w7

20/12/03 23:14:01 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 62373.

20/12/03 23:14:01 INFO NettyBlockTransferService: Server created on w7:62373

20/12/03 23:14:01 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy

20/12/03 23:14:01 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, w7, 62373, None)

20/12/03 23:14:01 INFO BlockManagerMasterEndpoint: Registering block manager w7:62373 with 434.4 MiB RAM, BlockManagerId(driver, w7, 62373, None)

20/12/03 23:14:01 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, w7, 62373, None)

20/12/03 23:14:01 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, w7, 62373, None)

D:\temp\spark\python\lib\pyspark.zip\pyspark\context.py:225: DeprecationWarning: Support for Python 2 and Python 3 prior to version 3.6 is deprecated as of Spark 3.0. See also the plan for dropping Python 2 support at https://spark.apache.org/news/plan-for-dropping-python-2-support.html.

  DeprecationWarning)

20/12/03 23:14:02 INFO SharedState: loading hive config file: file:/D:/temp/spark/conf/hive-site.xml

20/12/03 23:14:02 INFO SharedState: spark.sql.warehouse.dir is not set, but hive.metastore.warehouse.dir is set. Setting spark.sql.warehouse.dir to the value of hive.metastore.warehouse.dir ('C:\Users\admin\PycharmProjects\pythonProject\spark-warehouse').

20/12/03 23:14:02 INFO SharedState: Warehouse path is 'C:\Users\admin\PycharmProjects\pythonProject\spark-warehouse'.

20/12/03 23:14:04 INFO HiveConf: Found configuration file file:/D:/temp/spark/conf/hive-site.xml

20/12/03 23:14:04 INFO HiveUtils: Initializing HiveMetastoreConnection version 2.3.7 using Spark classes.

Traceback (most recent call last):

  File "C:/Users/admin/PycharmProjects/pythonProject/main.py", line 79, in <module>

    spark.sql("CREATE DATABASE IF NOT EXISTS test")

  File "D:\temp\spark\python\lib\pyspark.zip\pyspark\sql\session.py", line 649, in sql

  File "D:\temp\spark\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1305, in __call__

  File "D:\temp\spark\python\lib\pyspark.zip\pyspark\sql\utils.py", line 134, in deco

  File "<string>", line 3, in raise_from

pyspark.sql.utils.AnalysisException: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.createDirectoryWithMode0(Ljava/lang/String;I)V;

20/12/03 23:14:04 INFO SparkContext: Invoking stop() from shutdown hook

20/12/03 23:14:04 INFO SparkUI: Stopped Spark web UI at http://w7:4040

20/12/03 23:14:04 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!

20/12/03 23:14:04 INFO MemoryStore: MemoryStore cleared

20/12/03 23:14:04 INFO BlockManager: BlockManager stopped

20/12/03 23:14:04 INFO BlockManagerMaster: BlockManagerMaster stopped

20/12/03 23:14:04 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!

20/12/03 23:14:04 INFO SparkContext: Successfully stopped SparkContext

20/12/03 23:14:04 INFO ShutdownHookManager: Shutdown hook called

20/12/03 23:14:04 INFO ShutdownHookManager: Deleting directory C:\Users\admin\AppData\Local\Temp\spark-2ccc7f91-3970-42e4-b564-6621215dd446

20/12/03 23:14:04 INFO ShutdownHookManager: Deleting directory C:\Users\admin\AppData\Local\Temp\spark-8015fc12-eff7-4d2e-b4c3-f864bf4b00ce\pyspark-12b6b74c-09a3-447f-be8b-b5aa26fa274d

20/12/03 23:14:04 INFO ShutdownHookManager: Deleting directory C:\Users\admin\AppData\Local\Temp\spark-8015fc12-eff7-4d2e-b4c3-f864bf4b00ce


So basically it finds hive-site.xml under the %SPARK_HOME%/conf directory and tries to initialise the HiveMetastoreConnection, but it fails with the error:


pyspark.sql.utils.AnalysisException: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.createDirectoryWithMode0(Ljava/lang/String;I)V;


winutils.exe is placed under the %SPARK_HOME%/bin directory:


where winutils.exe

D:\temp\spark\bin\winutils.exe


and permissions have been set with chmod -R 777.


Also, this is the hive-site.xml:


<?xml version="1.0" encoding="UTF-8" standalone="no"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>


<configuration>


  <property>

    <name>hive.exec.local.scratchdir</name>

    <value>C:\Users\admin\PycharmProjects\pythonProject\hive-localscratchdir</value>

    <description>Local scratch space for Hive jobs</description>

  </property>


 <property>

    <name>hive.exec.scratchdir</name>

    <value>C:\Users\admin\PycharmProjects\pythonProject\hive-scratchdir</value>

    <description>HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/&lt;username&gt; is created, with ${hive.scratch.dir.permission}.</description>

  </property>


  <property>

    <name>hive.metastore.warehouse.dir</name>

    <value>C:\Users\admin\PycharmProjects\pythonProject\spark-warehouse</value>

    <description>location of default database for the warehouse</description>

  </property>

  

  <property>

    <name>spark.sql.warehouse.dir</name>

    <value>C:\Users\admin\PycharmProjects\pythonProject\spark-warehouse</value>

    <description>location of default database for the warehouse</description>

  </property>

  


  <property>

    <name>hadoop.tmp.dir</name>

    <value>d:\temp\hive\</value>

    <description>A base for other temporary directories.</description>

  </property>


  <property>

   <name>javax.jdo.option.ConnectionURL</name>

   <value>jdbc:derby:C:\Users\admin\PycharmProjects\pythonProject\metastore_db;create=true</value>

   <description>JDBC connect string for a JDBC metastore</description>

  </property>


  <property>

   <name>javax.jdo.option.ConnectionDriverName</name>

   <value>org.apache.derby.EmbeddedDriver</value>

   <description>Driver class name for a JDBC metastore</description>

  </property>


</configuration>





Re: In Windows 10, accessing Hive from PySpark with PyCharm throws an error

Artemis User

You don't have to include all your config and log messages; the error message would suffice. The java.lang.UnsatisfiedLinkError exception indicates that the JVM can't find some OS-specific libraries (commonly referred to as native libraries). On Windows, they would be DLL files. Look into your Hadoop installation and you will find the $HADOOP_HOME/lib/native directory. All the OS-specific library files are there (on Windows, this lib path may be different). So add this path to your PATH environment variable in your command shell before running spark-submit again.
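
To check whether that native directory is actually visible to the process, one quick diagnostic (just a sketch, not from the thread) is to scan PATH for the files the JVM is trying to load:

import os

# usual Windows names for the Hadoop native bits; adjust if your build differs
needed = ('hadoop.dll', 'winutils.exe')
for d in os.environ.get('PATH', '').split(os.pathsep):
    hits = [f for f in needed if os.path.isfile(os.path.join(d, f))]
    if hits:
        print(d, '->', ', '.join(hits))

If nothing is printed, the directory has not made it into the environment of the Python/JVM process.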

-- ND



Re: In Windows 10, accessing Hive from PySpark with PyCharm throws an error

Mich Talebzadeh
OK, with PyCharm itself I am getting this error:

pyspark.sql.utils.AnalysisException: java.lang.RuntimeException: Error while running command to get file permissions : java.io.IOException: (null) entry in command string: null ls -F C:\Users\admin\PycharmProjects\pythonProject\hive-scratchdir

I gather the "null ls" is because it cannot find winutils.exe?


Now if I run the command manually with winutils.exe:



D:\temp\spark\bin\winutils.exe ls -F C:\Users\admin\PycharmProjects\pythonProject\hive-scratchdir

drwxrwxrwx|1|w7\admin|w7\None|0|Nov|29|2020|C:\Users\admin\PycharmProjects\pythonProject\hive-scratchdir


it works.
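
For what it is worth, the "(null) entry in command string: null ls" message typically means the driver JVM cannot locate winutils.exe at all, i.e. HADOOP_HOME (or the hadoop.home.dir system property) is not visible to the process PyCharm launches. A rough way to confirm from the same interpreter, reusing the paths above (only a sketch):

import os
import subprocess

winutils = 'D:\\temp\\spark\\bin\\winutils.exe'  # path from this thread
scratchdir = 'C:\\Users\\admin\\PycharmProjects\\pythonProject\\hive-scratchdir'

# if HADOOP_HOME prints as None here, the JVM started from this interpreter
# probably cannot find winutils.exe either, hence the "(null) ... ls" error
print('HADOOP_HOME =', os.environ.get('HADOOP_HOME'))
print(subprocess.run([winutils, 'ls', '-F', scratchdir],
                     capture_output=True, text=True).stdout)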

Thanks
