# Copyright 2019 The Glow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from glow.conversions import OneDimensionalDoubleNumpyArrayConverter, TwoDimensionalDoubleNumpyArrayConverter
from py4j import protocol
from py4j.protocol import register_input_converter
from pyspark import SparkContext
from pyspark.sql import DataFrame, SQLContext, SparkSession
from typing import Any, Dict
from typeguard import check_argument_types, check_return_type
__all__ = ['register', 'transform']
def register(session: SparkSession, new_session: bool = True) -> SparkSession:
    """
    Register SQL extensions and py4j converters for a Spark session.

    Args:
        session: Spark session
        new_session: If ``True``, create a new Spark session using ``session.newSession()`` before registering
            extensions. This may be necessary if you're using functions that register new
            analysis rules. The new session has isolated UDFs, configurations, and temporary tables,
            but shares the existing ``SparkContext`` and cached data.

    Returns:
        A Spark session whose JVM-side ``SparkSession`` has Glow's extensions registered.

    Example:
        >>> import glow
        >>> spark = glow.register(spark)
    """
    assert check_argument_types()
    # Reuse the existing SparkContext; registration happens on the JVM side via
    # io.projectglow.Glow.register, which returns the (possibly new) Java session.
    sc = session._sc
    return SparkSession(
        sc, session._jvm.io.projectglow.Glow.register(session._jsparkSession, new_session))
# Register Glow's py4j input converters exactly once per interpreter session:
# a converter class is only added if no instance of that exact class is
# already present in py4j's converter chain.
glow_input_converters = [
    OneDimensionalDoubleNumpyArrayConverter, TwoDimensionalDoubleNumpyArrayConverter
]
for converter_cls in glow_input_converters:
    already_registered = any(
        type(existing) is converter_cls for existing in protocol.INPUT_CONVERTER)
    if not already_registered:
        # Prepend so Glow's converters take precedence over py4j defaults.
        register_input_converter(converter_cls(), prepend=True)