Skip to content

Spark Shell: How To

James W. Kimani edited this page Feb 10, 2018 · 3 revisions

Start a spark-shell session on a server:

spark-shell

or

In case you need to use other libraries not included in Spark, pass their JARs on the command line:

spark-shell --jars <location of jar files>

Import the Databricks spark-avro package:

import com.databricks.spark.avro._

Import the org.apache.spark.sql package:

import org.apache.spark.sql._

Create a Spark session (or reuse the one spark-shell already provides as `spark`):

val spark: SparkSession = SparkSession.builder.getOrCreate()

Create a DataFrame from an Avro file:

val df = spark.read.avro("/spark-warehouse/product_avro_table/part-00000-39888027-8365-49c2-8217-8294062cc595.avro")

Create a DataFrame from an ORC file:

val df = spark.read.format("orc").load("/spark-warehouse/product_orc_table/part-00000-6c329148-858e-4742-81f4-b40760a34acf.snappy.orc")

Get the DataFrame row count (`count` returns the number of rows, not the size in bytes):

df.count

Get the DataFrame schema:

df.schema