For Spark 1.6.0, add the following Maven dependency:
<dependency>
    <groupId>com.databricks</groupId>
    <artifactId>spark-csv_2.10</artifactId>
    <version>1.4.0</version>
    <scope>compile</scope>
</dependency>
For Spark 2.0+, this dependency is not needed, because Spark 2.0 ships with built-in CSV read/write support.
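For reference, here is a minimal sketch of the same read/write flow on Spark 2.0+, using the built-in csv data source through SparkSession (the paths mirror the placeholder paths in the Spark 1.6 example below):

import org.apache.spark.sql.SparkSession

object SparkReadFile2 {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SparkReadFile2")
      .master("local")
      .getOrCreate()
    // Built-in csv source: no external spark-csv package required
    val data = spark.read
      .option("header", "false")      // "true" if the first row is a header
      .option("inferSchema", "true")  // infer column types automatically
      .csv("D:\\input\\word.csv")
    data.repartition(1).write
      .option("header", "false")
      .option("delimiter", ",")
      .csv("D:\\output\\word2")
    spark.stop()
  }
}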
Full example for Spark 1.6 with spark-csv:

package com.egridcloud.spark

import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by LHX on 2018/3/20 13:26.
  */
object SparkReadFile {
  def main(args: Array[String]): Unit = {
    val localpath = "D:\\input\\word.csv"
    val outpath = "D:\\output\\word2"
    val conf = new SparkConf()
    conf.setAppName("SparkReadFile")
    conf.setMaster("local")
    val sparkContext = new SparkContext(conf)
    val sqlContext = new SQLContext(sparkContext)
    // Read the CSV file
    val data: DataFrame = sqlContext.read.format("com.databricks.spark.csv")
      .option("header", "false")            // "true" if the first CSV row is a header, otherwise "false"
      .option("inferSchema", true.toString) // infer column data types automatically
      .load(localpath)
    // data.show()
    // Write the CSV file
    data.repartition(1).write.format("com.databricks.spark.csv")
      .option("header", "false")    // "true" if the first CSV row is a header, otherwise "false"
      .option("delimiter", ",")     // the default delimiter is ","
      .save(outpath)
    sparkContext.stop()
  }
}
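A note on the design: repartition(1) collapses the data into a single partition so that only one part file is written under the output directory; for large datasets this forces everything through one task, so drop it if you want one file per partition written in parallel. Also keep in mind that save(outpath) treats outpath as a directory and, with the default save mode (ErrorIfExists), fails if that directory already exists.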