参考博客(记录备查,以免以后找不到):http://www.cnblogs.com/shishanyuan/p/4723713.html
第一种:
通过 case class(利用反射自动推断 Schema)
package cn.lijie
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
/** * Created by jie on 2017/7/31. */
/**
 * Spark SQL example 1: builds a DataFrame by mapping every input line onto the
 * `Game` case class and letting Spark derive the schema from it (`toDF`).
 *
 * Flow: read `game.txt`, parse each comma-separated line into a `Game`,
 * register the DataFrame as temp table `t_game`, write the rows with
 * `id <= 2` out as JSON, then read that JSON back and print it.
 */
object SparkSql01 {

  /**
   * Entry point of the example.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Set before the SparkContext exists so the value is already in place when
    // the context starts up (presumably meant to control the user the job runs
    // as; verify against the target environment).
    System.setProperty("user.name", "bigdata")

    val conf = new SparkConf().setAppName("sparksql").setMaster("local[2]")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)
      // Brings the RDD-to-DataFrame conversion (`toDF`) into scope.
      import sqlContext.implicits._

      // Each line is expected to look like "<id>,<name>,<level>".
      val games = sc.textFile("C:\\Users\\jie\\Desktop\\game.txt").map { line =>
        val fields = line.split(",")
        Game(fields(0).toLong, fields(1), fields(2).toInt)
      }

      val df = games.toDF()
      df.registerTempTable("t_game")

      sqlContext
        .sql("select * from t_game where id <= 2")
        .write
        .json("C:\\Users\\jie\\Desktop\\game.json")

      // `read.json` replaces the deprecated `sqlContext.load(path, "json")` and
      // mirrors the `write.json` call above.
      val newDf = sqlContext.read.json("C:\\Users\\jie\\Desktop\\game.json")
      newDf.show()
    } finally {
      // Always release the local Spark resources, even when a step above fails.
      sc.stop()
    }
  }
}
/**
 * One record of the game data set; used as the row type from which the
 * DataFrame schema is derived.
 *
 * @param id    numeric identifier (first comma-separated column)
 * @param name  name (second column)
 * @param level level (third column)
 */
case class Game(
  id: Long,
  name: String,
  level: Int
)
第二种:
通过 StructType(以编程方式显式指定 Schema)
package cn.lijie
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, SparkContext}
/** * Created by jie on 2017/7/31. */
/**
 * Spark SQL example 2: builds a DataFrame from an RDD of `Row`s plus an
 * explicitly declared `StructType` schema.
 *
 * Flow: read `game.txt`, split each comma-separated line into a `Row`,
 * apply the schema, register temp table `t_game`, write the rows with
 * `id <= 2` out as JSON, then read that JSON back and print it.
 */
object SparkSql01 {

  /**
   * Entry point of the example.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("sparksql").setMaster("local[2]")
    System.setProperty("user.name", "bigdata")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)
      val lines = sc.textFile("C:\\Users\\jie\\Desktop\\game.txt")

      // NOTE(review): the third column is called "age" here, while the `Game`
      // case class in this file calls it `level`; confirm which name is intended.
      val schema = StructType(
        List(
          StructField("id", LongType, true),
          StructField("name", StringType, true),
          StructField("age", IntegerType, true)
        )
      )

      // Split every line into its comma-separated fields first. Indexing the raw
      // String (as the code did before) yields single characters, not columns.
      val rowRDD = lines.map { line =>
        val fields = line.split(",")
        Row(fields(0).toLong, fields(1), fields(2).toInt)
      }

      val df = sqlContext.createDataFrame(rowRDD, schema)
      df.registerTempTable("t_game")

      sqlContext
        .sql("select * from t_game where id <= 2")
        .write
        .json("C:\\Users\\jie\\Desktop\\game.json")

      // `read.json` replaces the deprecated `sqlContext.load(path, "json")` and
      // mirrors the `write.json` call above.
      val newDf = sqlContext.read.json("C:\\Users\\jie\\Desktop\\game.json")
      newDf.show()
    } finally {
      // Always release the local Spark resources, even when a step above fails.
      sc.stop()
    }
  }
}
/**
 * One record of the game data set.
 *
 * The StructType-based `SparkSql01` above builds `Row`s directly and does not
 * reference this class.
 *
 * @param id    numeric identifier
 * @param name  name
 * @param level level
 */
case class Game(
  id: Long,
  name: String,
  level: Int
)
pom.xml 配置:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>spark-sparksql</groupId>
<artifactId>spark-sql</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- Build-wide settings; the *.version properties are referenced by the dependencies below. -->
<properties>
<!-- Java source/target level picked up by the Maven compiler plugin. -->
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
<encoding>UTF-8</encoding>
<!-- NOTE(review): the Spark artifact ids below hard-code the _2.10 suffix, so this
     Scala version has to stay on the 2.10 line unless those ids are changed too. -->
<scala.version>2.10.6</scala.version>
<spark.version>1.6.1</spark.version>
<hadoop.version>2.6.4</hadoop.version>
</properties>
<!-- Libraries needed to compile and run the Spark SQL examples. -->
<dependencies>
<!-- Scala standard library, version taken from ${scala.version}. -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<!-- Spark core (SparkConf / SparkContext), built for Scala 2.10. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Spark SQL (SQLContext, DataFrame, Row, StructType), built for Scala 2.10. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Hadoop client libraries, version taken from ${hadoop.version}. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
</dependencies>
<!-- Build: compile the Scala sources and package a shaded jar with a Main-Class entry. -->
<build>
<!-- Sources live under the Scala directories instead of the default src/main/java. -->
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/scala</testSourceDirectory>
<plugins>
<!-- Compiles main and test Scala code (goals: compile, testCompile). -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.2</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<!-- Extra arguments handed to the Scala compiler.
     NOTE(review): -make:transitive is, as far as known, only accepted by Scala 2.10
     and earlier; it would need to be removed when moving to a newer Scala version. -->
<args>
<arg>-make:transitive</arg>
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<!-- Runs the shade goal during the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<!-- For every artifact, leave out the jar signature files under META-INF
     (presumably so stale signatures do not invalidate the merged jar). -->
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<!-- Sets the manifest main class to the example's entry point. -->
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>cn.lijie.SparkSql01</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
免责声明:本站所有文章内容,图片,视频等均是来源于用户投稿和互联网及文摘转载整编而成,不代表本站观点,不承担相关法律责任。其著作权各归其原作者或其出版社所有。如发现本站有涉嫌抄袭侵权/违法违规的内容,侵犯到您的权益,请在线联系站长,一经查实,本站将立刻删除。 本文来自网络,若有侵权,请联系删除,如若转载,请注明出处:https://yundeesoft.com/4835.html