scala实现wordCount

题目

使用 Scala 实现 WordCount（单词计数）程序：读取文本文件，统计每个单词出现的次数并打印结果。

环境

IDEA 版本: IntelliJ IDEA 2018.2.5 x64

实现

word.txt

1
2
3
hadoop	hadoop	hadoop	
word world world
pig hive word hive pig

WordCountApp.scala

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import scala.io.Source

/**
 * A small word-count program implemented in Scala.
 *
 * Reads a text file, splits every line into whitespace-separated words and
 * prints a map from each word to the number of times it occurs.
 */
object WordCountApp {

  /** File that is read when no path is supplied on the command line. */
  private val DefaultFilePath = "D:/word.txt"

  /** Character encoding used to decode the input file. */
  private val Codec = "utf-8"

  /**
   * Counts how often each word occurs in the given lines.
   *
   * Words are separated by any run of whitespace (spaces and/or tabs), so
   * both tab-separated and space-separated input is handled. Empty tokens,
   * which `split` produces for blank lines or leading whitespace, are ignored.
   *
   * @param lines the lines of text to analyse; the iterator is consumed
   * @return a map from each distinct word to its number of occurrences
   */
  def countWords(lines: Iterator[String]): Map[String, Int] =
    lines
      .flatMap(_.split("\\s+"))
      .filter(_.nonEmpty)
      // Single pass: bump the counter for each word instead of building
      // intermediate (word, 1) pairs and grouping them afterwards.
      .foldLeft(Map.empty[String, Int]) { (counts, word) =>
        counts.updated(word, counts.getOrElse(word, 0) + 1)
      }

  /**
   * Program entry point: prints the word counts of a text file.
   *
   * @param args optional command-line arguments; `args(0)`, when present, is
   *             the path of the file to read, otherwise `D:/word.txt` is used
   */
  def main(args: Array[String]): Unit = {
    val filePath = args.headOption.getOrElse(DefaultFilePath)
    val file = Source.fromFile(filePath, Codec)
    try {
      // The counts are fully computed before the file is closed because
      // `getLines()` reads lazily from the underlying handle.
      println(countWords(file.getLines()))
    } finally {
      // Release the file handle even if reading or decoding fails.
      file.close()
    }
  }

}

结果

源码

WordCountApp.scala