Skip to content

Commit 5b6265e

Browse files
committed
Sparkling Water update
1 parent 8e81e61 commit 5b6265e

2 files changed

Lines changed: 29 additions & 20 deletions

File tree

tutorials/sparkling-water/README.md

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
### Prepare environment
2727

2828
1. Run Sparkling shell with an embedded Spark cluster:
29-
```
29+
```bash
3030
cd "path/to/sparkling/water"
3131
export SPARK_HOME="/path/to/spark/installation"
3232
export MASTER="local-cluster[3,2,4096]"
@@ -188,29 +188,29 @@
188188
val resultDF = hamSpamRDD.zip(tfidfRDD).map(v => SMS(v._1, v._2)).toDF
189189

190190
// Publish Spark DataFrame as H2OFrame
191-
val table = h2oContext.asH2OFrame(resultDF, "messages_table")
191+
val tableHF = h2oContext.asH2OFrame(resultDF, "messages_table")
192192

193193
// Transform target column into categorical!
194-
table.replace(table.find("target"), table.vec("target").toCategoricalVec()).remove()
195-
table.update(null)
194+
tableHF.replace(tableHF.find("target"), tableHF.vec("target").toCategoricalVec()).remove()
195+
tableHF.update(null)
196196

197197
// Split table into training and validation parts
198198
val keys = Array[String]("train.hex", "valid.hex")
199199
val ratios = Array[Double](0.8)
200-
val frs = split(table, keys, ratios)
201-
val (train, valid) = (frs(0), frs(1))
200+
val frs = split(tableHF, keys, ratios)
201+
val (trainHF, validHF) = (frs(0), frs(1))
202202
tableHF.delete()
203203

204204
// Build final DeepLearning model
205-
val dlModel = buildDLModel(train, valid)(h2oContext)
205+
val dlModel = buildDLModel(trainHF, validHF)(h2oContext)
206206
```
207207

208208
11. Evaluate model quality:
209209
```scala
210210
// Collect model metrics and evaluate model quality
211211
import water.app.ModelMetricsSupport
212-
val trainMetrics = ModelMetricsSupport.binomialMM(dlModel, train)
213-
val validMetrics = ModelMetricsSupport.binomialMM(dlModel, valid)
212+
val trainMetrics = ModelMetricsSupport.binomialMM(dlModel, trainHF)
213+
val validMetrics = ModelMetricsSupport.binomialMM(dlModel, validHF)
214214
println(trainMetrics.auc._auc)
215215
println(validMetrics.auc._auc)
216216
```

tutorials/sparkling-water/h2oworld.script.scala

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -93,13 +93,22 @@ def buildDLModel(trainHF: Frame, validHF: Frame,
9393
val dlModel = dl.trainModel.get
9494

9595
// Force computation of model metrics on both datasets
96-
dlModel.score(train).delete()
97-
dlModel.score(valid).delete()
96+
dlModel.score(trainHF).delete()
97+
dlModel.score(validHF).delete()
9898

9999
// And return resulting model
100100
dlModel
101101
}
102102

103+
// Create SQL support
104+
import org.apache.spark.sql._
105+
implicit val sqlContext = SQLContext.getOrCreate(sc)
106+
import sqlContext.implicits._
107+
//
108+
// Start H2O services
109+
import org.apache.spark.h2o._
110+
val h2oContext = new H2OContext(sc).start()
111+
103112
// Data load
104113
val dataRDD = load(DATAFILE)
105114
// Extract response column from dataset
@@ -120,26 +129,26 @@ var (hashingTF, idfModel, tfidfRDD) = buildIDFModel(tokensRDD)
120129
val resultDF = hamSpamRDD.zip(tfidfRDD).map(v => SMS(v._1, v._2)).toDF
121130

122131
// Publish Spark DataFrame as H2OFrame
123-
val table = h2oContext.asH2OFrame(resultDF, "messages_table")
132+
val tableHF = h2oContext.asH2OFrame(resultDF, "messages_table")
124133

125134
// Transform target column into categorical!
126-
table.replace(table.find("target"), table.vec("target").toCategoricalVec()).remove()
127-
table.update(null)
135+
tableHF.replace(tableHF.find("target"), tableHF.vec("target").toCategoricalVec()).remove()
136+
tableHF.update(null)
128137

129138
// Split table into training and validation parts
130139
val keys = Array[String]("train.hex", "valid.hex")
131140
val ratios = Array[Double](0.8)
132-
val frs = split(table, keys, ratios)
133-
val (train, valid) = (frs(0), frs(1))
134-
table.delete()
141+
val frs = split(tableHF, keys, ratios)
142+
val (trainHF, validHF) = (frs(0), frs(1))
143+
tableHF.delete()
135144

136145
// Build final DeepLearning model
137-
val dlModel = buildDLModel(train, valid)(h2oContext)
146+
val dlModel = buildDLModel(trainHF, validHF)(h2oContext)
138147

139148
// Collect model metrics and evaluate model quality
140149
import water.app.ModelMetricsSupport
141-
val trainMetrics = ModelMetricsSupport.binomialMM(dlModel, train)
142-
val validMetrics = ModelMetricsSupport.binomialMM(dlModel, valid)
150+
val trainMetrics = ModelMetricsSupport.binomialMM(dlModel, trainHF)
151+
val validMetrics = ModelMetricsSupport.binomialMM(dlModel, validHF)
143152
println(trainMetrics.auc._auc)
144153
println(validMetrics.auc._auc)
145154

0 commit comments

Comments
 (0)