@@ -93,13 +93,22 @@ def buildDLModel(trainHF: Frame, validHF: Frame,
9393 val dlModel = dl.trainModel.get
9494
9595 // Force computation of model metrics on both datasets
96- dlModel.score(train ).delete()
97- dlModel.score(valid ).delete()
96+ dlModel.score(trainHF ).delete()
97+ dlModel.score(validHF ).delete()
9898
9999 // And return resulting model
100100 dlModel
101101}
102102
103+ // Create SQL support: get or create the SQLContext for sc (declared implicit) and import its implicits
104+ import org .apache .spark .sql ._
105+ implicit val sqlContext = SQLContext .getOrCreate(sc)
106+ import sqlContext .implicits ._
107+ //
108+ // Start H2O services: construct an H2OContext on sc and start it; h2oContext is used below to publish the DataFrame and is passed to buildDLModel
109+ import org .apache .spark .h2o ._
110+ val h2oContext = new H2OContext (sc).start()
111+
103112// Data load: read DATAFILE through load(...) into dataRDD
104113val dataRDD = load(DATAFILE )
105114// Extract response column from dataset
@@ -120,26 +129,26 @@ var (hashingTF, idfModel, tfidfRDD) = buildIDFModel(tokensRDD)
120129val resultDF = hamSpamRDD.zip(tfidfRDD).map(v => SMS (v._1, v._2)).toDF
121130
122131// Publish Spark DataFrame as H2OFrame via h2oContext.asH2OFrame
123- val table = h2oContext.asH2OFrame(resultDF, " messages_table" )
132+ val tableHF = h2oContext.asH2OFrame(resultDF, " messages_table" )
124133
125134// Transform target column into categorical: swap in the toCategoricalVec() version of the column, call remove() on what replace returns (presumably the old vec - confirm), then update the frame
126- table .replace(table .find(" target" ), table .vec(" target" ).toCategoricalVec()).remove()
127- table .update(null )
135+ tableHF .replace(tableHF .find(" target" ), tableHF .vec(" target" ).toCategoricalVec()).remove()
136+ tableHF .update(null )
128137
129138// Split table into training and validation parts (ratio 0.8; first part is training, second is validation), then delete the unsplit frame
130139val keys = Array [String ](" train.hex" , " valid.hex" )
131140val ratios = Array [Double ](0.8 )
132- val frs = split(table , keys, ratios)
133- val (train, valid ) = (frs(0 ), frs(1 ))
134- table .delete()
141+ val frs = split(tableHF , keys, ratios)
142+ val (trainHF, validHF ) = (frs(0 ), frs(1 ))
143+ tableHF .delete()
135144
136145// Build final DeepLearning model from the training and validation frames
137- val dlModel = buildDLModel(train, valid )(h2oContext)
146+ val dlModel = buildDLModel(trainHF, validHF )(h2oContext)
138147
139148// Collect binomial model metrics for both frames and print the AUC of each
140149import water .app .ModelMetricsSupport
141- val trainMetrics = ModelMetricsSupport .binomialMM(dlModel, train )
142- val validMetrics = ModelMetricsSupport .binomialMM(dlModel, valid )
150+ val trainMetrics = ModelMetricsSupport .binomialMM(dlModel, trainHF )
151+ val validMetrics = ModelMetricsSupport .binomialMM(dlModel, validHF )
143152println(trainMetrics.auc._auc)
144153println(validMetrics.auc._auc)
145154
0 commit comments