Skip to content

Commit bd0e5a4

Browse files
committed
add implementation with Krangl library (Kotlin)
1 parent 810f99f commit bd0e5a4

File tree

2 files changed

+61
-6
lines changed

2 files changed

+61
-6
lines changed

build.gradle

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,17 @@
1+
// Build-script configuration: puts the Kotlin Gradle plugin on the build classpath
// so that the `apply plugin: 'kotlin'` statement further down can be resolved.
buildscript {
2+
// Kotlin version; referenced by the plugin classpath entry in this block and by
// the kotlin-stdlib-jdk8 entry in the project's dependencies block.
ext.kotlin_version = '1.3.61'
3+
// Repositories searched for the build-script classpath dependencies declared below.
repositories {
4+
mavenCentral()
5+
jcenter()
6+
}
7+
8+
dependencies {
9+
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
10+
}
11+
}
12+
113
apply plugin: 'java'
14+
// Kotlin plugin, provided through the buildscript classpath declared above.
apply plugin: 'kotlin'
215
apply plugin: 'eclipse'
316

417
sourceCompatibility = 1.8
@@ -9,15 +22,18 @@ repositories {
922
}
1023

1124
// Project dependencies. In this diff every existing `compile` declaration is
// replaced by an `implementation` declaration with the same coordinates, and
// krangl plus the Kotlin standard library are added.
dependencies {
12-
compile 'tech.tablesaw:tablesaw-core:0.37.3'
25+
implementation 'tech.tablesaw:tablesaw-core:0.37.3'
1326

14-
compile 'joinery:joinery-dataframe:1.9'
27+
implementation 'joinery:joinery-dataframe:1.9'
1528
// For the CSV import joinery needs this dependency too:
16-
compile 'org.apache.poi:poi:3.17'
29+
implementation 'org.apache.poi:poi:3.17'
30+
31+
implementation 'com.zavtech:morpheus-core:0.9.21'
1732

18-
compile 'com.zavtech:morpheus-core:0.9.21'
33+
implementation 'org.datavec:datavec-api:1.0.0-beta2'
34+
implementation 'org.datavec:datavec-local:1.0.0-beta2'
1935

20-
compile 'org.datavec:datavec-api:1.0.0-beta2'
21-
compile 'org.datavec:datavec-local:1.0.0-beta2'
36+
// krangl dataframe library, imported as `krangl.*` by the Kotlin source added in this commit.
implementation "de.mpicbg.scicomp:krangl:0.11"
37+
// Kotlin standard library; its version comes from ext.kotlin_version set in the buildscript block.
implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version"
2238
}
2339

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package test_dataframes
2+
3+
import com.google.common.base.Stopwatch
4+
import krangl.*
5+
6+
/**
 * Exercises the krangl API with a handful of basic dataframe manipulations:
 * cleaning a column, aggregating duplicated rows, pivoting, sorting and
 * deriving a percentage-growth column.
 *
 * Library: https://github.com/holgerbrandl/krangl
 *
 * Background article:
 * https://medium.com/@thijser/doing-cool-data-science-in-java-how-3-dataframe-libraries-stack-up-5e6ccb7b437
 */
fun main() {
    val raw = DataFrame.readCSV("urb_cpop1_1_Data.csv")

    // The stopwatch is started only after the CSV has been read, so the reported
    // time covers the manipulations below and not the file load.
    val stopwatch = Stopwatch.createStarted()

    // Drop rows whose "Value" is the missing-value marker ":", then replace the
    // column with its integer-parsed counterpart (IntCol).
    val present = raw.filter { ctx -> !(ctx["Value"] eq ":") }
    val numeric = present.addColumn("Value") { ctx -> ctx["Value"].map(String::toInt) }

    // Collapse duplicated (CITIES, INDIC_UR, TIME) rows into their mean value.
    val averaged = numeric
        .groupBy("CITIES", "INDIC_UR", "TIME")
        .summarize("Value" to { it["Value"].mean() })

    // Pivot so that each TIME value becomes its own column, then keep only the
    // rows whose indicator ends with "January, total".
    val cities = averaged
        .spread("TIME", "Value")
        .filter { ctx -> ctx["INDIC_UR"].isMatching<String> { endsWith("January, total") } }

    // First ten rows when ordered by the "2017" column, descending.
    val top2017 = cities.select("CITIES", "2017").sortedByDescending("2017").head(10)
    println(top2017)

    // Percentage change from the "2010" column to the "2016" column, largest first.
    val byGrowth = cities
        .addColumn("growth") { ctx -> (ctx["2016"] / ctx["2010"] - 1.0) * 100.0 }
        .sortedByDescending("growth")
    println(byGrowth.select("CITIES", "growth").head(10))

    // Hand the city names, in growth order, to the project's result check.
    val cityNames = byGrowth["CITIES"].asStrings().toList()
    CheckResult.checkResult(cityNames)
    println("Total time: $stopwatch")
}

0 commit comments

Comments
 (0)