forked from apache/systemds
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenDescriptiveStatisticsData.sh
More file actions
executable file
·46 lines (38 loc) · 2.68 KB
/
genDescriptiveStatisticsData.sh
File metadata and controls
executable file
·46 lines (38 loc) · 2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
if [ "$2" == "SPARK" ]; then CMD="./sparkDML.sh "; DASH="-"; elif [ "$2" == "MR" ]; then CMD="hadoop jar SystemML.jar " ; else CMD="echo " ; fi
FORMAT="binary"
BASE=$1/bivar
export HADOOP_CLIENT_OPTS="-Xmx2048m -Xms2048m -Xmn256m"
c=1000
nc=100
mdomain=1100
set=20
labelset=10
#XS data 10K rows
${CMD} -f ../datagen/genRandData4DescriptiveStats.dml $DASH-explain $DASH-stats $DASH-nvargs R=10000 C=$c NC=$nc MAXDOMAIN=$mdomain DATA=${BASE}/A_10k/data TYPES=${BASE}/A_10k/types SETSIZE=$set LABELSETSIZE=$labelset TYPES1=${BASE}/A_10k/set1.types TYPES2=${BASE}/A_10k/set2.types INDEX1=${BASE}/A_10k/set1.indices INDEX2=${BASE}/A_10k/set2.indices FMT=$FORMAT
#S data 100K rows
#${CMD} -f ../datagen/genRandData4DescriptiveStats.dml $DASH-explain $DASH-stats $DASH-nvargs R=100000 C=$c NC=$nc MAXDOMAIN=$mdomain DATA=${BASE}/A_100k/data TYPES=${BASE}/A_100k/types SETSIZE=$set LABELSETSIZE=$labelset TYPES1=${BASE}/A_100k/set1.types TYPES2=${BASE}/A_100k/set2.types INDEX1=${BASE}/A_100k/set1.indices INDEX2=${BASE}/A_100k/set2.indices FMT=$FORMAT
#M data 1M rows
#${CMD} -f ../datagen/genRandData4DescriptiveStats.dml $DASH-explain $DASH-stats $DASH-nvargs R=1000000 C=$c NC=$nc MAXDOMAIN=$mdomain DATA=${BASE}/A_1M/data TYPES=${BASE}/A_1M/types SETSIZE=$set LABELSETSIZE=$labelset TYPES1=${BASE}/A_1M/set1.types TYPES2=${BASE}/A_1M/set2.types INDEX1=${BASE}/A_1M/set1.indices INDEX2=${BASE}/A_1M/set2.indices FMT=$FORMAT
#L data 10M rows
#${CMD} -f ../datagen/genRandData4DescriptiveStats.dml $DASH-explain $DASH-stats $DASH-nvargs R=10000000 C=$c NC=$nc MAXDOMAIN=$mdomain DATA=${BASE}/A_10M/data TYPES=${BASE}/A_10M/types SETSIZE=$set LABELSETSIZE=$labelset TYPES1=${BASE}/A_10M/set1.types TYPES2=${BASE}/A_10M/set2.types INDEX1=${BASE}/A_10M/set1.indices INDEX2=${BASE}/A_10M/set2.indices FMT=$FORMAT