|
| 1 | +import os |
| 2 | +import pandas as pd |
| 3 | +import numpy as np |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +from sklearn.model_selection import train_test_split |
| 6 | +from sklearn.linear_model import LinearRegression |
| 7 | +from sklearn.linear_model import Ridge |
| 8 | +from sklearn.linear_model import Lasso |
| 9 | +from sklearn.preprocessing import normalize |
| 10 | +from sklearn import metrics |
| 11 | + |
def displaycoefs(coef_name):
    """Print (coefficient, name) pairs, largest first.

    Args:
        coef_name: iterable of ``(coefficient, predictor_name)`` pairs.
            Pairs are ordered by tuple comparison, i.e. by coefficient
            and then by name on ties.

    The input is left untouched: the pairs are sorted into a new list
    rather than reordered in place, so callers can keep using their own
    sequence (and may pass any iterable, not only a list).
    """
    for c, n in sorted(coef_name, reverse=True):
        print("\t%0.2g cycles per %s "%(c,n))
| 17 | + |
# Input table; generated by ./scripts/statisticalmodel.sh.
datafile = "modeltable.txt"

# Column layout of the table: the predictor columns come first, followed by
# the target (cycle / instruction count) columns.
predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]

print("loading", datafile)
# The file has no header row, uses runs of whitespace as the separator and
# may contain '#' comments. sep=r"\s+" is the documented replacement for the
# deprecated delim_whitespace=True flag.
dataset = pd.read_csv(
    datafile,
    sep=r"\s+",
    skip_blank_lines=True,
    comment="#",
    header=None,
    names=predictors + targets,
)

# Derived columns: cycles summed over the three stages, and cycles per byte.
dataset['total_cycles'] = (
    dataset['stage1_cycle_count']
    + dataset['stage2_cycle_count']
    + dataset['stage3_cycle_count']
)
dataset['ratio'] = dataset['total_cycles'] / dataset['byte_count']
| 32 | + |
# Predictors offered to the model. All of them by default; a narrower set that
# was previously tried is:
#   ["integer_count", "float_count", "string_count", "backslash_count",
#    "nonasciibyte_count", "byte_count", "structural_indexes_count"]
chosenpredictors = predictors
print("chosenpredictors=",chosenpredictors)
print()
chosentargets = ["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count", "total_cycles"]
for t in chosentargets:
    print("target = ", t)
    # Maximum number of predictors allowed to keep a non-zero coefficient.
    howmany = 1  # default (stage 1): at most one predictor
    if t.startswith("stage2"):
        howmany = 2  # stage 2 may use up to two
    elif t.startswith(("stage3", "total")):
        howmany = 3  # stage 3 and the total may use up to three

    # The design matrix and the target do not change during the alpha search.
    x = dataset[chosenpredictors]
    y = dataset[[t]]

    # Grow the L1 penalty by 20% per step until the fit is sparse enough.
    A = 10000000.0
    while True:
        # Non-negative coefficients, no intercept. The former normalize=False
        # argument is omitted: it matched the default and the keyword is no
        # longer accepted by current scikit-learn releases.
        regressor = Lasso(max_iter=100000, alpha=A, positive=True, fit_intercept=False)
        regressor.fit(x, y)
        # Keep only the predictors whose coefficient was not driven to zero.
        rest = [(c, n) for c, n in zip(regressor.coef_, chosenpredictors) if c != 0]
        if len(rest) <= howmany:
            displaycoefs(rest)
            print("R2 = ", regressor.score(x, y))
            break
        A *= 1.2
    print()
0 commit comments