import pandas as pd

def import_data(dataName):
    """Load a CSV file into a DataFrame, echoing the path as a sanity check."""
    csv_path_full = dataName
    print(csv_path_full)
    return pd.read_csv(csv_path_full)

totalData = import_data("trim_QOL_dataset.csv")
import numpy as np

def split_train_test(data, test_ratio):
    """Shuffle the row indices and split off test_ratio of them as a test set."""
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

train_set, test_set = split_train_test(totalData, 0.2)
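# Note: split_train_test reseeds the permutation on every run, so the split
# changes each time this script executes. A minimal sketch of a reproducible
# alternative using scikit-learn's train_test_split (random_state=42 is an
# arbitrary assumption, not from the original code):
#from sklearn.model_selection import train_test_split
#train_set, test_set = train_test_split(totalData, test_size=0.2, random_state=42)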
#test_set
#testRow = test_set.loc[20]
#testRow
importSet = import_data("activeInputValues.csv")
testRow = importSet.loc[0]
#testRow
from sklearn.ensemble import RandomForestRegressor
# sklearn.preprocessing.Imputer was removed in scikit-learn 0.22;
# SimpleImputer is the current equivalent.
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

def prep(X):
    """Median-impute missing values, then standardize every column."""
    imputer = SimpleImputer(missing_values=np.nan, strategy='median')
    imputer.fit(X)
    imputed = imputer.transform(X)
    scaler = StandardScaler()
    scaler.fit(imputed)
    prepped = scaler.transform(imputed)
    return prepped
def dataScaler(X):
    """Standardize every column of X (assumes no missing values)."""
    scaler = StandardScaler()
    scaler.fit(X)
    prepped = scaler.transform(X)
    return prepped
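# The impute-then-scale steps in prep() are exactly what scikit-learn's
# Pipeline is designed to bundle. A minimal sketch of the same preprocessing
# as a reusable pipeline (the name prep_pipeline is ours, not from the
# original code):
#from sklearn.pipeline import Pipeline
#prep_pipeline = Pipeline([
#    ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
#    ('scaler', StandardScaler()),
#])
#prepped = prep_pipeline.fit_transform(train_set)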
def holderMake(testRow):
    """Build a frame with train_set's shape and columns, overwrite the row
    labeled 1 with testRow, then impute and scale the whole frame so the
    test row is on the same scale as the training features."""
    holder = train_set - train_set.iloc[1]
    holder.loc[1] = testRow
    #print(holder.loc[1])
    imputer2 = SimpleImputer(missing_values=np.nan)
    imputer2.fit(holder)
    holderImp = imputer2.transform(holder)
    scaler = StandardScaler()
    scaler.fit(holderImp)
    preppy = scaler.transform(holderImp)
    #print(holderImp)
    # Return the scaled array: the original returned the unscaled holderImp,
    # but the model downstream is fit on scaled features.
    return preppy
def prepTopred(X, label, out):
    """Impute and scale X, grid-search a random forest for the given label
    column, and return the model's prediction for the first row of out."""
    imputer = SimpleImputer(missing_values=np.nan)
    imputer.fit(X)
    imputed = imputer.transform(X)
    scaler = StandardScaler()
    scaler.fit(imputed)
    prepped = scaler.transform(imputed)
    #print(prepped)
    found_labels = X[[label]].copy()
    imputer2 = SimpleImputer(missing_values=np.nan)
    imputer2.fit(found_labels)
    labelImp = imputer2.transform(found_labels)
    from sklearn.model_selection import GridSearchCV
    param_grid = [
        {'n_estimators': [3, 10, 30, 40, 50], 'max_features': [2, 4, 6, 8, 10, 12, 14, 16]},
        {'bootstrap': [False], 'n_estimators': [3, 10, 30, 50], 'max_features': [2, 3, 4, 6, 8, 10, 12, 14, 16]},
    ]
    forest_reg2 = RandomForestRegressor()
    grid_search = GridSearchCV(forest_reg2, param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(prepped, labelImp.ravel())
    model = grid_search.best_estimator_
    #print(grid_search.best_params_)
    predicted = model.predict(out)
    #print(predicted)
    # Return the scalar prediction for the first row of out; the original
    # commented "output = predicted[0]" suggests this intent, and returning
    # the fitted model would break the float() conversions below.
    return predicted[0]
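# Caveat: prepTopred imputes and scales all of X, including the label column
# itself, so the target leaks into the forest's inputs and the grid-search
# scores look better than they should. Under that reading, the fix is to drop
# the label from the feature matrix before preprocessing (our assumption, not
# the original design), e.g.:
#features = X.drop(columns=[label])   # then impute/scale features, not X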
preppy = holderMake(testRow)
# Standalone exploration: the same pipeline as prepTopred, run inline for the
# "Employment_Rate_2015" label with a wider hyperparameter grid.
imputer = SimpleImputer(missing_values=np.nan)
imputer.fit(train_set)
imputed = imputer.transform(train_set)
scaler = StandardScaler()
scaler.fit(imputed)
prepped = scaler.transform(imputed)
#print(prepped)
found_labels = train_set[["Employment_Rate_2015"]].copy()
imputer2 = SimpleImputer(missing_values=np.nan)
imputer2.fit(found_labels)
labelImp = imputer2.transform(found_labels)
from sklearn.model_selection import GridSearchCV
param_grid = [
    {'n_estimators': [3, 10, 30, 40, 50, 60, 70, 80, 90, 100], 'max_features': [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]},
    {'bootstrap': [False], 'n_estimators': [3, 10, 30, 40, 50, 60, 70, 80, 90, 100], 'max_features': [2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]},
]
forest_reg2 = RandomForestRegressor()
grid_search = GridSearchCV(forest_reg2, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(prepped, labelImp.ravel())
grid_search.best_params_
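# After the search, grid_search exposes cross-validated scores for every
# parameter combination, and the winning forest's feature importances can be
# ranked. A minimal sketch (this assumes the column order of train_set
# survives imputation and scaling unchanged):
#cvres = grid_search.cv_results_
#for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
#    print(np.sqrt(-mean_score), params)
#importances = grid_search.best_estimator_.feature_importances_
#print(sorted(zip(importances, train_set.columns), reverse=True))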
predictedHouseChange = prepTopred(train_set, "lowerHouseChange", preppy)
predictedComConstruct = prepTopred(train_set, "Commercial_Construction_2015", preppy)
predictedIncome = prepTopred(train_set, "Household_Income_2015", preppy)
predictedEmploy = prepTopred(train_set, "Employment_Rate_2015", preppy)
h = float(predictedHouseChange)
c = float(predictedComConstruct)
i = float(predictedIncome)
e = float(predictedEmploy)
IS = h + c + i + e
output = [h, c, i, e, IS]
import json
jdat = json.dumps(output)
#print(jdat)
with open('modelOutput.json', 'w') as f:
    json.dump(output, f)
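# A quick sketch to verify the file round-trips (modelOutput.json is the path
# written above):
#with open('modelOutput.json') as f:
#    h, c, i, e, IS = json.load(f)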
# The values above are the predicted output categories: change in median
# house value, commercial construction, household income, employment rate,
# and an aggregated score.
# Actual values for comparison: 16379.00, 0.55, 67361.00, 95.00
testRow