import time
time_start_notebook = time.time()


%%capture
import os
import sys
ENV_COLAB = 'google.colab' in sys.modules

if ENV_COLAB:
    ## install modules
    !pip install watermark

    # model evaluation
    !pip install shap
    !pip install eli5
    !pip install lime
    !pip install duecredit # forestci needs this
    !pip install forestci # fci.random_forest_error(model_rf, Xtrain,Xtest)
    !pip install dtreeviz # decision tree viz
    
    # update modules
    !pip install -U scikit-learn # we need restart
    import sklearn

    # update pandas profiling
    # profile = df.profile_report(style={'full_width':True})
    # profile.to_file(output_file="output.html")
    !pip install -U pandas-profiling # we need restart
    import pandas_profiling

    # Note: We need to restart kernel to use tqdm
    # from tqdm.notebook import trange, tqdm
    # tqdm.pandas()
    # out = df['A'].progress_apply(myfunc)
    !pip install -U tqdm

    # print
    print('Environment: Google Colaboratory.')

# NOTE: If we update modules in gcolab, we need to restart runtime.


# usual imports
import numpy as np
import pandas as pd


# visualization
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# modelling
import sklearn
from sklearn import ensemble

# mixed
import os
import time
import tqdm

# random state
SEED = 0
RNG = np.random.RandomState(SEED)

# ipython
import IPython
from IPython.display import display, HTML, Image, Markdown

# model eval
import shap
import lime
import eli5
import yellowbrick
import pandas_profiling
from pandas_profiling import ProfileReport
import dtreeviz
import forestci
import forestci as fci
import pydotplus
from eli5 import show_weights
from eli5 import show_prediction
import lime.lime_tabular

# versions
import watermark
%load_ext watermark
%watermark -a "Bhishan Poudel" -d -v -m
print()
%watermark -iv

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Bhishan Poudel 2020-11-23 

CPython 3.7.9
IPython 7.19.0

compiler   : Clang 10.0.0 
system     : Darwin
release    : 19.6.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit

seaborn          0.11.0
matplotlib       3.3.3
IPython          7.19.0
sklearn          0.23.2
yellowbrick      1.2
pandas_profiling 2.9.0
forestci         0.4.1
tqdm             4.53.0
shap             0.34.0
json             2.0.9
eli5             0.10.1
numpy            1.18.5
watermark        2.0.2
pandas           1.1.4


def show_methods(obj, ncols=4):
    """Tabulate the public attribute names of ``obj``.

    Names returned by ``dir(obj)`` that do not start with an underscore are
    split into ``ncols`` chunks with ``np.array_split``; each chunk becomes one
    column of the returned DataFrame and shorter columns are padded with ''.
    """
    public_names = list(filter(lambda name: name[0] != '_', dir(obj)))
    chunks = np.array_split(public_names, ncols)
    table = pd.DataFrame(chunks).transpose()
    return table.fillna('')


# Choose the data root: the GitHub raw URL on Colab, a relative local
# directory otherwise.
if not ENV_COLAB:
    data_path_parent = '../data/'
else:
    path_raw = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/'
    proj = 'Projects/King_County_Seattle_House_Price_Kaggle/'
    data_path_parent = path_raw + proj

# Both environments keep the CSVs under the same 'raw/' sub-directory.
data_path_train = data_path_parent + 'raw/train.csv'
data_path_test = data_path_parent + 'raw/test.csv'

target = 'price'
train_size = 0.8

print(data_path_train)

../data/raw/train.csv


# Read the train and test CSVs from the configured paths.
df_train = pd.read_csv(data_path_train)
df_test = pd.read_csv(data_path_test)
print(df_train.shape)
print(df_train.columns)

# Preview the first and last two rows. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0, so the two slices are stacked with pd.concat
# (original row labels are preserved, as before).
display(pd.concat([df_train.head(2), df_train.tail(2)]))

(17290, 21)
Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',
       'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
       'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
       'lat', 'long', 'sqft_living15', 'sqft_lot15'],
      dtype='object')


df_train.dtypes

id                 int64
date              object
price            float64
bedrooms           int64
bathrooms        float64
sqft_living        int64
sqft_lot           int64
floors           float64
waterfront         int64
view               int64
condition          int64
grade              int64
sqft_above         int64
sqft_basement      int64
yr_built           int64
yr_renovated       int64
zipcode            int64
lat              float64
long             float64
sqft_living15      int64
sqft_lot15         int64
dtype: object


# Columns removed from both frames before the feature matrices are built.
cols_drop = ['id','date']

df_train = df_train.drop(columns=cols_drop)
df_test = df_test.drop(columns=cols_drop)


# Features are every remaining column except the target; the target column
# is kept as a separate Series.
df_Xtrain, ser_ytrain = df_train.drop(columns=target), df_train[target]
df_Xtest, ser_ytest = df_test.drop(columns=target), df_test[target]

# NumPy versions of the same features / targets (targets flattened to 1-D).
Xtrain, Xtest = np.array(df_Xtrain), np.array(df_Xtest)
ytrain = np.array(ser_ytrain).flatten()
ytest = np.array(ser_ytest).flatten()


%%writefile get_report_pandas_profiling.py
#!/usr/bin/env python
"""Generate a pandas-profiling HTML report for a CSV file."""
import pandas as pd
import pandas_profiling

def get_report_pandas_profiling(ifile,ofile):
    """Profile the CSV at ``ifile`` and write the report to ``ofile``.

    Parameters
    ----------
    ifile : path of the CSV file, read with ``pd.read_csv``.
    ofile : destination path handed to ``ProfileReport.to_file``.
    """
    # Data
    df = pd.read_csv(ifile)
    profile = pandas_profiling.ProfileReport(df)
    profile.to_file(ofile)

if __name__ == '__main__':
    ifile = '../data/raw/kc_house_data.csv'
    ofile = '../reports/report_pandas_profiling.html'
    # Call the function defined in this script; `get_sweetviz_report` does not
    # exist here and raised NameError when the script was run.
    get_report_pandas_profiling(ifile,ofile)

Writing get_report_pandas_profiling.py


%%writefile get_report_sweetviz.py
#!/usr/bin/env python
"""Generate a sweetviz HTML report for a CSV file."""
import pandas as pd
import sweetviz

def get_report_sweetviz(ifile,ofile):
    """Analyze the CSV at ``ifile`` with sweetviz and write HTML to ``ofile``.

    Parameters
    ----------
    ifile : path of the CSV file, read with ``pd.read_csv``.
    ofile : destination path handed to ``show_html``.
    """
    # config
    sweetviz.config_parser.read_string("[Layout]\nshow_logo=0")

    # Data
    df = pd.read_csv(ifile)
    print(f'shape: {df.shape}')

    my_report = sweetviz.analyze([df,'Full data'])
    my_report.show_html(ofile)

if __name__ == '__main__':
    ifile = '../data/raw/kc_house_data.csv'
    ofile = '../reports/report_sweetviz.html'
    # The function is named get_report_sweetviz; the misspelled
    # `get_report_sweetvis` raised NameError when the script was run.
    get_report_sweetviz(ifile,ofile)

Overwriting get_report_sweetviz.py


# Random forest with 100 trees, seeded with the notebook-wide SEED.
model = sklearn.ensemble.RandomForestRegressor(
    n_estimators=100,
    random_state=SEED,
)

# Fit on the NumPy training features and target.
model.fit(Xtrain, ytrain)

RandomForestRegressor(random_state=100)


from sklearn.metrics import mean_squared_error

# Mean-squared error of the fitted model on the train and test splits.
mse_train, mse_test = (
    mean_squared_error(y_true, model.predict(features))
    for features, y_true in ((Xtrain, ytrain), (Xtest, ytest))
)

print(f'Random Forest mean-squared error on train set: {mse_train:.5f}')
print(f'Random Forest mean-squared error on test  set: {mse_test:.5f}')

Random Forest mean-squared error on train set: 2390088635.38711
Random Forest mean-squared error on test  set: 15884522785.79567


from yellowbrick.regressor import PredictionError, ResidualsPlot

# residuals vs predicted values
fig, ax = plt.subplots(figsize=(8,6))
visualizer = ResidualsPlot(model, ax=ax)

# fit() is given the training split, score() the test split.
visualizer.fit(Xtrain, ytrain)
visualizer.score(Xtest, ytest)

# show() is the current name of the deprecated poof() (Yellowbrick >= 1.0).
g = visualizer.show()


# Test-set residuals, computed as prediction minus truth.
ser_test_resids = pd.Series(model.predict(Xtest) - ytest)

# Histogram of the residuals in 10 bins.
ax = ser_test_resids.plot.hist(bins=10, title="Residuals on Predicted",
                               color='g', alpha=1);
plt.show()


# residuals vs true values
fig, ax = plt.subplots(figsize=(8,6))

# Pair each test residual with its true target value for plotting.
dfx = pd.DataFrame({'Residual': ser_test_resids.to_numpy(), 'Truth': ytest})

dfx.plot.scatter(x='Truth', y='Residual', ax=ax, title="Residual vs Truth");

*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2-D array with a single row if you intend to specify the same RGB or RGBA value for all points.


# if predictions are close to truth, we get 45 deg line
fig, ax = plt.subplots(figsize=(8,6))

visualizer = PredictionError(model, ax=ax)

# fit() is given the training split, score() the test split.
visualizer.fit(Xtrain, ytrain)
visualizer.score(Xtest, ytest)

# show() is the current name of the deprecated poof() (Yellowbrick >= 1.0).
g = visualizer.show()


import forestci as fci


# Variance estimate for each test-set prediction, as returned by forestci.
V_IJ_unbiased = fci.random_forest_error(model, Xtrain, Xtest)

ypreds = model.predict(Xtest)

fig, ax = plt.subplots(figsize=(8,6))
# Error bars are the square root of the estimated variance.
ax.errorbar(ytest, ypreds, yerr=np.sqrt(V_IJ_unbiased), fmt='o');

ax.set(title="Truth vs Predicted from test set with an estimate of variance on the error bars",
       xlabel="ytest (truth)",
       ylabel="ypreds (prediction)");

# Identity (perfect-prediction) line spanning the plotted data. A fixed
# [0, 50] segment is invisible here because prices are in the hundreds of
# thousands.
identity_lims = [min(ytest.min(), ypreds.min()), max(ytest.max(), ypreds.max())]
ax.plot(identity_lims, identity_lims, '--', label="Identity");
ax.legend();

overflow encountered in exp
invalid value encountered in true_divide
Warning: converting a masked element to nan.


# If we build estimators on bootstrap samples of the original training set and
# test them all on the same test set, we can see
# if certain parts of the regression range are more variable. 
# This might suggest that our original training data isn't very good
# at properly defining these parts of the regression range.


%%time

# fixed test set, subsampled training set
# need to identify where the most variance exists
n_items = Xtrain.shape[0]
N_BOOTSTRAP_MODELS = 20

# One column of test-set predictions per bootstrap model, next to the truth.
ser_ypreds_subsample = pd.DataFrame({'price': ytest})

import copy
model_subsample = copy.copy(model)

# Per-model scores are collected in their own lists so the full-model
# mse_train / mse_test values are not overwritten by the loop.
mse_train_subsample = []
mse_test_subsample = []

for n in range(N_BOOTSTRAP_MODELS):
    # Draw 90% of n_items row positions, with replacement, from the seeded
    # notebook RNG so the experiment is reproducible.
    train_mask = RNG.randint(0, n_items, size=int(n_items * 0.9))
    df_Xtrain_masked = df_Xtrain.iloc[train_mask]
    ser_ytrain_masked = ser_ytrain.iloc[train_mask]

    model_subsample.fit(df_Xtrain_masked, ser_ytrain_masked)

    mse_train_subsample.append(
        mean_squared_error(ser_ytrain_masked,
                           model_subsample.predict(df_Xtrain_masked)))

    # Predict on the DataFrame (not the bare array) so the input carries the
    # same feature names the model was fitted with.
    ypreds = model_subsample.predict(df_Xtest)
    ser_ypreds_subsample[f'prediction_{n}'] = ypreds
    mse_test_subsample.append(mean_squared_error(ytest, ypreds))


# Plot every bootstrap model's test predictions against the true target.
fig, ax = plt.subplots()
# Sort rows by the true target and renumber them so the x-axis is the rank.
ser_ypreds_subsample = ser_ypreds_subsample.sort_values(target).reset_index(drop=True)
# All prediction_* columns, drawn without a legend entry each.
ser_ypreds_subsample.drop([target], axis=1).plot(ax=ax, legend=False);
# The true target drawn on the same axes, with a legend entry.
ser_ypreds_subsample[target].plot(ax=ax, label=target, legend=True)
ax.set(title="{} models from subsampled training data,\
 predicting on the same test data\nsorted by increasing price".format(N_BOOTSTRAP_MODELS));

plt.show()


ser_ypreds_subsample.head()
# we do not have much variance


# True target on the primary y-axis, per-row prediction variance on the
# secondary y-axis.
fig, ax = plt.subplots()
ser_ypreds_subsample[target].plot(ax=ax, secondary_y=False,
                                  label=target, legend=True)


# Row-wise variance across all columns except the target.
ser_ypreds_subsample.drop([target], axis=1).var(axis=1).plot(
    ax=ax, label="Variance", legend=True, secondary_y=True);


ax.set(title="Variance of subsampled models on the same test data, by Price");


# Positional index of the test-set row used by the explanation cells.
example_to_explain_idx = 14

# Feature row and matching true target value at that position.
example_to_explain, example_to_explain_true_answer = (
    df_Xtest.iloc[example_to_explain_idx],
    ser_ytest.iloc[example_to_explain_idx],
)
feature_names = df_Xtrain.columns.tolist()

print(f"Explaining the {example_to_explain_idx}th row from the testing set")

Explaining the 14th row from the testing set


print("The answer we're looking for is: ", example_to_explain_true_answer)
# Shape the single example as one row of features for predict(), then index
# the single prediction out of the result. Calling float() directly on a
# 1-element array is deprecated since NumPy 1.25.
example_row_2d = example_to_explain.to_numpy().reshape(1, -1)
print("The predicted answer is:", float(model.predict(example_row_2d)[0]))
print("The input data X is: ")

The answer we're looking for is:  405000.0
The predicted answer is: 427318.5
The input data X is:


pd.DataFrame(example_to_explain)


df_Xtrain.apply(pd.Series.nunique).loc[lambda x: x <10]

floors        6
waterfront    2
view          5
condition     5
dtype: int64


categorical_features = df_Xtrain.apply(pd.Series.nunique).loc[lambda x: x <10].index.to_list()
categorical_features


categorical_features_idx = [df_Xtrain.columns.get_loc(x)
                            for x in categorical_features]
categorical_features_idx


feature_names = df_Xtrain.columns.to_list()

# NOTE(review): this rebinds `categorical_features` from column names to
# column positions; the explainer below is given `categorical_features_idx`
# directly, so the rebinding looks redundant -- confirm before removing.
categorical_features = categorical_features_idx

# Tabular LIME explainer in regression mode, built from the training matrix.
explainer = lime.lime_tabular.LimeTabularExplainer(
    Xtrain,
    mode='regression',
    feature_names=feature_names,
    class_names=['price'],
    categorical_features=categorical_features_idx,
    verbose=True,
)

# Lime Uses perturbed data neighborhood_data and neighborhood_labels
# Intercept is the generated linear model's intercept
# Prediction_local is the predicted output from the linear model
# Right is the predicted value from the explained regressor (not LIME's linear model)


exp = explainer.explain_instance(example_to_explain, model.predict, num_features=10)

Intercept 935578.71671716
Prediction_local [322517.60288523]
Right: 427318.5


exp.show_in_notebook(show_table=True)


exp.as_pyplot_figure()
# note that the double-plot is a bug: https://github.com/marcotcr/lime/issues/89


lst = exp.as_list()
lst


pd.DataFrame(lst)


import eli5
from eli5 import show_weights
from eli5 import show_prediction


print("BIAS is the mean of the training data (i.e. a guess prior to using any features):", ytrain.mean())

BIAS is the mean of the training data (i.e. a guess prior to using any features): 539524.7533834586


model

RandomForestRegressor(random_state=100)


feature_names = df_Xtrain.columns.to_list()

show_prediction(model, 
                example_to_explain,
                feature_names=feature_names, 
                show_feature_values=True)


df_imp = pd.DataFrame({'feature_importances_': model.feature_importances_}, index=feature_names)
df_imp.sort_values(by="feature_importances_", ascending=False).head(10).round(4)


# using eli5 importance visualizer
# Note that the +/- value assumes a Gaussian and the boxplot below shows that this isn't true


show_weights(model, feature_names=feature_names)


from eli5.sklearn import PermutationImportance

# Seed the shuffling with the notebook-wide SEED so the permutation
# importances are reproducible across runs.
perm = PermutationImportance(model, random_state=SEED).fit(Xtest, ytest)
eli5.show_weights(perm, feature_names=feature_names)


feature_names = df_Xtrain.columns.to_list()

# One row per tree, one column per feature: the feature importances reported
# by each individual estimator of the forest.
df_imp = pd.DataFrame(
    {"tree_{}".format(tree_no): tree.feature_importances_
     for tree_no, tree in enumerate(model.estimators_)},
    index=feature_names,
).T

# Features ordered by their mean importance across trees (ascending).
sorted_index = df_imp.mean(axis=0).sort_values().index

fig, ax = plt.subplots(figsize=(8,6))
df_imp[sorted_index].plot(kind="box", vert=False, ax=ax, title="Feature importance distributions");
ax.set_xlabel("Importance")

# hide the right/top borders to make things slightly neater
for side in ('right', 'top'):
    ax.spines[side].set_color('none')

# visual tidy-up: offset the bottom and left axes so small values near the
# left axis are slightly easier to read
ax.spines['bottom'].set_position(('axes', -0.05))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('axes', -0.05))


fig, ax = plt.subplots(figsize=(8,6))

# Same per-tree importances, columns in reverse of the ascending-mean order.
df_imp_desc = df_imp[sorted_index[::-1]]

# Individual trees as points, with a box plot drawn on the same axes.
sns.stripplot(data=df_imp_desc, jitter=0.05, orient="h", ax=ax, edgecolor="k", linewidth=1);
sns.boxplot(data=df_imp_desc, orient="h", ax=ax);
ax.set(title="Feature importance distributions", xlabel="Importance");


show_weights(model, feature_names=feature_names, show="description")


# Small, shallow forest (10 trees, depth 3) whose individual trees are easy
# to inspect. Seeded with the notebook-wide SEED, like the other models,
# instead of a separate hard-coded literal.
est_depth_3 = sklearn.ensemble.RandomForestRegressor(n_estimators=10,
                                             max_depth=3,
                                             random_state=SEED)

print("Sizes for train {}, test {}".format(Xtrain.shape, Xtest.shape))
est_depth_3.fit(Xtrain, ytrain)


# First fitted tree of the shallow forest.
est_tree0 = est_depth_3.estimators_[0]

Sizes for train (17290, 18), test (4323, 18)


show_weights(est_tree0, feature_names=feature_names)



Tree


0

grade <= 9.5
mse = 134288139790.765
samples = 100.0%
value = 535594.228


1

lat <= 47.534
mse = 55472788860.881
samples = 92.4%
value = 474242.924


0->1


True


8

sqft_living <= 4425.0
mse = 492891064499.102
samples = 7.6%
value = 1271809.886


0->8


False


2

sqft_living <= 2145.0
mse = 18259188200.985
samples = 37.0%
value = 331376.796


1->2


5

sqft_living <= 2185.0
mse = 57529482042.694
samples = 55.4%
value = 570485.793


1->5


3

mse = 8515506675.512
samples = 25.6%
value = 283457.276


2->3


4

mse = 23155828759.067
samples = 11.4%
value = 442874.673


2->4


6

mse = 23204994431.151
samples = 36.7%
value = 480112.999


5->6


7

mse = 77164736994.132
samples = 18.7%
value = 751373.612


5->7


9

long <= -122.189
mse = 211890458650.779
samples = 5.9%
value = 1078759.132


8->9


12

waterfront <= 0.5
mse = 890853190163.058
samples = 1.6%
value = 1926143.63


8->12


10

mse = 306698535315.563
samples = 2.4%
value = 1343505.554


9->10


11

mse = 61287443548.459
samples = 3.5%
value = 892602.444


9->11


13

mse = 606412597599.894
samples = 1.5%
value = 1751404.133


12->13


14

mse = 851243175898.438
samples = 0.1%
value = 3405968.75


12->14


est_tree1 = est_depth_3.estimators_[1]
show_weights(est_tree1, feature_names=feature_names)



Tree


0

grade <= 8.5
mse = 129027579754.244
samples = 100.0%
value = 538322.924


1

lat <= 47.534
mse = 37077038048.873
samples = 80.3%
value = 437357.816


0->1


True


8

sqft_living <= 4275.0
mse = 292889324928.851
samples = 19.7%
value = 951097.284


0->8


False


2

sqft_living <= 2032.0
mse = 12972644140.395
samples = 32.9%
value = 312563.507


1->2


5

sqft_living <= 2025.0
mse = 35456606727.987
samples = 47.4%
value = 524344.908


1->5


3

mse = 7140829129.663
samples = 22.7%
value = 273631.86


2->3


4

mse = 15057829366.52
samples = 10.2%
value = 399421.202


2->4


6

mse = 19353359704.06
samples = 31.6%
value = 462257.649


5->6


7

mse = 44463246676.031
samples = 15.7%
value = 649871.639


5->7


9

lat <= 47.517
mse = 142500105516.347
samples = 17.4%
value = 856756.065


8->9


12

long <= -122.201
mse = 855207110682.926
samples = 2.4%
value = 1700365.179


8->12


10

mse = 37414912350.087
samples = 3.5%
value = 550263.325


9->10


11

mse = 139122354921.233
samples = 13.8%
value = 935838.602


9->11


13

mse = 1108358414236.679
samples = 1.0%
value = 2269734.706


12->13


14

mse = 175396755655.057
samples = 1.3%
value = 1239446.99


12->14


from sklearn.tree import export_graphviz
from sklearn.ensemble import RandomForestRegressor
from subprocess import call

# NOTE(review): this rebinds `model` to a 10-tree forest, replacing the
# 100-tree model used by the earlier cells.
model = RandomForestRegressor(n_estimators=10,random_state=SEED)
model.fit(Xtrain,ytrain)
last_decision_tree = model.estimators_[-1]

# Write the last tree of the forest to 'tree.dot'. `class_names` is omitted:
# it only applies to classifiers, and a bare string such as `target` is
# rejected by parameter validation in newer scikit-learn releases.
export_graphviz(last_decision_tree, out_file='tree.dot',
                feature_names = feature_names,
                rounded = True,
                proportion = False,
                precision = 2,
                filled = True)

# NOTE: The image is too small, it's useless

# Convert the dot file to a PNG with the Graphviz `dot` command
# call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=600'])


# Display in jupyter notebook
# from IPython.display import Image
# Image(filename = 'tree.png')


import pydotplus

# Create DOT data
# With out_file=None the DOT source is returned as a string. `class_names`
# is omitted: it only applies to classifiers, and a bare string such as
# `target` is rejected by parameter validation in newer scikit-learn releases.
dot_data = sklearn.tree.export_graphviz(
    last_decision_tree,
    out_file=None,
    feature_names=feature_names)

# # Draw graph
# graph = pydotplus.graph_from_dot_data(dot_data)  

# # Show graph
# Image(graph.create_png())

# NOTE: The image is too BIG, it's useless

dot: graph is too large for cairo-renderer bitmaps. Scaling by 0.0580735 to fit


# # Create PDF
# graph.write_pdf("forest.pdf")

# # Create PNG
# graph.write_png("forest.png")

dot: graph is too large for cairo-renderer bitmaps. Scaling by 0.0580735 to fit

True

	id	date	price	bedrooms	bathrooms	sqft_living	sqft_lot	floors	view	...	grade	sqft_above	sqft_basement	yr_built	yr_renovated	zipcode	lat	long	sqft_living15	sqft_lot15
0	2561340020	20140804T000000	325000.0	3	1.75	1780	11096	1.0	0	...	7	1210	570	1979	0	98074	47.6170	-122.051	1780	10640
1	8598200070	20141208T000000	278000.0	2	2.50	1420	2229	2.0	0	...	7	1420	0	2004	0	98059	47.4871	-122.165	1500	2230
17288	7174800760	20140725T000000	667000.0	5	2.00	1900	5470	1.0	0	...	7	1180	720	1930	1965	98105	47.6666	-122.303	1300	3250
17289	9521100280	20140612T000000	480000.0	3	2.50	1250	1103	3.0	2	...	8	1250	0	2005	0	98103	47.6619	-122.352	1250	1188

	price	prediction_0	prediction_1	prediction_2	prediction_3	prediction_4	prediction_5	prediction_6	prediction_7	prediction_8	...	prediction_10	prediction_11	prediction_12	prediction_13	prediction_14	prediction_15	prediction_16	prediction_17	prediction_18	prediction_19
0	82500.0	142149.200000	139748.0	198321.43	132443.50	113340.500000	151062.00	192610.600000	160423.733333	210922.600000	...	150935.50	164147.983333	149175.771429	178948.90	168335.733333	182367.700000	150591.083333	168500.00	157423.800000	193492.25
1	84000.0	219204.200000	199347.3	222482.40	194468.20	163114.583333	200623.21	204826.233333	218428.776667	219307.516667	...	170443.50	197598.816667	173947.057143	197591.30	194979.900000	207897.000000	188478.261905	174044.50	164710.050000	240378.10
2	92000.0	150097.840000	158602.0	169153.64	153096.52	134874.000000	164636.18	153371.130000	193290.770000	155654.910000	...	136652.63	154650.000000	163976.500000	157817.25	143193.670000	150760.784286	152514.630000	140313.25	133347.750000	146495.13
3	95000.0	153051.733333	172824.3	161808.25	173089.63	150396.500000	149936.00	151005.500000	152251.930000	177271.133333	...	173601.66	135316.250000	143232.944444	163906.95	164266.000000	151310.500000	159131.250000	156467.68	169188.464286	181192.74
4	105500.0	357380.750000	141086.5	418092.00	326018.50	138257.150000	388501.00	167553.000000	367720.500000	400104.450000	...	320318.50	181887.500000	174659.500000	335607.50	344823.000000	348528.400000	167737.000000	159588.80	149971.920000	170711.65

	0	1
0	waterfront=0	-253007.615387
1	sqft_living <= 1420.00	-153878.482473
2	grade <= 7.00	-115284.938503
3	long > -122.12	-55098.249840
4	view=0	-34502.575039
5	sqft_lot > 10676.50	32499.361684
6	sqft_lot15 > 10078.75	26676.358222
7	47.47 < lat <= 47.57	-25924.353979
8	bathrooms <= 1.50	-21432.820346
9	condition=3	-13107.798171

Contribution^?	Feature	Value
+539287.119	<BIAS>	1.000
+38992.169	sqft_lot	12500.000
+25099.262	zipcode	98027.000
+22975.171	sqft_living15	2310.000
+12624.691	long	-122.051
+11478.805	sqft_above	1330.000
+1338.927	condition	3.000
+308.378	bedrooms	3.000
-81.900	yr_renovated	0.000
-578.915	sqft_basement	0.000
-746.061	floors	1.000
-848.045	waterfront	0.000
-1290.447	bathrooms	1.500
-1413.067	yr_built	1966.000
-2513.542	sqft_lot15	12500.000
-5089.293	view	0.000
-62719.059	sqft_living	1330.000
-69610.967	lat	47.526
-79894.727	grade	7.000

Table of Contents

Data Description¶

Load the libraries¶

Useful Functions¶

Parameters¶

Load the Data¶

Pandas Profiling¶

Sweetviz¶

Modelling: Random Forest¶

Yellowbrick Visualization¶

Prediction Error vs Truth¶

Random Forest Confidence Interval¶

Model Explanation Using Lime¶

Model Interpretation using ELI5¶

Feature Importances¶

ELI5's Permutation Importance on the same features¶

Feature importance as a box plot¶

Weights of a tree in a small forest¶

sklearn Random Forest plot tree using graphviz¶

	14
bedrooms	3.0000
bathrooms	1.5000
sqft_living	1330.0000
sqft_lot	12500.0000
floors	1.0000
waterfront	0.0000
view	0.0000
condition	3.0000
grade	7.0000
sqft_above	1330.0000
sqft_basement	0.0000
yr_built	1966.0000
yr_renovated	0.0000
zipcode	98027.0000
lat	47.5263
long	-122.0510
sqft_living15	2310.0000
sqft_lot15	12500.0000

	feature_importances_
grade	0.3294
sqft_living	0.2477
lat	0.1572
long	0.0698
waterfront	0.0336
sqft_living15	0.0307
yr_built	0.0283
sqft_above	0.0239
zipcode	0.0148
sqft_lot15	0.0139

Weight	Feature
0.3294 ± 0.1489	grade
0.2477 ± 0.1551	sqft_living
0.1572 ± 0.0242	lat
0.0698 ± 0.0221	long
0.0336 ± 0.0234	waterfront
0.0307 ± 0.0108	sqft_living15
0.0283 ± 0.0215	yr_built
0.0239 ± 0.0306	sqft_above
0.0148 ± 0.0080	zipcode
0.0139 ± 0.0084	sqft_lot15
0.0138 ± 0.0092	sqft_lot
0.0117 ± 0.0181	view
0.0088 ± 0.0158	bathrooms
0.0056 ± 0.0062	sqft_basement
0.0034 ± 0.0042	bedrooms
0.0032 ± 0.0034	condition
0.0023 ± 0.0068	yr_renovated
0.0021 ± 0.0027	floors

Weight	Feature
0.3548 ± 0.0235	lat
0.2660 ± 0.0234	sqft_living
0.2204 ± 0.0335	grade
0.1894 ± 0.0151	long
0.0284 ± 0.0021	yr_built
0.0236 ± 0.0043	sqft_living15
0.0232 ± 0.0088	waterfront
0.0129 ± 0.0008	zipcode
0.0089 ± 0.0011	sqft_above
0.0068 ± 0.0006	sqft_lot
0.0068 ± 0.0010	view
0.0044 ± 0.0009	sqft_lot15
0.0013 ± 0.0005	condition
0.0005 ± 0.0003	floors
0.0003 ± 0.0006	bathrooms
-0.0001 ± 0.0002	yr_renovated
-0.0008 ± 0.0006	sqft_basement
-0.0008 ± 0.0004	bedrooms

Weight	Feature
0.5250	grade
0.2408	sqft_living
0.1475	lat
0.0527	waterfront
0.0340	long
0	condition
0	bathrooms
0	sqft_lot
0	floors
0	view
0	sqft_lot15
0	sqft_living15
0	sqft_above
0	sqft_basement
0	yr_built
0	yr_renovated
0	zipcode
0	bedrooms

Weight	Feature
0.5269	grade
0.2364	sqft_living
0.1638	lat
0.0729	long
0	condition
0	bathrooms
0	sqft_lot
0	floors
0	waterfront
0	view
0	sqft_lot15
0	sqft_living15
0	sqft_above
0	sqft_basement
0	yr_built
0	yr_renovated
0	zipcode
0	bedrooms