This dataset contains house sale prices for King County, Washington (which includes Seattle), covering homes sold between May 2014 and May 2015.
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import os
import time
# Reproducibility: one fixed seed and a RandomState built from it
SEED = 0
RNG = np.random.RandomState(seed=SEED)
# pandas rendering limits for this notebook
pd.options.display.max_columns = 50
pd.options.display.max_rows = 20  # set to None to show every row
pd.options.display.max_colwidth = 50
# Record which library versions this notebook ran against
print([(lib.__name__, lib.__version__) for lib in (np, pd, sns, matplotlib)])
[('numpy', '1.16.4'), ('pandas', '0.25.0'), ('seaborn', '0.9.0'), ('matplotlib', '3.1.1')]
import bokeh
import pixiedust
from pixiedust import display
%%javascript
// Raise the output area's auto-scroll threshold to 9999; presumably this keeps long outputs from collapsing into a scroll box (confirm against the notebook front-end in use).
IPython.OutputArea.auto_scroll_threshold = 9999;
# Load the raw house-sales CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/raw/kc_house_data.csv')
# Report the (rows, columns) shape of the loaded frame.
print(df.shape)
# Preview the first records transposed, so each column name becomes a row.
# Kept as the last expression so the notebook renders it as the cell output.
df.head().T
(21613, 21)
| | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
id | 7129300520 | 6414100192 | 5631500400 | 2487200875 | 1954400510 |
date | 20141013T000000 | 20141209T000000 | 20150225T000000 | 20141209T000000 | 20150218T000000 |
price | 2.2e+05 | 5.4e+05 | 1.8e+05 | 6e+05 | 5.1e+05 |
bedrooms | 3 | 3 | 2 | 4 | 3 |
bathrooms | 1 | 2.2 | 1 | 3 | 2 |
sqft_living | 1180 | 2570 | 770 | 1960 | 1680 |
sqft_lot | 5650 | 7242 | 10000 | 5000 | 8080 |
floors | 1 | 2 | 1 | 1 | 1 |
waterfront | 0 | 0 | 0 | 0 | 0 |
view | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... |
grade | 7 | 7 | 6 | 7 | 8 |
sqft_above | 1180 | 2170 | 770 | 1050 | 1680 |
sqft_basement | 0 | 400 | 0 | 910 | 0 |
yr_built | 1955 | 1951 | 1933 | 1965 | 1987 |
yr_renovated | 0 | 1991 | 0 | 0 | 0 |
zipcode | 98178 | 98125 | 98028 | 98136 | 98074 |
lat | 48 | 48 | 48 | 48 | 48 |
long | -1.2e+02 | -1.2e+02 | -1.2e+02 | -1.2e+02 | -1.2e+02 |
sqft_living15 | 1340 | 1690 | 2720 | 1360 | 1800 |
sqft_lot15 | 5650 | 7639 | 8062 | 5000 | 7503 |
21 rows × 5 columns
# Render the full frame with `display`, the helper imported from pixiedust earlier in the notebook.
display(df)