import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
import pandas as pd
# Load the Boston housing dataset and pull out its pieces.
# NOTE: load_boston was removed in scikit-learn 1.2, so this script needs an
# older scikit-learn release to run as written.
boston = load_boston()
X = boston.data
y = boston.target
features = boston.feature_names

# Build a DataFrame of the features and append the target as a 'Price' column.
boston_df = pd.DataFrame(X, columns=features).assign(Price=y)

# Quick look at the data: first rows, (rows, columns) shape, summary statistics.
print(boston_df.head(), boston_df.shape, boston_df.describe(), sep="\n")
# Output of the three print calls above (head, shape, describe):
#
# CRIM ZN INDUS CHAS NOX ... TAX PTRATIO B LSTAT Price
# 0 0.00632 18.0 2.31 0.0 0.538 ... 296.0 15.3 396.90 4.98 24.0
# 1 0.02731 0.0 7.07 0.0 0.469 ... 242.0 17.8 396.90 9.14 21.6
# 2 0.02729 0.0 7.07 0.0 0.469 ... 242.0 17.8 392.83 4.03 34.7
# 3 0.03237 0.0 2.18 0.0 0.458 ... 222.0 18.7 394.63 2.94 33.4
# 4 0.06905 0.0 2.18 0.0 0.458 ... 222.0 18.7 396.90 5.33 36.2
# (506, 14)
# CRIM ZN INDUS ... B LSTAT Price
# count 506.000000 506.000000 506.000000 ... 506.000000 506.000000 506.000000
# mean 3.613524 11.363636 11.136779 ... 356.674032 12.653063 22.532806
# std 8.601545 23.322453 6.860353 ... 91.294864 7.141062 9.197104
# min 0.006320 0.000000 0.460000 ... 0.320000 1.730000 5.000000
# 25% 0.082045 0.000000 5.190000 ... 375.377500 6.950000 17.025000
# 50% 0.256510 0.000000 9.690000 ... 391.440000 11.360000 21.200000
# 75% 3.677083 12.500000 18.100000 ... 396.225000 16.955000 25.000000
# max 88.976200 100.000000 27.740000 ... 396.900000 37.970000 50.000000
# Columns to compare pairwise: four features plus the 'Price' target.
columns = [
    'LSTAT',
    'INDUS',
    'NOX',
    'RM',
    'Price',
]
subset_df = boston_df.loc[:, columns]

# Draw the scatter-matrix of the selected columns and display it.
sns.pairplot(subset_df)
plt.show()