# Challenger accident investigation data.
launch <- read.csv(file = "mlwr/challenger.csv")
str(launch)
head(launch)
summary(launch)

# Simple linear regression (distress_ct ~ temperature) ----
plot(x = launch$temperature, y = launch$distress_ct)
lm_launch <- lm(formula = distress_ct ~ temperature, data = launch)
summary(lm_launch)
# Console output recorded from an earlier run. It is kept as comments so that
# the script can be parsed and sourced; the trailing notes are annotations.
#
# Call:
# lm(formula = distress_ct ~ temperature, data = launch)
#
# Residuals:
#     Min      1Q  Median      3Q     Max
# -0.5608 -0.3944 -0.0854  0.1056  1.8671
#
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept)  3.69841    1.21951   3.033  0.00633 **  <- significant at the 0.01 level
# temperature -0.04754    0.01744  -2.725  0.01268 *   <- significant at the 0.05 level
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.5774 on 21 degrees of freedom
# Multiple R-squared: 0.2613, Adjusted R-squared: 0.2261
#   -> the model explains about 26% of the variance in distress_ct
#      (about 22% after adjustment); the two values are similar.
# F-statistic: 7.426 on 1 and 21 DF, p-value: 0.01268

# Pull the fitted coefficients out of the model. Auto-printing them at top
# level reproduces the values recorded below each statement.
a <- coef(lm_launch)[1]  # y-intercept of the fitted line
a
# (Intercept)
#    3.698413
b <- coef(lm_launch)[2]  # slope of the fitted line
b
# temperature
# -0.04753968

# Overlay the fitted regression line on the scatter plot drawn above.
abline(a = a, b = b, col = "blue")
# Multiple linear regression ----
# General form: y ~ x1 + x2 + x3 + ...
str(launch)
# In a formula, `.` stands for every column of `data` other than the response.
# Note: this overwrites the simple-regression model stored in lm_launch.
lm_launch <- lm(formula = distress_ct ~ ., data = launch)
summary(lm_launch)
# Console output recorded from an earlier run. It is kept as comments so that
# the script can be parsed and sourced; the trailing notes are annotations.
#
# Call:
# lm(formula = distress_ct ~ ., data = launch)
#
# Residuals:
#      Min       1Q   Median       3Q      Max
# -0.65003 -0.24414 -0.11219  0.01279  1.67530
#
# Coefficients:
#                       Estimate Std. Error t value Pr(>|t|)
# (Intercept)           3.527093   1.307024   2.699   0.0142 *  <- significant at the 0.05 level
# temperature          -0.051386   0.018341  -2.802   0.0114 *  <- largest |t| and the only
#                                                                  significant predictor
# field_check_pressure  0.001757   0.003402   0.517   0.6115    <- not significant
# flight_num            0.014293   0.035138   0.407   0.6887    <- not significant
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.565 on 19 degrees of freedom
# Multiple R-squared: 0.36, Adjusted R-squared: 0.259
# F-statistic: 3.563 on 3 and 19 DF, p-value: 0.03371
#
# Reading the table: judge each predictor by its t value / p-value, not by the
# raw size of its estimate, because the estimate depends on the predictor's
# units and is not a measure of correlation.
#
# Fitted model: y = a + b1 * x1 + b2 * x2 + b3 * x3
#   x1 = temperature, x2 = field_check_pressure, x3 = flight_num