/* Lesson 11-1 */
/* File Name = les1101.sas   06/26/03 */

/* Read the air-pollution data: a city identifier (id), the response y and
   six explanatory variables x1-x6, all with list input.
   NOTE: id is read with list input and no length, so it gets the default
   character length of 8; longer city names are truncated in the listings
   (e.g. "Little_R", "San_Fran"). */
data air;
  infile 'usair2.prn';
  input id $ y x1 x2 x3 x4 x5 x6;
  /* Variable labels, left disabled exactly as in the original program. */
  /*
  label id='Cities (都市名)'
        y='SO2 of air in micrograms per cubic metre (SO2 濃度)'
        x1='Average annual temperature in F (気温)'
        x2='Number of manufacturing enterprises employing 20 or more workers (製造業数)'
        x3='Population size (1970 census); in thousands (人口)'
        x4='Average annual wind speed in miles per hour (風速)'
        x5='Average annual precipitation in inches (降雨量)'
        x6='Average number of days with precipitation per year (降雨日数)'
  ;
  */
run;

/* Show the first 10 observations as a sanity check of the input. */
proc print data=air(obs=10);
run;

/* Pairwise correlations among all numeric variables. */
proc corr data=air;
run;

/* Full model: regress y on all six explanatory variables and keep the
   predicted values and residuals in outreg1 for diagnostics. */
proc reg data=air;
  model y=x1 x2 x3 x4 x5 x6;
  output out=outreg1 predicted=pred1 residual=resid1;
run;

proc print data=outreg1(obs=15);
run;

/* Residual plots for the full model. Every plot request is written out
   one by one here (long-hand form); vref=0 draws the reference line at
   residual = 0. */
proc plot data=outreg1;
  plot resid1*pred1 /vref=0;
  plot resid1*x1 /vref=0;
  plot resid1*x2 /vref=0;
  plot resid1*x3 /vref=0;
  plot resid1*x4 /vref=0;
  plot resid1*x5 /vref=0;
  plot resid1*x6 /vref=0;
  plot resid1*y /vref=0;
run;

/* Residual analysis: distribution plots and normality tests for resid1. */
proc univariate data=outreg1 plot normal;
  var resid1;
run;

/* Stepwise selection (variables may enter and leave the model).
   x1--x6 is the short form for a run of consecutive variables: it means
   every variable from x1 through x6 in data set order. */
proc reg data=air;
  model y=x1--x6 / selection=stepwise;
  output out=outreg2 predicted=pred2 residual=resid2;
run;

proc print data=outreg2(obs=15);
run;

/* Residual plots for the stepwise model. */
proc plot data=outreg2;
  plot resid2*pred2 /vref=0;
  /* Short form with an explicit variable group, kept disabled; compare it
     with the one-by-one PLOT statements used for outreg1:
  plot resid2*(x1 x2 x3 x4 x5 x6) /vref=0;
  */
  plot resid2*(x1--x6) /vref=0;   /* short form; this has the same meaning */
  plot resid2*y /vref=0;
run;

/* Residual analysis: distribution plots and normality tests for resid2. */
proc univariate data=outreg2 plot normal;
  var resid2;
run;
SAS システム 2 23:52 Tuesday, June 24, 2003 Correlation Analysis 7 'VAR' Variables: Y X1 X2 X3 X4 X5 X6 Simple Statistics Variable N Mean Std Dev Sum Minimum Maximum Y 41 30.0488 23.4723 1232 8.0000 110.0000 X1 41 55.7634 7.2277 2286 43.5000 75.5000 X2 41 463.0976 563.4739 18987 35.0000 3344 X3 41 608.6098 579.1130 24953 71.0000 3369 X4 41 9.4439 1.4286 387.2000 6.0000 12.7000 X5 41 36.7690 11.7715 1508 7.0500 59.8000 X6 41 113.9024 26.5064 4670 36.0000 166.0000 SAS システム 3 23:52 Tuesday, June 24, 2003 Correlation Analysis Pearson Correlation Coefficients / Prob > |R| under Ho: Rho=0 / N = 41 Y X1 X2 X3 X4 X5 X6 Y 1.00000 -0.43360 0.64477 0.49378 0.09469 0.05429 0.36956 0.0 0.0046 0.0001 0.0010 0.5559 0.7360 0.0174 X1 -0.43360 1.00000 -0.19004 -0.06268 -0.34974 0.38625 -0.43024 0.0046 0.0 0.2340 0.6970 0.0250 0.0126 0.0050 X2 0.64477 -0.19004 1.00000 0.95527 0.23795 -0.03242 0.13183 0.0001 0.2340 0.0 0.0001 0.1341 0.8405 0.4113 X3 0.49378 -0.06268 0.95527 1.00000 0.21264 -0.02612 0.04208 0.0010 0.6970 0.0001 0.0 0.1819 0.8712 0.7939 X4 0.09469 -0.34974 0.23795 0.21264 1.00000 -0.01299 0.16411 0.5559 0.0250 0.1341 0.1819 0.0 0.9357 0.3052 X5 0.05429 0.38625 -0.03242 -0.02612 -0.01299 1.00000 0.49610 0.7360 0.0126 0.8405 0.8712 0.9357 0.0 0.0010 X6 0.36956 -0.43024 0.13183 0.04208 0.16411 0.49610 1.00000 0.0174 0.0050 0.4113 0.7939 0.3052 0.0010 0.0 SAS システム 5 23:52 Tuesday, June 24, 2003 Model: MODEL1 Dependent Variable: Y Analysis of Variance Sum of Mean Source DF Squares Square F Value Prob>F Model 6 14754.63603 2459.10601 11.480 0.0001 Error 34 7283.26641 214.21372 C Total 40 22037.90244 Root MSE 14.63604 R-square 0.6695 Dep Mean 30.04878 Adj R-sq 0.6112 C.V. 
48.70761 SAS システム 6 23:52 Tuesday, June 24, 2003 Parameter Estimates Parameter Standard T for H0: Variable DF Estimate Error Parameter=0 Prob > |T| INTERCEP 1 111.728481 47.31810073 2.361 0.0241 X1 1 -1.267941 0.62117952 -2.041 0.0491 X2 1 0.064918 0.01574825 4.122 0.0002 X3 1 -0.039277 0.01513274 -2.595 0.0138 X4 1 -3.181366 1.81501910 -1.753 0.0887 X5 1 0.512359 0.36275507 1.412 0.1669 X6 1 -0.052050 0.16201386 -0.321 0.7500 SAS システム 7 23:52 Tuesday, June 24, 2003 OBS ID Y X1 X2 X3 X4 X5 X6 PRED1 RESID1 1 Phoenix 10 70.3 213 582 6.0 7.05 36 -3.789 13.7891 2 Little_R 13 61.0 91 132 8.2 48.52 100 28.675 -15.6745 3 San_Fran 12 56.7 453 716 8.7 20.66 67 20.542 -8.5421 4 Denver 17 51.9 454 515 9.0 12.95 86 28.694 -11.6941 5 Hartford 56 49.1 412 158 9.0 43.37 127 56.991 -0.9915 6 Wilmingt 36 54.0 80 80 9.0 40.25 114 31.367 4.6326 SAS システム 15 23:52 Tuesday, June 24, 2003 プロット : RESID1*Y. 凡例: A = 1 OBS, B = 2 OBS, ... | R 50 + A e | s | A i 25 + d | A A AA u | AA AA A A A A a 0 +------AB------AAABA-A---------A--------------------------A------- l | CAA C A | ABA A -25 + A ---+---------+---------+---------+---------+---------+---------+-- 0 20 40 60 80 100 120 Y SAS システム 19 23:52 Tuesday, June 24, 2003 Univariate Procedure Variable=RESID1 Residual Stem Leaf # Boxplot 4 9 1 * 3 0 1 0 2 1 4457 4 | 0 23455567779 11 +--+--+ -0 97665433211100 14 *-----* -1 986652211 9 | -2 3 1 | ----+----+----+----+ Multiply Stem.Leaf by 10**+1 SAS システム 20 23:52 Tuesday, June 24, 2003 Univariate Procedure Variable=RESID1 Residual Normal Probability Plot 45+ * | * +++ | ++++++++ | +++**+** | ++********* | ********** | * **+****** -25+ *+++++++ +----+----+----+----+----+----+----+----+----+----+ -2 -1 0 +1 +2 SAS システム 21 23:52 Tuesday, June 24, 2003 Stepwise Procedure for Dependent Variable Y Step 1 Variable X2 Entered R-square = 0.41572671 C(p) = 23.10893175 DF Sum of Squares Mean Square F Prob>F Regression 1 9161.74469120 9161.74469120 27.75 0.0001 Error 39 12876.15774782 330.15789097 Total 40 
22037.90243902 Parameter Standard Type II Variable Estimate Error Sum of Squares F Prob>F INTERCEP 17.61057438 3.69158676 7513.50474182 22.76 0.0001 X2 0.02685872 0.00509867 9161.74469120 27.75 0.0001 Bounds on condition number: 1, 1 SAS システム 22 23:52 Tuesday, June 24, 2003 ------------------------------------------------------------------------------- Step 2 Variable X3 Entered R-square = 0.58632019 C(p) = 7.55859687 DF Sum of Squares Mean Square F Prob>F Regression 2 12921.26717485 6460.63358743 26.93 0.0001 Error 38 9116.63526417 239.91145432 Total 40 22037.90243902 Parameter Standard Type II Variable Estimate Error Sum of Squares F Prob>F INTERCEP 26.32508332 3.84043919 11272.71964000 46.99 0.0001 X2 0.08243410 0.01469656 7548.02378137 31.46 0.0001 X3 -0.05660660 0.01429968 3759.52248365 15.67 0.0003 SAS システム 23 23:52 Tuesday, June 24, 2003 Bounds on condition number: 11.43374, 45.73494 ------------------------------------------------------------------------------- Step 3 Variable X6 Entered R-square = 0.61740155 C(p) = 6.36100514 DF Sum of Squares Mean Square F Prob>F Regression 3 13606.23518823 4535.41172941 19.90 0.0001 Error 37 8431.66725079 227.88289867 Total 40 22037.90243902 Parameter Standard Type II Variable Estimate Error Sum of Squares F Prob>F INTERCEP 6.96584888 11.77690656 79.72552238 0.35 0.5578 X2 0.07433399 0.01506613 5547.32153619 24.34 0.0001 X3 -0.04939437 0.01454421 2628.36952166 11.53 0.0016 X6 0.16435940 0.09480151 684.96801338 3.01 0.0913 Bounds on condition number: 12.65025, 78.63322 ------------------------------------------------------------------------------- All variables left in the model are significant at the 0.1500 level. No other variable met the 0.1500 significance level for entry into the model. 
Summary of Stepwise Procedure for Dependent Variable Y Variable Number Partial Model Step Entered Removed In R**2 R**2 C(p) F Prob>F 1 X2 1 0.4157 0.4157 23.1089 27.7496 0.0001 2 X3 2 0.1706 0.5863 7.5586 15.6705 0.0003 3 X6 3 0.0311 0.6174 6.3610 3.0058 0.0913 SAS システム 33 23:52 Tuesday, June 24, 2003 プロット : RESID2*Y. 凡例: A = 1 OBS, B = 2 OBS, ... 50 + A R | e | A s | AA i | A ABA A A A d 0 +--------BA-A--ABA-A-A---------A--------------------------A------- u | AC C B A A a | B A A A l | A | -50 + ---+---------+---------+---------+---------+---------+---------+-- 0 20 40 60 80 100 120 Y SAS システム 37 23:52 Tuesday, June 24, 2003 Univariate Procedure Variable=RESID2 Residual Stem Leaf # Boxplot 5 0 1 0 4 3 0 1 | 2 0 1 | 1 001349 6 | 0 011234455589 12 +--+--+ -0 8877755554 10 +-----+ -1 887764321 9 | -2 9 1 | ----+----+----+----+ Multiply Stem.Leaf by 10**+1 SAS システム 38 23:52 Tuesday, June 24, 2003 Univariate Procedure Variable=RESID2 Residual Normal Probability Plot 55+ | * | +++++ | +*++*++ 15+ +*****+* | ******** | ******* | * **+****** -25+ * +++++++ +----+----+----+----+----+----+----+----+----+----+ -2 -1 0 +1 +2
/* Lesson 11-2 */
/* File Name = les1102.sas   06/26/03 */

/* Read the air-pollution data: a city identifier (id), the response y and
   six explanatory variables x1-x6, all with list input. */
data air;
  infile 'usair2.prn';
  input id $ y x1 x2 x3 x4 x5 x6;
run;

/* Show the first 10 observations as a sanity check of the input. */
proc print data=air(obs=10);
run;

/* Pairwise correlations among all numeric variables. */
proc corr data=air;
run;

/* All-possible-subsets method: report R-square for the candidate models
   built from x1 through x6 (x1--x6 = consecutive variables in data set
   order). */
proc reg data=air;
  model y=x1--x6 / selection=rsquare;
run;
SAS システム 5 23:39 Tuesday, June 24, 2003 N = 41 Regression Models for Dependent Variable: Y Number in R-square Variables in Model Model 1 0.41572671 X2 1 0.24381828 X3 1 0.18800913 X1 1 0.13657727 X6 1 0.00896628 X4 1 0.00294788 X5 -------------------------- 2 0.58632019 X2 X3 2 0.51611499 X1 X2 2 0.49813569 X2 X6 2 0.42138706 X2 X5 2 0.41938296 X2 X4 2 0.40658556 X1 X3 (中略) 2 0.01204980 X4 X5 ----------------------------- 3 0.61740155 X2 X3 X6 3 0.61254683 X1 X2 X3 3 0.59304760 X2 X3 X5 3 0.59298732 X2 X3 X4 3 0.56222293 X1 X2 X5 3 0.54523587 X1 X2 X6 (中略) 3 0.15899893 X4 X5 X6 -------------------------------- 4 0.63964257 X1 X2 X3 X5 4 0.63287070 X1 X2 X3 X4 4 0.62909408 X1 X2 X3 X6 4 0.62847667 X2 X3 X4 X6 4 0.61759495 X2 X3 X5 X6 4 0.60282531 X1 X2 X4 X5 (中略) 4 0.25499437 X1 X4 X5 X6 ----------------------------------- 5 0.66850854 X1 X2 X3 X4 X5 5 0.65012088 X1 X2 X3 X4 X6 5 0.63964824 X1 X2 X3 X5 X6 5 0.62901313 X2 X3 X4 X5 X6 5 0.60403117 X1 X2 X4 X5 X6 5 0.50433666 X1 X3 X4 X5 X6 -------------------------------------- 6 0.66951181 X1 X2 X3 X4 X5 X6 -----------------------------------------
/* Build data set air from 'usair2fix.prn'. The city name id is read from
   fixed columns 1-20 (column input); y and x1-x6 follow as list input. */
data air;
  infile 'usair2fix.prn';
  input id $ 1-20
        y x1 x2 x3 x4 x5 x6;