/* Lesson 26-1 */
/* File Name = les2601.sas   12/19/02 */
/*
 * Fisher's iris data: descriptive correlations, a histogram of petal
 * width, and discriminant analysis of the three species.
 *
 * Input : IRISES.DAT (read with list input, see the DATA step below).
 * Output: listing output from each procedure, plus the work data sets
 *         IRIS (150 observations) and IRISOUT (PROC DISCRIM OUTSTAT=).
 */

options linesize=72;   /* pagesize=20; */

/*
 * Display formats for the numeric species code (1-3).
 *   specname. : full species name, used in listings and BY lines.
 *   specchar. : one-character plotting symbol for PROC CHART subgroups
 *               ('O' for Versicolor, since 'V' is taken by Virginica).
 * Species 1 is Iris setosa.  An earlier revision of this program
 * labeled it 'Sentosa', so listings produced before this correction
 * show that spelling.
 */
proc format;
  value specname 1='Setosa '
                 2='Versicolor'
                 3='Virginica ';
  value specchar 1='S'
                 2='O'
                 3='V';
run;

/* TITLE is a global statement; it stays in effect for every step below. */
title "Fisher's Iris Data";

/*
 * Read the measurements.  Each INPUT call takes the next four values
 * from the input stream (the trailing @@ holds the line across calls),
 * so the file is consumed as: specimen 1 of species 1, 2, 3, then
 * specimen 2 of species 1, 2, 3, and so on for 50 specimens.
 * The explicit OUTPUT writes one observation per specimen/species pair.
 */
data iris;
  infile 'IRISES.DAT';
  do obs = 1 to 50;
    do species = 1 to 3;
      input sepallen sepalwid petallen petalwid @@;
      output;
    end;
  end;
  /* Declared after the loops so the variable order in the data set
     (obs, species, then the four measurements) is unchanged. */
  format species specname.;
  /*
  label sepallen='Sepal Length in mm.'
        sepalwid='Sepal Width in mm.'
        petallen='Petal Length in mm.'
        petalwid='Petal Width in mm.';
  */
run;

/* Quick visual check of the first ten observations. */
proc print data=iris(obs=10);
run;

/* Correlations over all 150 observations. */
title2 'Correlation of this data';
proc corr data=iris;
  var sepallen sepalwid petallen petalwid;
run;

/* BY-group processing below requires the data sorted by species.
   Note: this sorts WORK.IRIS in place. */
proc sort data=iris;
  by species;
run;

/* Correlations within each species. */
proc corr data=iris;
  var sepallen sepalwid petallen petalwid;
  by species;
run;

/* Horizontal bar chart of petal width, bars subdivided by species
   using the one-character specchar. symbols. */
title2 'Histogram of petalwid';
proc chart data=iris;
  hbar petalwid / subgroup=species midpoints=0.0 to 3.0 by 0.1;
  format species specchar.;
run;

/* Discriminant analysis using petal width alone (all defaults). */
title2 'Discriminant Analysis';
proc discrim data=iris;
  class species;
  var petalwid;
run;

/*
 * Discriminant analysis using all four measurements.
 *   pool=test             : test homogeneity of the within-class
 *                           covariance matrices and let the result
 *                           decide whether they are pooled.
 *   wcov pcov             : print within-class and pooled covariances.
 *   distance anova manova : print between-group distances and the
 *                           univariate / multivariate tests.
 *   listerr crosslisterr  : list misclassified observations for
 *                           resubstitution and cross-validation.
 *   outstat=irisout       : save the computed statistics.
 */
proc discrim data=iris outstat=irisout method=normal pool=test
             wcov pcov distance anova manova listerr crosslisterr;
  class species;
  var sepallen sepalwid petallen petalwid;
run;

/* Dump the statistics saved by OUTSTAT=. */
proc print data=irisout;
run;
Fisher's Iris Data 1 13:04 Thursday, December 19, 2002 OBS OBS SPECIES SEPALLEN SEPALWID PETALLEN PETALWID 1 1 Sentosa 5.1 3.5 1.4 0.2 2 1 Versicolor 7.0 3.2 4.7 1.4 3 1 Virginica 6.3 3.3 6.0 2.5 4 2 Sentosa 4.9 3.0 1.4 0.2 5 2 Versicolor 6.4 3.2 4.5 1.5 6 2 Virginica 5.8 2.7 5.1 1.9 7 3 Sentosa 4.7 3.2 1.3 0.2 8 3 Versicolor 6.9 3.1 4.9 1.5 9 3 Virginica 7.1 3.0 5.9 2.1 10 4 Sentosa 4.6 3.1 1.5 0.2 Fisher's Iris Data 2 Correlation of this data 13:04 Thursday, December 19, 2002 Correlation Analysis 4 'VAR' Variables: SEPALLEN SEPALWID PETALLEN PETALWID Simple Statistics Variable N Mean Std Dev Sum Minimum Maximum SEPALLEN 150 5.8433 0.8281 876.5 4.3000 7.9000 SEPALWID 150 3.0573 0.4359 458.6 2.0000 4.4000 PETALLEN 150 3.7580 1.7653 563.7 1.0000 6.9000 PETALWID 150 1.1993 0.7622 179.9 0.1000 2.5000 Pearson Correlation Coefficients / Prob > |R| under Ho: Rho=0 / N = 150 SEPALLEN SEPALWID PETALLEN PETALWID SEPALLEN 1.00000 -0.11757 0.87175 0.81794 0.0 0.1519 0.0001 0.0001 SEPALWID -0.11757 1.00000 -0.42844 -0.36613 0.1519 0.0 0.0001 0.0001 PETALLEN 0.87175 -0.42844 1.00000 0.96287 0.0001 0.0001 0.0 0.0001 PETALWID 0.81794 -0.36613 0.96287 1.00000 0.0001 0.0001 0.0001 0.0 Fisher's Iris Data 3 Correlation of this data 13:04 Thursday, December 19, 2002 --------------------------- SPECIES=Sentosa ---------------------------- Correlation Analysis 4 'VAR' Variables: SEPALLEN SEPALWID PETALLEN PETALWID Simple Statistics Variable N Mean Std Dev Sum Minimum Maximum SEPALLEN 50 5.0060 0.3525 250.3 4.3000 5.8000 SEPALWID 50 3.4280 0.3791 171.4 2.3000 4.4000 PETALLEN 50 1.4620 0.1737 73.1000 1.0000 1.9000 PETALWID 50 0.2460 0.1054 12.3000 0.1000 0.6000 Pearson Correlation Coefficients / Prob > |R| under Ho: Rho=0 / N = 50 SEPALLEN SEPALWID PETALLEN PETALWID SEPALLEN 1.00000 0.74255 0.26718 0.27810 0.0 0.0001 0.0607 0.0505 SEPALWID 0.74255 1.00000 0.17770 0.23275 0.0001 0.0 0.2170 0.1038 PETALLEN 0.26718 0.17770 1.00000 0.33163 0.0607 0.2170 0.0 0.0186 PETALWID 0.27810 0.23275 
0.33163 1.00000 0.0505 0.1038 0.0186 0.0 Fisher's Iris Data 4 Correlation of this data 13:04 Thursday, December 19, 2002 -------------------------- SPECIES=Versicolor -------------------------- Correlation Analysis 4 'VAR' Variables: SEPALLEN SEPALWID PETALLEN PETALWID Simple Statistics Variable N Mean Std Dev Sum Minimum Maximum SEPALLEN 50 5.9360 0.5162 296.8 4.9000 7.0000 SEPALWID 50 2.7700 0.3138 138.5 2.0000 3.4000 PETALLEN 50 4.2600 0.4699 213.0 3.0000 5.1000 PETALWID 50 1.3260 0.1978 66.3000 1.0000 1.8000 Pearson Correlation Coefficients / Prob > |R| under Ho: Rho=0 / N = 50 SEPALLEN SEPALWID PETALLEN PETALWID SEPALLEN 1.00000 0.52591 0.75405 0.54646 0.0 0.0001 0.0001 0.0001 SEPALWID 0.52591 1.00000 0.56052 0.66400 0.0001 0.0 0.0001 0.0001 PETALLEN 0.75405 0.56052 1.00000 0.78667 0.0001 0.0001 0.0 0.0001 PETALWID 0.54646 0.66400 0.78667 1.00000 0.0001 0.0001 0.0001 0.0 Fisher's Iris Data 5 Correlation of this data 13:04 Thursday, December 19, 2002 -------------------------- SPECIES=Virginica --------------------------- Correlation Analysis 4 'VAR' Variables: SEPALLEN SEPALWID PETALLEN PETALWID Simple Statistics Variable N Mean Std Dev Sum Minimum Maximum SEPALLEN 50 6.5880 0.6359 329.4 4.9000 7.9000 SEPALWID 50 2.9740 0.3225 148.7 2.2000 3.8000 PETALLEN 50 5.5520 0.5519 277.6 4.5000 6.9000 PETALWID 50 2.0260 0.2747 101.3 1.4000 2.5000 Pearson Correlation Coefficients / Prob > |R| under Ho: Rho=0 / N = 50 SEPALLEN SEPALWID PETALLEN PETALWID SEPALLEN 1.00000 0.45723 0.86422 0.28111 0.0 0.0008 0.0001 0.0480 SEPALWID 0.45723 1.00000 0.40104 0.53773 0.0008 0.0 0.0039 0.0001 PETALLEN 0.86422 0.40104 1.00000 0.32211 0.0001 0.0039 0.0 0.0225 PETALWID 0.28111 0.53773 0.32211 1.00000 0.0480 0.0001 0.0225 0.0 Fisher's Iris Data 6 Histogram of petalwid 13:04 Thursday, December 19, 2002 PETALWID Cum. Cum. 
Midpoint Freq Freq Percent Percent | 0.0 | 0 0 0.00 0.00 0.1 |SSSSS 5 5 3.33 3.33 0.2 |SSSSSSSSSSSSSSSSSSSSSSSSSSSSS 29 34 19.33 22.67 0.3 |SSSSSSS 7 41 4.67 27.33 0.4 |SSSSSSS 7 48 4.67 32.00 0.5 |S 1 49 0.67 32.67 0.6 |S 1 50 0.67 33.33 0.7 | 0 50 0.00 33.33 0.8 | 0 50 0.00 33.33 0.9 | 0 50 0.00 33.33 1.0 |OOOOOOO 7 57 4.67 38.00 1.1 |OOO 3 60 2.00 40.00 1.2 |OOOOO 5 65 3.33 43.33 1.3 |OOOOOOOOOOOOO 13 78 8.67 52.00 1.4 |OOOOOOOV 8 86 5.33 57.33 1.5 |OOOOOOOOOOVV 12 98 8.00 65.33 1.6 |OOOV 4 102 2.67 68.00 1.7 |OV 2 104 1.33 69.33 1.8 |OVVVVVVVVVVV 12 116 8.00 77.33 1.9 |VVVVV 5 121 3.33 80.67 2.0 |VVVVVV 6 127 4.00 84.67 2.1 |VVVVVV 6 133 4.00 88.67 2.2 |VVV 3 136 2.00 90.67 2.3 |VVVVVVVV 8 144 5.33 96.00 2.4 |VVV 3 147 2.00 98.00 2.5 |VVV 3 150 2.00 100.00 2.6 | 0 150 0.00 100.00 2.7 | 0 150 0.00 100.00 2.8 | 0 150 0.00 100.00 2.9 | 0 150 0.00 100.00 3.0 | 0 150 0.00 100.00 | -----+----+----+----+----+---- 5 10 15 20 25 Frequency 記号 SPECIES 記号 SPECIES 記号 SPECIES S S O O V V Fisher's Iris Data 7 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis 150 Observations 149 DF Total 1 Variables 147 DF Within Classes 3 Classes 2 DF Between Classes Class Level Information Prior SPECIES Frequency Weight Proportion Probability Sentosa 50 50.0000 0.333333 0.333333 Versicolor 50 50.0000 0.333333 0.333333 Virginica 50 50.0000 0.333333 0.333333 Fisher's Iris Data 8 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Pooled Covariance Matrix Information Covariance Natural Log of the Determinant Matrix Rank of the Covariance Matrix 1 -3.1729079 Fisher's Iris Data 9 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Pairwise Generalized Squared Distances Between Groups 2 _ _ -1 _ _ D (i|j) = (X - X )' COV (X - X ) i j i j Generalized Squared Distance to SPECIES From SPECIES Sentosa Versicolor Virginica Sentosa 0 27.84992 75.65130 Versicolor 27.84992 0 11.69964 Virginica 75.65130 11.69964 0 Fisher's 
Iris Data 10 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Linear Discriminant Function _ -1 _ -1 _ Constant = -.5 X' COV X Coefficient Vector = COV X j j j SPECIES Sentosa Versicolor Virginica CONSTANT -0.72246 -20.99102 -49.00330 PETALWID 5.87370 31.66066 48.37443 Fisher's Iris Data 11 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Classification Summary for Calibration Data: WORK.IRIS Resubstitution Summary using Linear Discriminant Function Generalized Squared Distance Function: 2 _ -1 _ D (X) = (X-X )' COV (X-X ) j j j Posterior Probability of Membership in each SPECIES: 2 2 Pr(j|X) = exp(-.5 D (X)) / SUM exp(-.5 D (X)) j k k Number of Observations and Percent Classified into SPECIES: From SPECIES Sentosa Versicolor Virginica Total Sentosa 50 0 0 50 100.00 0.00 0.00 100.00 Versicolor 0 48 2 50 0.00 96.00 4.00 100.00 Virginica 0 4 46 50 0.00 8.00 92.00 100.00 Total 50 52 48 150 Percent 33.33 34.67 32.00 100.00 Priors 0.3333 0.3333 0.3333 Error Count Estimates for SPECIES: Sentosa Versicolor Virginica Total Rate 0.0000 0.0400 0.0800 0.0400 Priors 0.3333 0.3333 0.3333 Fisher's Iris Data 12 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis 150 Observations 149 DF Total 4 Variables 147 DF Within Classes 3 Classes 2 DF Between Classes Class Level Information Prior SPECIES Frequency Weight Proportion Probability Sentosa 50 50.0000 0.333333 0.333333 Versicolor 50 50.0000 0.333333 0.333333 Virginica 50 50.0000 0.333333 0.333333 Fisher's Iris Data 13 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Within-Class Covariance Matrices SPECIES = Sentosa DF = 49 Variable SEPALLEN SEPALWID PETALLEN PETALWID SEPALLEN 0.1242489796 0.0992163265 0.0163551020 0.0103306122 SEPALWID 0.0992163265 0.1436897959 0.0116979592 0.0092979592 PETALLEN 0.0163551020 0.0116979592 0.0301591837 0.0060693878 PETALWID 0.0103306122 0.0092979592 0.0060693878 0.0111061224 
------------------------------------------------------------------------ SPECIES = Versicolor DF = 49 Variable SEPALLEN SEPALWID PETALLEN PETALWID SEPALLEN 0.2664326531 0.0851836735 0.1828979592 0.0557795918 SEPALWID 0.0851836735 0.0984693878 0.0826530612 0.0412040816 PETALLEN 0.1828979592 0.0826530612 0.2208163265 0.0731020408 PETALWID 0.0557795918 0.0412040816 0.0731020408 0.0391061224 ------------------------------------------------------------------------ SPECIES = Virginica DF = 49 Variable SEPALLEN SEPALWID PETALLEN PETALWID SEPALLEN 0.4043428571 0.0937632653 0.3032897959 0.0490938776 SEPALWID 0.0937632653 0.1040040816 0.0713795918 0.0476285714 PETALLEN 0.3032897959 0.0713795918 0.3045877551 0.0488244898 PETALWID 0.0490938776 0.0476285714 0.0488244898 0.0754326531 Fisher's Iris Data 14 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Pooled Within-Class Covariance Matrix DF = 147 Variable SEPALLEN SEPALWID PETALLEN PETALWID SEPALLEN 0.2650081633 0.0927210884 0.1675142857 0.0384013605 SEPALWID 0.0927210884 0.1153877551 0.0552435374 0.0327102041 PETALLEN 0.1675142857 0.0552435374 0.1851877551 0.0426653061 PETALWID 0.0384013605 0.0327102041 0.0426653061 0.0418816327 Fisher's Iris Data 15 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Within Covariance Matrix Information Covariance Natural Log of the Determinant SPECIES Matrix Rank of the Covariance Matrix Sentosa 4 -13.06736 Versicolor 4 -10.87433 Virginica 4 -8.92706 Pooled 4 -9.95854 Fisher's Iris Data 16 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Test of Homogeneity of Within Covariance Matrices Notation: K = Number of Groups P = Number of Variables N = Total Number of Observations - Number of Groups N(i) = Number of Observations in the i'th Group - 1 __ N(i)/2 || |Within SS Matrix(i)| V = ----------------------------------- N/2 |Pooled SS Matrix| _ _ 2 | 1 1 | 2P + 3P - 1 RHO = 1.0 - | SUM ----- - --- | 
------------- |_ N(i) N _| 6(P+1)(K-1) DF = .5(K-1)P(P+1) _ _ | PN/2 | | N V | Under null hypothesis: -2 RHO ln | ------------------ | | __ PN(i)/2 | |_ || N(i) _| is distributed approximately as chi-square(DF) Test Chi-Square Value = 140.943050 with 20 DF Prob > Chi-Sq = 0.0001 Since the chi-square value is significant at the 0.1 level, the within covariance matrices will be used in the discriminant function. Reference: Morrison, D.F. (1976) Multivariate Statistical Methods p252. Fisher's Iris Data 17 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Pairwise Squared Distances Between Groups 2 _ _ -1 _ _ D (i|j) = (X - X )' COV (X - X ) i j j i j Squared Distance to SPECIES From SPECIES Sentosa Versicolor Virginica Sentosa 0 103.19382 168.76759 Versicolor 323.06203 0 13.83875 Virginica 706.08494 17.86670 0 Fisher's Iris Data 18 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Pairwise Generalized Squared Distances Between Groups 2 _ _ -1 _ _ D (i|j) = (X - X )' COV (X - X ) + ln |COV | i j j i j j Generalized Squared Distance to SPECIES From SPECIES Sentosa Versicolor Virginica Sentosa -13.06736 92.31949 159.84053 Versicolor 309.99467 -10.87433 4.91170 Virginica 693.01757 6.99238 -8.92706 Fisher's Iris Data 19 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Univariate Test Statistics F Statistics, Num DF= 2 Den DF= 147 Total Pooled Between RSQ/ Variable STD STD STD R-Squared (1-RSQ) SEPALLEN 0.8281 0.5148 0.7951 0.618706 1.6226 SEPALWID 0.4359 0.3397 0.3368 0.400783 0.6688 PETALLEN 1.7653 0.4303 2.0907 0.941372 16.0566 PETALWID 0.7622 0.2047 0.8967 0.928883 13.0613 Univariate Test Statistics Variable F Pr > F SEPALLEN 119.2645 0.0001 SEPALWID 49.1600 0.0001 PETALLEN 1180.1612 0.0001 PETALWID 960.0071 0.0001 Average R-Squared: Unweighted = 0.7224358 Weighted by Variance = 0.8689444 Multivariate Statistics and F Approximations S=2 M=0.5 N=71 Statistic Value F Num DF Den DF Pr > F 
Wilks' Lambda 0.023438631 199.145 8 288 0.0001 Pillai's Trace 1.191898825 53.4665 8 290 0.0001 Hotelling-Lawley Trace 32.47732024 580.532 8 286 0.0001 Roy's Greatest Root 32.1919292 1166.96 4 145 0.0001 NOTE: ROY'S GREATEST ROOT のF統計量は上側限界です. NOTE: WILKS のラムダのF統計量は正確です. Fisher's Iris Data 20 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Classification Results for Calibration Data: WORK.IRIS Resubstitution Results using Quadratic Discriminant Function Generalized Squared Distance Function: 2 _ -1 _ D (X) = (X-X )' COV (X-X ) + ln |COV | j j j j j Posterior Probability of Membership in each SPECIES: 2 2 Pr(j|X) = exp(-.5 D (X)) / SUM exp(-.5 D (X)) j k k Posterior Probability of Membership in SPECIES: Obs From Classified SPECIES into SPECIES Sentosa Versicolor Virginica 71 Versicolor Virginica * 0.0000 0.3359 0.6641 84 Versicolor Virginica * 0.0000 0.1543 0.8457 134 Virginica Versicolor * 0.0000 0.6050 0.3950 * Misclassified observation Fisher's Iris Data 21 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Classification Summary for Calibration Data: WORK.IRIS Resubstitution Summary using Quadratic Discriminant Function Generalized Squared Distance Function: 2 _ -1 _ D (X) = (X-X )' COV (X-X ) + ln |COV | j j j j j Posterior Probability of Membership in each SPECIES: 2 2 Pr(j|X) = exp(-.5 D (X)) / SUM exp(-.5 D (X)) j k k Number of Observations and Percent Classified into SPECIES: From SPECIES Sentosa Versicolor Virginica Total Sentosa 50 0 0 50 100.00 0.00 0.00 100.00 Versicolor 0 48 2 50 0.00 96.00 4.00 100.00 Virginica 0 1 49 50 0.00 2.00 98.00 100.00 Total 50 49 51 150 Percent 33.33 32.67 34.00 100.00 Priors 0.3333 0.3333 0.3333 Error Count Estimates for SPECIES: Sentosa Versicolor Virginica Total Rate 0.0000 0.0400 0.0200 0.0200 Priors 0.3333 0.3333 0.3333 Fisher's Iris Data 22 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Classification Results for Calibration Data: 
WORK.IRIS Cross-validation Results using Quadratic Discriminant Function Generalized Squared Distance Function: 2 _ -1 _ D (X) = (X-X )' COV (X-X ) + ln |COV | j (X)j (X)j (X)j (X)j Posterior Probability of Membership in each SPECIES: 2 2 Pr(j|X) = exp(-.5 D (X)) / SUM exp(-.5 D (X)) j k k Posterior Probability of Membership in SPECIES: Obs From Classified SPECIES into SPECIES Sentosa Versicolor Virginica 69 Versicolor Virginica * 0.0000 0.3134 0.6866 71 Versicolor Virginica * 0.0000 0.1616 0.8384 84 Versicolor Virginica * 0.0000 0.0713 0.9287 134 Virginica Versicolor * 0.0000 0.6632 0.3368 * Misclassified observation Fisher's Iris Data 23 Discriminant Analysis 13:04 Thursday, December 19, 2002 Discriminant Analysis Classification Summary for Calibration Data: WORK.IRIS Cross-validation Summary using Quadratic Discriminant Function Generalized Squared Distance Function: 2 _ -1 _ D (X) = (X-X )' COV (X-X ) + ln |COV | j (X)j (X)j (X)j (X)j Posterior Probability of Membership in each SPECIES: 2 2 Pr(j|X) = exp(-.5 D (X)) / SUM exp(-.5 D (X)) j k k Number of Observations and Percent Classified into SPECIES: From SPECIES Sentosa Versicolor Virginica Total Sentosa 50 0 0 50 100.00 0.00 0.00 100.00 Versicolor 0 47 3 50 0.00 94.00 6.00 100.00 Virginica 0 1 49 50 0.00 2.00 98.00 100.00 Total 50 48 52 150 Percent 33.33 32.00 34.67 100.00 Priors 0.3333 0.3333 0.3333 Error Count Estimates for SPECIES: Sentosa Versicolor Virginica Total Rate 0.0000 0.0600 0.0200 0.0267 Priors 0.3333 0.3333 0.3333