前回までに、分布の特性を把握するためのいくつかの指標を説明し、その使い方と注意点を述べた。今回は、データの特性を考慮してグループ毎に集計を行なうことで、全体の集計だけでは判らなかったデータの特徴を把握できることを示す。また、外れ値を除外して解析する方法についても紹介する。
iv. 統計関連
/* Lesson 8-01 */
/* File Name = les0801.sas 06/09/05 */

/* Build data set GAKUSEI from the text file, starting at its second record. */
/* Variables followed by $ are read as character, the others as numeric.     */
data gakusei;
  infile 'all05a.prn' firstobs=2;
  input sex $ shintyou taijyuu kyoui jitaku $ kodukai carryer $ tsuuwa;
run;

/* List only the first five observations. */
proc print data=gakusei(obs=5);
run;

/* Summary statistics; no VAR statement, so all numeric variables are used. */
proc means data=gakusei;
run;

/* Detailed univariate statistics for the four listed variables; */
/* the PLOT option adds the text-mode plots to the output.       */
proc univariate data=gakusei plot;
  var shintyou taijyuu kyoui kodukai;
run;
/* Histograms over all observations. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai;    /* horizontal bar chart of each listed variable */
run;

/* Sort by sex so that the BY statements in the following steps can be used. */
proc sort data=gakusei;
  by sex;
run;

/* Means, computed separately for each sex. */
proc means data=gakusei;
  by sex;
run;

/* Basic univariate statistics for the listed variables, separately for each sex. */
proc univariate data=gakusei plot;
  var shintyou taijyuu kyoui kodukai;
  by sex;
run;

/* Histograms, one separate chart per sex. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai;    /* horizontal bar chart of each listed variable */
  by sex;
run;

/* Histograms with the sexes placed side by side in a single chart. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai/group=sex;
run;
SAS システム 2
00:51 Thursday, June 9, 2005
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 313 167.6891374 8.2031934 145.0000000 186.0000000
TAIJYUU 282 58.5638298 9.3640889 35.0000000 100.0000000
KYOUI 104 86.4903846 7.6424913 56.0000000 112.0000000
KODUKAI 302 48557.95 48920.34 0 300000.00
TSUUWA 104 7101.42 4608.50 200.0000000 30000.00
---------------------------------------------------------------------
SAS システム 7
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=SHINTYOU
Histogram # Boxplot
187.5+* 2 |
.********** 19 |
.*********************** 45 |
.****************************************** 83 +-----+
167.5+******************************* 62 *--+--*
.************************* 49 +-----+
.**************** 32 |
.******** 15 |
147.5+*** 6 |
----+----+----+----+----+----+----+----+--
* may represent up to 2 counts
SAS システム 8
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=SHINTYOU
Normal Probability Plot
187.5+ +++*
| ******+***
| ********
| ********
167.5+ ******+
| *****+
| ******
| +******
147.5+**+**
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 21
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=KODUKAI
Moments
N 302 Sum Wgts 302
Mean 48557.95 Sum 14664500
Std Dev 48920.34 Variance 2.3932E9
Skewness 1.739247 Kurtosis 4.291792
USS 1.432E12 CSS 7.204E11
CV 100.7463 Std Mean 2815.05
T:Mean=0 17.24941 Pr>|T| 0.0001
Num ^= 0 251 Num > 0 251
M(Sign) 125.5 Pr>=|M| 0.0001
Sgn Rank 15813 Pr>=|S| 0.0001
SAS システム 22
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=KODUKAI
Quantiles(Def=5)
100% Max 300000 99% 200000
75% Q3 65000 95% 150000
50% Med 30000 90% 120000
25% Q1 20000 10% 0
0% Min 0 5% 0
1% 0
Range 300000
Q3-Q1 45000
Mode 0
SAS システム 25
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=KODUKAI
Histogram # Boxplot
325000+* 2 *
.
.* 2 0
175000+***** 18 0
.******** 32 |
.***************** 66 +-----+
25000+********************************************** 182 *--+--*
----+----+----+----+----+----+----+----+----+-
* may represent up to 4 counts
SAS システム 26
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=KODUKAI
Normal Probability Plot
325000+ *
|
| **
175000+ ********+++
| ******++++++
| ++*******+
25000+****************************
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 31
00:51 Thursday, June 9, 2005
KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
0 |************** 70 70 23.18 23.18
30000 |********************** 109 179 36.09 59.27
60000 |*********** 56 235 18.54 77.81
90000 |******* 33 268 10.93 88.74
120000 |** 12 280 3.97 92.72
150000 |*** 16 296 5.30 98.01
180000 | 2 298 0.66 98.68
210000 | 2 300 0.66 99.34
240000 | 0 300 0.00 99.34
270000 | 0 300 0.00 99.34
300000 | 2 302 0.66 100.00
|
----+---+---+---+---+--
20 40 60 80 100
SAS システム 33
00:51 Thursday, June 9, 2005
--------------------------------- SEX=F --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 105 159.0780952 5.4871525 145.0000000 171.0000000
TAIJYUU 74 48.6108108 4.7643530 35.0000000 59.0000000
KYOUI 39 83.1025641 4.0314218 70.0000000 90.0000000
KODUKAI 102 49044.12 46049.91 0 300000.00
TSUUWA 47 6908.51 4589.91 200.0000000 25000.00
---------------------------------------------------------------------
SAS システム 34
00:51 Thursday, June 9, 2005
--------------------------------- SEX=M --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 207 172.0555556 5.4556780 156.0000000 186.0000000
TAIJYUU 207 62.1149758 7.9560942 46.0000000 100.0000000
KYOUI 65 88.5230769 8.5533394 56.0000000 112.0000000
KODUKAI 198 48176.77 50521.18 0 300000.00
TSUUWA 56 7211.57 4685.79 500.0000000 30000.00
---------------------------------------------------------------------
SAS システム 53
00:51 Thursday, June 9, 2005
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Moments
N 105 Sum Wgts 105
Mean 159.0781 Sum 16703.2
Std Dev 5.487152 Variance 30.10884
Skewness -0.20963 Kurtosis -0.36868
USS 2660245 CSS 3131.32
CV 3.449345 Std Mean 0.535491
T:Mean=0 297.0695 Pr>|T| 0.0001
Num ^= 0 105 Num > 0 105
M(Sign) 52.5 Pr>=|M| 0.0001
Sgn Rank 2782.5 Pr>=|S| 0.0001
SAS システム 55
00:51 Thursday, June 9, 2005
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 171 99% 170
75% Q3 163 95% 167
50% Med 160 90% 166
25% Q1 156 10% 152
0% Min 145 5% 149
1% 146.7
Range 26
Q3-Q1 7
Mode 156
SAS システム 58
00:51 Thursday, June 9, 2005
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Stem Leaf # Boxplot
17 001 3 |
16 5555666666677778 16 |
16 000000000000001112222222222333344444 36 +-----+
15 55556666666666666777788889999 29 +--+--+
15 012222333333444 15 |
14 578899 6 0
----+----+----+----+----+----+----+-
Multiply Stem.Leaf by 10**+1
SAS システム 59
00:51 Thursday, June 9, 2005
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Normal Probability Plot
172.5+ ++*++*
| *****+*+**+*
| **********+
| *********+
| +*******+
147.5+*++*+*++**
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 81
00:51 Thursday, June 9, 2005
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Moments
N 207 Sum Wgts 207
Mean 172.0556 Sum 35615.5
Std Dev 5.455678 Variance 29.76442
Skewness -0.11122 Kurtosis 0.087639
USS 6133976 CSS 6131.471
CV 3.170882 Std Mean 0.379196
T:Mean=0 453.738 Pr>|T| 0.0001
Num ^= 0 207 Num > 0 207
M(Sign) 103.5 Pr>=|M| 0.0001
Sgn Rank 10764 Pr>=|S| 0.0001
SAS システム 83
00:51 Thursday, June 9, 2005
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 186 99% 184
75% Q3 175 95% 181
50% Med 172 90% 180
25% Q1 168.5 10% 165
0% Min 156 5% 163
1% 160
Range 30
Q3-Q1 6.5
Mode 170
SAS システム 86
00:51 Thursday, June 9, 2005
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Histogram # Boxplot
187.5+* 2 0
.********** 19 |
.*********************** 45 +-----+
172.5+***************************************** 81 *--+--*
.********************** 44 +-----+
.******* 14 |
157.5+* 2 0
----+----+----+----+----+----+----+----+-
* may represent up to 2 counts
SAS システム 87
00:51 Thursday, June 9, 2005
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Normal Probability Plot
187.5+ **
| *******+**+
| *********+
172.5+ ***********
| *********++
| ********++
157.5+**+
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 109
00:51 Thursday, June 9, 2005
Univariate Procedure
Schematic Plots
Variable=SHINTYOU
200 +
|
| 0
180 + |
| | *--+--*
| *--+--* | +-----+
160 + *--+--* 0
| +-----+ 0
| 0
140 +
------------+-----------+-----------+-----------
SEX F M
SAS システム 110
00:51 Thursday, June 9, 2005
Univariate Procedure
Schematic Plots
Variable=TAIJYUU
|
100 + *
| 0
| *--+--* | *--+--*
50 + *--+--* +-----+
| 0
|
0 +
------------+-----------+-----------+-----------
SEX F M
SAS システム 112
00:51 Thursday, June 9, 2005
Univariate Procedure
Schematic Plots
Variable=KODUKAI
300000 + * *
|
|
200000 + * 0
| 0 |
| 0 |
100000 + +-----+ | |
| *--+--* +-----+ +-----+
| +-----+ *--+--* *--+--*
0 + | +-----+
------------+-----------+-----------+-----------
SEX F M
SAS システム 116
00:51 Thursday, June 9, 2005
-------------------------------- SEX=F ---------------------------------
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 |** 2 2 1.90 1.90
150 |******* 7 9 6.67 8.57
154 |**************** 16 25 15.24 23.81
158 |************************** 26 51 24.76 48.57
162 |****************************** 30 81 28.57 77.14
166 |******************** 20 101 19.05 96.19
170 |**** 4 105 3.81 100.00
|
-----+----+----+----+----+----+
5 10 15 20 25 30
Frequency
SAS システム 120
00:51 Thursday, June 9, 2005
-------------------------------- SEX=M ---------------------------------
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
156 |* 2 2 0.97 0.97
159 |*** 5 7 2.42 3.38
162 |**** 7 14 3.38 6.76
165 |****** 11 25 5.31 12.08
168 |****************** 35 60 16.91 28.99
171 |*************************** 54 114 26.09 55.07
174 |********************** 43 157 20.77 75.85
177 |*********** 22 179 10.63 86.47
180 |********** 19 198 9.18 95.65
183 |**** 7 205 3.38 99.03
186 |* 2 207 0.97 100.00
|
-----+----+----+----+----+--
10 20 30 40 50
Frequency
SAS システム 127
00:51 Thursday, June 9, 2005
SEX SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 | 0 0 0.00 0.00
150 | 0 0 0.00 0.00
154 | 0 0 0.00 0.00
158 | 0 0 0.00 0.00
162 | 0 0 0.00 0.00
166 | 0 0 0.00 0.00
170 | 1 1 0.32 0.32
174 | 0 1 0.00 0.32
178 | 0 1 0.00 0.32
182 | 0 1 0.00 0.32
186 | 0 1 0.00 0.32
|
F 146 | 2 3 0.64 0.96
150 |* 7 10 2.24 3.19
154 |*** 16 26 5.11 8.31
158 |***** 26 52 8.31 16.61
162 |****** 30 82 9.58 26.20
166 |**** 20 102 6.39 32.59
170 |* 4 106 1.28 33.87
174 | 0 106 0.00 33.87
178 | 0 106 0.00 33.87
182 | 0 106 0.00 33.87
186 | 0 106 0.00 33.87
|
M 146 | 0 106 0.00 33.87
150 | 0 106 0.00 33.87
154 | 0 106 0.00 33.87
158 | 2 108 0.64 34.50
162 |** 12 120 3.83 38.34
166 |***** 23 143 7.35 45.69
170 |************ 61 204 19.49 65.18
174 |************ 59 263 18.85 84.03
178 |****** 29 292 9.27 93.29
182 |**** 18 310 5.75 99.04
186 |* 3 313 0.96 100.00
|
----+---+---+
20 40 60
Frequency
SAS システム 135
00:51 Thursday, June 9, 2005
SEX KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
0 | 0 0 0.00 0.00
30000 | 1 1 0.33 0.33
60000 | 0 1 0.00 0.33
90000 | 1 2 0.33 0.66
120000 | 0 2 0.00 0.66
150000 | 0 2 0.00 0.66
180000 | 0 2 0.00 0.66
210000 | 0 2 0.00 0.66
240000 | 0 2 0.00 0.66
270000 | 0 2 0.00 0.66
300000 | 0 2 0.00 0.66
|
F 0 |*** 16 18 5.30 5.96
30000 |******** 40 58 13.25 19.21
60000 |****** 30 88 9.93 29.14
90000 |* 7 95 2.32 31.46
120000 |* 4 99 1.32 32.78
150000 | 2 101 0.66 33.44
180000 | 1 102 0.33 33.77
210000 | 1 103 0.33 34.11
240000 | 0 103 0.00 34.11
270000 | 0 103 0.00 34.11
300000 | 1 104 0.33 34.44
|
M 0 |*********** 54 158 17.88 52.32
30000 |************** 68 226 22.52 74.83
60000 |***** 26 252 8.61 83.44
90000 |***** 25 277 8.28 91.72
120000 |** 8 285 2.65 94.37
150000 |*** 14 299 4.64 99.01
180000 | 1 300 0.33 99.34
210000 | 1 301 0.33 99.67
240000 | 0 301 0.00 99.67
270000 | 0 301 0.00 99.67
300000 | 1 302 0.33 100.00
|
----+---+---+--
20 40 60
Frequency
/* Lesson 8-02 */
/* File Name = les0802.sas 06/09/05 */
/* Same input as Lesson 8-01, but with outlying and invalid records removed. */
data gakusei;
infile 'all05a.prn'
firstobs=2;
input sex $ shintyou taijyuu kyoui
jitaku $ kodukai carryer $ tsuuwa;
/* Drop observations whose kodukai is 200,000 yen or more.                 */
/* A missing kodukai compares as smaller than any number, so those records */
/* are kept by this test.                                                  */
if kodukai>=200000 then delete;
/* Drop observations whose sex is neither 'M' nor 'F'. */
if sex^='M' & sex^='F' then delete;
(以下略)
SAS システム 2
00:51 Thursday, June 9, 2005
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 308 167.6581169 8.2318737 145.0000000 186.0000000
TAIJYUU 277 58.6281588 9.3999615 35.0000000 100.0000000
KYOUI 101 86.5841584 7.7333917 56.0000000 112.0000000
KODUKAI 296 45748.31 42879.47 0 180000.00
TSUUWA 103 7073.28 4622.05 200.0000000 30000.00
---------------------------------------------------------------------
SAS システム 21
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=KODUKAI
Moments
N 296 Sum Wgts 296
Mean 45748.31 Sum 13541500
Std Dev 42879.47 Variance 1.8386E9
Skewness 1.151633 Kurtosis 0.646084
USS 1.162E12 CSS 5.424E11
CV 93.72908 Std Mean 2492.319
T:Mean=0 18.35572 Pr>|T| 0.0001
Num ^= 0 245 Num > 0 245
M(Sign) 122.5 Pr>=|M| 0.0001
Sgn Rank 15067.5 Pr>=|S| 0.0001
SAS システム 22
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=KODUKAI
Quantiles(Def=5)
100% Max 180000 99% 163000
75% Q3 60000 95% 150000
50% Med 30000 90% 115000
25% Q1 20000 10% 0
0% Min 0 5% 0
1% 0
Range 180000
Q3-Q1 40000
Mode 0
SAS システム 25
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=KODUKAI
Histogram # Boxplot
190000+* 1 0
.*** 6 0
.****** 11 0
130000+****** 11 0
.********** 20 |
.***** 9 |
70000+**************** 32 +-----+
.********************** 43 | + |
.********************************************* 90 *-----*
10000+************************************* 73 |
----+----+----+----+----+----+----+----+----+
* may represent up to 2 counts
SAS システム 26
00:51 Thursday, June 9, 2005
Univariate Procedure
Variable=KODUKAI
Normal Probability Plot
190000+ *
| *** **
| **** ++++
130000+ *** ++++
| ****++++
| +**++
70000+ +****
| ++*****
| *********
10000+** ****************
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 32
00:51 Thursday, June 9, 2005
--------------------------------- SEX=F --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 103 158.9825243 5.4896771 145.0000000 171.0000000
TAIJYUU 72 48.6416667 4.8265624 35.0000000 59.0000000
KYOUI 37 83.1351351 4.1376467 70.0000000 90.0000000
KODUKAI 100 45025.00 35788.12 0 180000.00
TSUUWA 47 6908.51 4589.91 200.0000000 25000.00
---------------------------------------------------------------------
SAS システム 33
00:51 Thursday, June 9, 2005
--------------------------------- SEX=M --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 205 172.0170732 5.4602172 156.0000000 186.0000000
TAIJYUU 205 62.1356098 7.9922297 46.0000000 100.0000000
KYOUI 64 88.5781250 8.6093428 56.0000000 112.0000000
KODUKAI 196 46117.35 46161.61 0 165000.00
TSUUWA 56 7211.57 4685.79 500.0000000 30000.00
---------------------------------------------------------------------
SAS システム 90
00:51 Thursday, June 9, 2005
Univariate Procedure
Schematic Plots
Variable=SHINTYOU
200 +
|
| 0
180 + |
| | *--+--*
| | +-----+
160 + *--+--* 0
| +-----+ 0
| 0
140 +
------------+-----------+-----------
SEX F M
SAS システム 93
00:51 Thursday, June 9, 2005
Univariate Procedure
Schematic Plots
Variable=KODUKAI
|
200000 +
| 0 0
| 0 |
100000 + | |
| +-----+ +-----+
| *--+--* *--+--*
0 + | +-----+
------------+-----------+-----------
SEX F M
SAS システム 104
00:51 Thursday, June 9, 2005
SEX SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
F 146 | 2 2 0.65 0.65
150 |* 7 9 2.27 2.92
154 |*** 16 25 5.19 8.12
158 |***** 26 51 8.44 16.56
162 |****** 29 80 9.42 25.97
166 |**** 19 99 6.17 32.14
170 |* 4 103 1.30 33.44
174 | 0 103 0.00 33.44
178 | 0 103 0.00 33.44
182 | 0 103 0.00 33.44
186 | 0 103 0.00 33.44
|
M 146 | 0 103 0.00 33.44
150 | 0 103 0.00 33.44
154 | 0 103 0.00 33.44
158 | 2 105 0.65 34.09
162 |** 12 117 3.90 37.99
166 |***** 23 140 7.47 45.45
170 |************ 61 201 19.81 65.26
174 |************ 58 259 18.83 84.09
178 |****** 28 287 9.09 93.18
182 |**** 18 305 5.84 99.03
186 |* 3 308 0.97 100.00
|
----+---+---+
20 40 60
Frequency
SAS システム 110
00:51 Thursday, June 9, 2005
SEX KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
F 0 |****** 12 12 4.05 4.05
20000 |********** 19 31 6.42 10.47
40000 |************** 27 58 9.12 19.59
60000 |************ 23 81 7.77 27.36
80000 |**** 8 89 2.70 30.07
100000 |** 4 93 1.35 31.42
120000 |** 3 96 1.01 32.43
140000 |* 1 97 0.34 32.77
160000 |* 2 99 0.68 33.45
180000 |* 1 100 0.34 33.78
|
M 0 |*********************** 46 146 15.54 49.32
20000 |******************* 37 183 12.50 61.82
40000 |******************** 40 223 13.51 75.34
60000 |*********** 22 245 7.43 82.77
80000 |****** 12 257 4.05 86.82
100000 |******** 16 273 5.41 92.23
120000 |*** 6 279 2.03 94.26
140000 |* 2 281 0.68 94.93
160000 |******** 15 296 5.07 100.00
180000 | 0 296 0.00 100.00
|
-----+----+----+----+---
10 20 30 40
Frequency
/* Comma-delimited input, read from the second record onward.            */
/* TRUNCOVER keeps a short record from spilling over into the next line. */
data mon2004;
  infile 'd:\home\mon05d.csv' delimiter=',' firstobs=2 truncover;
/* Tab-delimited input ('09'x is the hexadecimal code of the tab character), */
/* read from the second record onward.                                       */
/* TRUNCOVER keeps a short record from spilling over into the next line.     */
data mon2004;
  infile 'd:\home\mon05e.txt' delimiter='09'x firstobs=2 truncover;
/* Output layout: 72 characters per line, 20 lines per page. */
options ls=72 ps=20;
sas les9999.sas