前回までに、分布特性を把握するためのいくつかの指標を説明し、その使い方を示すとともに注意点を挙げた。データの特性を考慮してグループ毎の集計を行なうと、今までは判らなかったデータの特徴を把握することができる。また、外れ値を除外して解析する方法についても紹介する。
/* Lesson 6-01 */
/* File Name = les0601.sas 05/25/06 */

/* Read the student records from the text file, starting at line 2 of the file. */
/* NOTE(review): firstobs=2 presumably skips a header line - confirm against the data file. */
data gakusei;
  infile 'all06ae.prn' firstobs=2;
  input sex $
        shintyou
        taijyuu
        kyoui
        jitaku $
        kodukai
        carryer $
        tsuuwa;
run;

/* List the first five observations as a quick check of the import. */
proc print data=gakusei(obs=5);
run;

/* Summary statistics; no VAR statement, so every numeric variable is analyzed. */
proc means data=gakusei;
run;

/* Detailed univariate statistics, with plots, for four of the variables. */
proc univariate data=gakusei plot;
  var shintyou taijyuu kyoui kodukai;
run;
/* Histograms: one horizontal bar chart per listed variable, all observations together. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai;
run;

/* Sort by sex so that the BY-group steps below can process the data set. */
proc sort data=gakusei;
  by sex;
run;

/* Means, computed separately for each value of sex. */
proc means data=gakusei;
  by sex;
run;

/* Basic statistics with plots for the listed variables, separately for each value of sex. */
proc univariate data=gakusei plot;
  var shintyou taijyuu kyoui kodukai;
  by sex;
run;

/* Histograms of the listed variables, one separate chart per value of sex. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai;
  by sex;
run;

/* Histograms of the listed variables with the sex groups placed side by side in one chart. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai / group=sex;
run;
SAS システム 2
23:11 Wednesday, May 24, 2006
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 339 167.9486726 8.1611337 145.0000000 186.0000000
TAIJYUU 307 58.7159609 9.3051275 35.0000000 100.0000000
KYOUI 111 86.4684685 7.5073538 56.0000000 112.0000000
KODUKAI 327 47752.29 47971.89 0 300000.00
TSUUWA 129 6594.59 4502.81 0 30000.00
---------------------------------------------------------------------
SAS システム 3
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=SHINTYOU
Moments
N 339 Sum Wgts 339
Mean 167.9487 Sum 56934.6
Std Dev 8.161134 Variance 66.6041
Skewness -0.35954 Kurtosis -0.42016
USS 9584603 CSS 22512.19
CV 4.859302 Std Mean 0.443252
T:Mean=0 378.9012 Pr>|T| 0.0001
Num ^= 0 339 Num > 0 339
M(Sign) 169.5 Pr>=|M| 0.0001
Sgn Rank 28815 Pr>=|S| 0.0001
SAS システム 4
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 186 99% 184
75% Q3 174 95% 180
50% Med 169 90% 178
25% Q1 162 10% 156
0% Min 145 5% 153
1% 148
Range 41
Q3-Q1 12
Mode 170
SAS システム 7
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=SHINTYOU
Histogram # Boxplot
187.5+* 2 |
.*********** 21 |
.************************** 52 |
.********************************************** 92 +-----+
167.5+********************************* 65 *--+--*
.************************** 51 +-----+
.****************** 35 |
.******** 15 |
147.5+*** 6 |
----+----+----+----+----+----+----+----+----+-
* may represent up to 2 counts
SAS システム 21
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=KODUKAI
Moments
N 327 Sum Wgts 327
Mean 47752.29 Sum 15615000
Std Dev 47971.89 Variance 2.3013E9
Skewness 1.769178 Kurtosis 4.461749
USS 1.496E12 CSS 7.502E11
CV 100.4599 Std Mean 2652.851
T:Mean=0 18.00036 Pr>|T| 0.0001
Num ^= 0 274 Num > 0 274
M(Sign) 137 Pr>=|M| 0.0001
Sgn Rank 18837.5 Pr>=|S| 0.0001
SAS システム 22
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=KODUKAI
Quantiles(Def=5)
100% Max 300000 99% 200000
75% Q3 60000 95% 150000
50% Med 30000 90% 120000
25% Q1 20000 10% 0
0% Min 0 5% 0
1% 0
Range 300000
Q3-Q1 40000
Mode 0
SAS システム 25
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=KODUKAI
Histogram # Boxplot
325000+* 2 *
.
.* 2 *
175000+**** 18 0
.******* 34 0
.*************** 71 +-----+
25000+**************************************** 200 *--+--*
----+----+----+----+----+----+----+----+
* may represent up to 5 counts
SAS システム 27
23:11 Wednesday, May 24, 2006
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 |* 2 2 0.59 0.59
150 |*** 7 9 2.06 2.65
154 |****** 16 25 4.72 7.37
158 |************ 31 56 9.14 16.52
162 |****************** 44 100 12.98 29.50
166 |****************** 44 144 12.98 42.48
170 |**************************** 71 215 20.94 63.42
174 |*************************** 67 282 19.76 83.19
178 |************** 34 316 10.03 93.22
182 |******** 19 335 5.60 98.82
186 |** 4 339 1.18 100.00
|
----+---+---+---+---+---+---+
10 20 30 40 50 60 70
SAS システム 31
23:11 Wednesday, May 24, 2006
KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
0 |*************** 75 75 22.94 22.94
30000 |************************ 121 196 37.00 59.94
60000 |************ 61 257 18.65 78.59
90000 |******* 34 291 10.40 88.99
120000 |*** 13 304 3.98 92.97
150000 |*** 17 321 5.20 98.17
180000 | 2 323 0.61 98.78
210000 | 2 325 0.61 99.39
240000 | 0 325 0.00 99.39
270000 | 0 325 0.00 99.39
300000 | 2 327 0.61 100.00
|
----+---+---+---+---+---+
20 40 60 80 100 120
SAS システム 33
23:11 Wednesday, May 24, 2006
--------------------------------- SEX=F --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 110 159.1290909 5.3946345 145.0000000 171.0000000
TAIJYUU 78 48.6282051 4.6764094 35.0000000 59.0000000
KYOUI 42 82.9523810 3.9382575 70.0000000 90.0000000
KODUKAI 107 48579.44 45373.46 0 300000.00
TSUUWA 52 6867.38 4523.79 200.0000000 25000.00
---------------------------------------------------------------------
SAS システム 34
23:11 Wednesday, May 24, 2006
--------------------------------- SEX=M --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 228 172.2035088 5.4149724 156.0000000 186.0000000
TAIJYUU 228 62.1614035 7.8995568 46.0000000 100.0000000
KYOUI 69 88.6086957 8.3388132 56.0000000 112.0000000
KODUKAI 218 47220.18 49351.53 0 300000.00
TSUUWA 76 6363.13 4519.59 0 30000.00
---------------------------------------------------------------------
SAS システム 53
23:11 Wednesday, May 24, 2006
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Moments
N 110 Sum Wgts 110
Mean 159.1291 Sum 17504.2
Std Dev 5.394635 Variance 29.10208
Skewness -0.22655 Kurtosis -0.2914
USS 2788600 CSS 3172.127
CV 3.3901 Std Mean 0.514358
T:Mean=0 309.3741 Pr>|T| 0.0001
Num ^= 0 110 Num > 0 110
M(Sign) 55 Pr>=|M| 0.0001
Sgn Rank 3052.5 Pr>=|S| 0.0001
SAS システム 55
23:11 Wednesday, May 24, 2006
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 171 99% 170
75% Q3 163 95% 167
50% Med 160 90% 166
25% Q1 156 10% 152
0% Min 145 5% 149
1% 146.7
Range 26
Q3-Q1 7
Mode 156
SAS システム 58
23:11 Wednesday, May 24, 2006
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Stem Leaf # Boxplot
17 001 3 |
16 55555666666677778 17 |
16 0000000000000011112222222222333344444 37 +-----+
15 55556666666666666777788888899999 32 +--+--+
15 012222333333444 15 |
14 578899 6 0
----+----+----+----+----+----+----+--
Multiply Stem.Leaf by 10**+1
SAS システム 81
23:11 Wednesday, May 24, 2006
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Moments
N 228 Sum Wgts 228
Mean 172.2035 Sum 39262.4
Std Dev 5.414972 Variance 29.32193
Skewness -0.13968 Kurtosis 0.129877
USS 6767779 CSS 6656.077
CV 3.144519 Std Mean 0.358615
T:Mean=0 480.1901 Pr>|T| 0.0001
Num ^= 0 228 Num > 0 228
M(Sign) 114 Pr>=|M| 0.0001
Sgn Rank 13053 Pr>=|S| 0.0001
SAS システム 83
23:11 Wednesday, May 24, 2006
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 186 99% 184
75% Q3 175.5 95% 181
50% Med 172 90% 180
25% Q1 168.95 10% 165
0% Min 156 5% 163
1% 160
Range 30
Q3-Q1 6.55
Mode 170
SAS システム 86
23:11 Wednesday, May 24, 2006
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Histogram # Boxplot
187.5+* 2 0
.*********** 21 |
.************************** 52 +-----+
172.5+********************************************* 90 *--+--*
.*********************** 46 +-----+
.******** 15 |
157.5+* 2 0
----+----+----+----+----+----+----+----+----+
* may represent up to 2 counts
SAS システム 109
23:11 Wednesday, May 24, 2006
Univariate Procedure
Schematic Plots
Variable=SHINTYOU
200 +
|
| 0
180 + |
| | *--+--*
| *--+--* | +-----+
160 + *--+--* 0
| +-----+ 0
| 0
140 +
------------+-----------+-----------+-----------
SEX F M
SAS システム 110
23:11 Wednesday, May 24, 2006
Univariate Procedure
Schematic Plots
Variable=TAIJYUU
|
100 + *
| 0
| *--+--* | *--+--*
50 + *--+--* +-----+
| 0
|
0 +
------------+-----------+-----------+-----------
SEX F M
SAS システム 111
23:11 Wednesday, May 24, 2006
Univariate Procedure
Schematic Plots
Variable=KYOUI
|
150 +
|
| 0
100 + +-----+
| *--+--* *--+--*
| 0 0
50 + *
------------+-----------+-----------+-----------
SEX F M
SAS システム 112
23:11 Wednesday, May 24, 2006
Univariate Procedure
Schematic Plots
Variable=KODUKAI
300000 + * *
|
|
200000 + * 0
| 0 0
| 0 |
100000 + +-----+ | |
| *--+--* +-----+ +-----+
| +-----+ *--+--* *--+--*
0 + | +-----+
------------+-----------+-----------+-----------
SEX F M
SAS システム 116
23:11 Wednesday, May 24, 2006
-------------------------------- SEX=F ---------------------------------
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
144 |* 1 1 0.91 0.91
147 |*** 3 4 2.73 3.64
150 |**** 4 8 3.64 7.27
153 |************* 13 21 11.82 19.09
156 |********************* 21 42 19.09 38.18
159 |************************* 25 67 22.73 60.91
162 |****************** 18 85 16.36 77.27
165 |***************** 17 102 15.45 92.73
168 |***** 5 107 4.55 97.27
171 |*** 3 110 2.73 100.00
|
-----+----+----+----+----+
SAS システム 121
23:11 Wednesday, May 24, 2006
-------------------------------- SEX=M ---------------------------------
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
156 |* 2 2 0.88 0.88
159 |*** 5 7 2.19 3.07
162 |**** 8 15 3.51 6.58
165 |****** 11 26 4.82 11.40
168 |******************* 37 63 16.23 27.63
171 |***************************** 58 121 25.44 53.07
174 |************************* 50 171 21.93 75.00
177 |************* 26 197 11.40 86.40
180 |*********** 21 218 9.21 95.61
183 |**** 8 226 3.51 99.12
186 |* 2 228 0.88 100.00
|
-----+----+----+----+----+----
10 20 30 40 50
SAS システム 128
23:11 Wednesday, May 24, 2006
SEX SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 | 0 0 0.00 0.00
150 | 0 0 0.00 0.00
154 | 0 0 0.00 0.00
158 | 0 0 0.00 0.00
162 | 0 0 0.00 0.00
166 | 0 0 0.00 0.00
170 | 1 1 0.29 0.29
174 | 0 1 0.00 0.29
178 | 0 1 0.00 0.29
182 | 0 1 0.00 0.29
186 | 0 1 0.00 0.29
|
F 146 | 2 3 0.59 0.88
150 |* 7 10 2.06 2.95
154 |*** 16 26 4.72 7.67
158 |****** 29 55 8.55 16.22
162 |****** 31 86 9.14 25.37
166 |**** 21 107 6.19 31.56
170 |* 4 111 1.18 32.74
174 | 0 111 0.00 32.74
178 | 0 111 0.00 32.74
182 | 0 111 0.00 32.74
186 | 0 111 0.00 32.74
|
M 146 | 0 111 0.00 32.74
150 | 0 111 0.00 32.74
154 | 0 111 0.00 32.74
158 | 2 113 0.59 33.33
162 |*** 13 126 3.83 37.17
166 |***** 23 149 6.78 43.95
170 |************* 66 215 19.47 63.42
174 |************* 67 282 19.76 83.19
178 |******* 34 316 10.03 93.22
182 |**** 19 335 5.60 98.82
186 |* 4 339 1.18 100.00
|
----+---+---+-
20 40 60
Frequency
SAS システム 136
23:11 Wednesday, May 24, 2006
SEX KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
0 | 0 0 0.00 0.00
30000 | 1 1 0.31 0.31
60000 | 0 1 0.00 0.31
90000 | 1 2 0.31 0.61
120000 | 0 2 0.00 0.61
150000 | 0 2 0.00 0.61
180000 | 0 2 0.00 0.61
210000 | 0 2 0.00 0.61
240000 | 0 2 0.00 0.61
270000 | 0 2 0.00 0.61
300000 | 0 2 0.00 0.61
|
F 0 |*** 17 19 5.20 5.81
30000 |******** 42 61 12.84 18.65
60000 |****** 31 92 9.48 28.13
90000 |** 8 100 2.45 30.58
120000 |* 4 104 1.22 31.80
150000 | 2 106 0.61 32.42
180000 | 1 107 0.31 32.72
210000 | 1 108 0.31 33.03
240000 | 0 108 0.00 33.03
270000 | 0 108 0.00 33.03
300000 | 1 109 0.31 33.33
|
M 0 |************ 58 167 17.74 51.07
30000 |**************** 78 245 23.85 74.92
60000 |****** 30 275 9.17 84.10
90000 |***** 25 300 7.65 91.74
120000 |** 9 309 2.75 94.50
150000 |*** 15 324 4.59 99.08
180000 | 1 325 0.31 99.39
210000 | 1 326 0.31 99.69
240000 | 0 326 0.00 99.69
270000 | 0 326 0.00 99.69
300000 | 1 327 0.31 100.00
|
----+---+---+---+
20 40 60 80
Frequency
/* Lesson 6-02 */
/* File Name = les0602.sas 05/25/06 */

/* Read the student records as in Lesson 6-01, then drop outliers and */
/* records whose sex is not recorded as 'M' or 'F'.                   */
data gakusei;
  infile 'all06ae.prn'
    firstobs=2;
  input sex $ shintyou taijyuu kyoui
        jitaku $ kodukai carryer $ tsuuwa;
  /* Exclude observations whose kodukai is 200,000 yen or more. */
  if kodukai>=200000 then delete;
  /* Exclude observations that are neither male ('M') nor female ('F'). */
  if sex^='M' & sex^='F' then delete;
(以下略)
SAS システム 2
23:11 Wednesday, May 24, 2006
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 334 167.9239521 8.1879477 145.0000000 186.0000000
TAIJYUU 302 58.7774834 9.3362699 35.0000000 100.0000000
KYOUI 108 86.5555556 7.5903694 56.0000000 112.0000000
KODUKAI 321 45146.42 42245.03 0 180000.00
TSUUWA 128 6567.98 4510.31 0 30000.00
---------------------------------------------------------------------
SAS システム 21
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=KODUKAI
Moments
N 321 Sum Wgts 321
Mean 45146.42 Sum 14492000
Std Dev 42245.03 Variance 1.7846E9
Skewness 1.187351 Kurtosis 0.75934
USS 1.225E12 CSS 5.711E11
CV 93.57338 Std Mean 2357.888
T:Mean=0 19.14698 Pr>|T| 0.0001
Num ^= 0 268 Num > 0 268
M(Sign) 134 Pr>=|M| 0.0001
Sgn Rank 18023 Pr>=|S| 0.0001
SAS システム 22
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=KODUKAI
Quantiles(Def=5)
100% Max 180000 99% 160000
75% Q3 60000 95% 150000
50% Med 30000 90% 100000
25% Q1 20000 10% 0
0% Min 0 5% 0
1% 0
Range 180000
Q3-Q1 40000
Mode 0
SAS システム 25
23:11 Wednesday, May 24, 2006
Univariate Procedure
Variable=KODUKAI
Histogram # Boxplot
190000+* 1 0
.** 6 0
.**** 12 0
130000+**** 12 0
.******* 20 |
.**** 10 |
70000+*********** 33 +-----+
.**************** 48 | + |
.********************************** 100 *-----*
10000+*************************** 79 |
----+----+----+----+----+----+----
* may represent up to 3 counts
SAS システム 32
23:11 Wednesday, May 24, 2006
--------------------------------- SEX=F --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 108 159.0388889 5.3962575 145.0000000 171.0000000
TAIJYUU 76 48.6578947 4.7339698 35.0000000 59.0000000
KYOUI 40 82.9750000 4.0350230 70.0000000 90.0000000
KODUKAI 105 44742.86 35428.90 0 180000.00
TSUUWA 52 6867.38 4523.79 200.0000000 25000.00
---------------------------------------------------------------------
SAS システム 33
23:11 Wednesday, May 24, 2006
--------------------------------- SEX=M --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 226 172.1699115 5.4197152 156.0000000 186.0000000
TAIJYUU 226 62.1805310 7.9319479 46.0000000 100.0000000
KYOUI 68 88.6617647 8.3890659 56.0000000 112.0000000
KODUKAI 216 45342.59 45265.04 0 165000.00
TSUUWA 76 6363.13 4519.59 0 30000.00
---------------------------------------------------------------------
SAS システム 90
23:11 Wednesday, May 24, 2006
Univariate Procedure
Schematic Plots
Variable=SHINTYOU
200 +
|
| 0
180 + |
| | *--+--*
| | +-----+
160 + *--+--* 0
| +-----+ 0
| 0
140 +
------------+-----------+-----------
SEX F M
SAS システム 93
23:11 Wednesday, May 24, 2006
Univariate Procedure
Schematic Plots
Variable=KODUKAI
|
200000 +
| 0 0
| 0 |
100000 + | |
| +-----+ +-----+
| *--+--* *--+--*
0 + | +-----+
------------+-----------+-----------
SEX F M
SAS システム 104
23:11 Wednesday, May 24, 2006
SEX SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
F 146 | 2 2 0.60 0.60
150 |* 7 9 2.10 2.69
154 |*** 16 25 4.79 7.49
158 |****** 29 54 8.68 16.17
162 |****** 30 84 8.98 25.15
166 |**** 20 104 5.99 31.14
170 |* 4 108 1.20 32.34
174 | 0 108 0.00 32.34
178 | 0 108 0.00 32.34
182 | 0 108 0.00 32.34
186 | 0 108 0.00 32.34
|
M 146 | 0 108 0.00 32.34
150 | 0 108 0.00 32.34
154 | 0 108 0.00 32.34
158 | 2 110 0.60 32.93
162 |*** 13 123 3.89 36.83
166 |***** 23 146 6.89 43.71
170 |************* 66 212 19.76 63.47
174 |************* 66 278 19.76 83.23
178 |******* 33 311 9.88 93.11
182 |**** 19 330 5.69 98.80
186 |* 4 334 1.20 100.00
|
----+---+---+-
20 40 60
Frequency
SAS システム 110
23:11 Wednesday, May 24, 2006
SEX KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
F 0 |******* 13 13 4.05 4.05
20000 |*********** 21 34 6.54 10.59
40000 |************** 27 61 8.41 19.00
60000 |************ 24 85 7.48 26.48
80000 |***** 9 94 2.80 29.28
100000 |** 4 98 1.25 30.53
120000 |** 3 101 0.93 31.46
140000 |* 1 102 0.31 31.78
160000 |* 2 104 0.62 32.40
180000 |* 1 105 0.31 32.71
|
M 0 |************************* 49 154 15.26 47.98
20000 |********************* 41 195 12.77 60.75
40000 |************************ 48 243 14.95 75.70
60000 |************* 25 268 7.79 83.49
80000 |****** 12 280 3.74 87.23
100000 |******** 16 296 4.98 92.21
120000 |**** 7 303 2.18 94.39
140000 |** 3 306 0.93 95.33
160000 |******** 15 321 4.67 100.00
180000 | 0 321 0.00 100.00
|
-----+----+----+----+----+
10 20 30 40 50
Frequency
/* DATA step header with only an INFILE statement; LRECL=230 sets the logical record length to 230. */
/* NOTE(review): this looks like a fragment shown for comparison with the TRUNCOVER version below - confirm. */
data math; infile 'foo.dat' lrecl=230;
/* Same INFILE with TRUNCOVER added: a record shorter than the columns requested is read as far as it goes */
/* instead of the INPUT statement moving on to the next record. Column input follows.                      */
data math; infile 'foo.dat' lrecl=230 truncover;
input
kamoku $ 2              /* character variable read from column 2 */
kesseki $ 3             /* character variable read from column 3 */
k_code $ 10-11          /* character variable read from columns 10-11 */
t_score 12-14           /* numeric variable read from columns 12-14 */
s_scor01 103-104        /* numeric variable read from columns 103-104 */
s_scor02 105-106        /* numeric variable read from columns 105-106 */
s_scor03 107-108        /* numeric variable read from columns 107-108 */
s_scor04 109-110        /* numeric variable read from columns 109-110 */
;
data math; infile 'foo.dat' firstobs=4;