前回までに、分布特性を把握するためのいくつかの指標を説明し、その使い方と注意点を示した。今回は、データの特性を考慮してグループ毎の集計を行なうことで、全体をまとめた集計だけでは判らなかったデータの特徴を把握できることを確認する。また、外れ値を除外して解析する方法についても紹介する。
/* Lesson 8-01 */
/* File Name = les0801.sas 12/01/05 */

/* Build data set GAKUSEI from the text file using list input.         */
/* Reading starts at record 2 (FIRSTOBS=2). SEX, JITAKU and CARRYER    */
/* are character variables ($); the remaining five are numeric.        */
/* NOTE(review): names look like romanised Japanese (presumably        */
/* height, weight, chest girth, allowance, call charges) -- confirm.   */
data gakusei;
  infile 'all05be.prn' firstobs=2;
  input sex      $
        shintyou
        taijyuu
        kyoui
        jitaku   $
        kodukai
        carryer  $
        tsuuwa;
run;

/* Print only the first five observations as a check on the read. */
proc print data=gakusei(obs=5);
run;

/* Summary statistics; there is no VAR statement, so every numeric */
/* variable in the data set is summarised.                         */
proc means data=gakusei;
run;

/* Detailed univariate statistics for four variables; the PLOT option */
/* adds the histogram / stem-and-leaf display and the box plot.       */
proc univariate data=gakusei plot;
  var shintyou taijyuu kyoui kodukai;
run;
/* Histograms: one horizontal bar chart for each listed variable. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai;
run;

/* Sort by sex. The BY statements in the following steps require the */
/* observations to be ordered by the BY variable.                    */
proc sort data=gakusei;
  by sex;
run;

/* Means, computed separately for each sex. */
proc means data=gakusei;
  by sex;
run;

/* Basic statistics for the listed variables, separately for each sex. */
proc univariate data=gakusei plot;
  var shintyou taijyuu kyoui kodukai;
  by sex;
run;

/* Histograms of the listed variables, one set of charts per sex. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai;
  by sex;
run;

/* Histograms with the sexes placed side by side in a single chart */
/* (GROUP=sex), so the two distributions share one set of midpoints. */
proc chart data=gakusei;
  hbar shintyou taijyuu kyoui kodukai / group=sex;
run;
SAS システム 2
21:33 Wednesday, November 30, 2005
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 328 167.8314024 8.1814563 145.0000000 186.0000000
TAIJYUU 297 58.5245791 9.3173716 35.0000000 100.0000000
KYOUI 108 86.4166667 7.5406995 56.0000000 112.0000000
KODUKAI 317 48397.48 48516.71 0 300000.00
TSUUWA 118 6810.61 4554.79 0 30000.00
---------------------------------------------------------------------
SAS システム 3
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=SHINTYOU
Moments
N 328 Sum Wgts 328
Mean 167.8314 Sum 55048.7
Std Dev 8.181456 Variance 66.93623
Skewness -0.34476 Kurtosis -0.42358
USS 9260789 CSS 21888.15
CV 4.874807 Std Mean 0.451745
T:Mean=0 371.5177 Pr>|T| 0.0001
Num ^= 0 328 Num > 0 328
M(Sign) 164 Pr>=|M| 0.0001
Sgn Rank 26978 Pr>=|S| 0.0001
SAS システム 4
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 186 99% 184
75% Q3 173.9 95% 180
50% Med 169 90% 178
25% Q1 162 10% 156
0% Min 145 5% 153
1% 148
Range 41
Q3-Q1 11.9
Mode 170
SAS システム 7
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=SHINTYOU
Histogram # Boxplot
187.5+* 2 |
.********** 20 |
.************************* 50 |
.******************************************** 87 +-----+
167.5+******************************** 64 *--+--*
.************************* 50 +-----+
.***************** 34 |
.******** 15 |
147.5+*** 6 |
----+----+----+----+----+----+----+----+----
* may represent up to 2 counts
SAS システム 21
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=KODUKAI
Moments
N 317 Sum Wgts 317
Mean 48397.48 Sum 15342000
Std Dev 48516.71 Variance 2.3539E9
Skewness 1.730545 Kurtosis 4.262016
USS 1.486E12 CSS 7.438E11
CV 100.2464 Std Mean 2724.97
T:Mean=0 17.76074 Pr>|T| 0.0001
Num ^= 0 264 Num > 0 264
M(Sign) 132 Pr>=|M| 0.0001
Sgn Rank 17490 Pr>=|S| 0.0001
SAS システム 22
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=KODUKAI
Quantiles(Def=5)
100% Max 300000 99% 200000
75% Q3 65000 95% 150000
50% Med 30000 90% 120000
25% Q1 20000 10% 0
0% Min 0 5% 0
1% 0
Range 300000
Q3-Q1 45000
Mode 0
SAS システム 25
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=KODUKAI
Histogram # Boxplot
325000+* 2 *
.
.* 2 0
175000+***** 18 0
.********* 34 0
.****************** 70 +-----+
25000+************************************************ 191 *--+--*
----+----+----+----+----+----+----+----+----+---
* may represent up to 4 counts
SAS システム 27
21:33 Wednesday, November 30, 2005
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 |* 2 2 0.61 0.61
150 |*** 7 9 2.13 2.74
154 |****** 16 25 4.88 7.62
158 |************ 30 55 9.15 16.77
162 |***************** 43 98 13.11 29.88
166 |****************** 44 142 13.41 43.29
170 |*************************** 68 210 20.73 64.02
174 |************************** 64 274 19.51 83.54
178 |************* 32 306 9.76 93.29
182 |******* 18 324 5.49 98.78
186 |** 4 328 1.22 100.00
|
----+---+---+---+---+---+---
10 20 30 40 50 60
SAS システム 31
21:33 Wednesday, November 30, 2005
KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
0 |*************** 73 73 23.03 23.03
30000 |*********************** 115 188 36.28 59.31
60000 |************ 59 247 18.61 77.92
90000 |******* 34 281 10.73 88.64
120000 |*** 13 294 4.10 92.74
150000 |*** 17 311 5.36 98.11
180000 | 2 313 0.63 98.74
210000 | 2 315 0.63 99.37
240000 | 0 315 0.00 99.37
270000 | 0 315 0.00 99.37
300000 | 2 317 0.63 100.00
|
----+---+---+---+---+---
20 40 60 80 100
SAS システム 33
21:33 Wednesday, November 30, 2005
--------------------------------- SEX=F --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 108 159.1222222 5.4407142 145.0000000 171.0000000
TAIJYUU 77 48.5714286 4.6799364 35.0000000 59.0000000
KYOUI 41 83.0243902 3.9590896 70.0000000 90.0000000
KODUKAI 105 49238.10 45543.57 0 300000.00
TSUUWA 50 6842.08 4585.62 200.0000000 25000.00
---------------------------------------------------------------------
SAS システム 34
21:33 Wednesday, November 30, 2005
--------------------------------- SEX=M --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 219 172.1255708 5.4565241 156.0000000 186.0000000
TAIJYUU 219 62.0173516 7.9362003 46.0000000 100.0000000
KYOUI 67 88.4925373 8.4358060 56.0000000 112.0000000
KODUKAI 210 47852.38 50103.26 0 300000.00
TSUUWA 67 6739.52 4583.18 0 30000.00
---------------------------------------------------------------------
SAS システム 53
21:33 Wednesday, November 30, 2005
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Moments
N 108 Sum Wgts 108
Mean 159.1222 Sum 17185.2
Std Dev 5.440714 Variance 29.60137
Skewness -0.22149 Kurtosis -0.33447
USS 2737715 CSS 3167.347
CV 3.419204 Std Mean 0.523533
T:Mean=0 303.9393 Pr>|T| 0.0001
Num ^= 0 108 Num > 0 108
M(Sign) 54 Pr>=|M| 0.0001
Sgn Rank 2943 Pr>=|S| 0.0001
SAS システム 55
21:33 Wednesday, November 30, 2005
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 171 99% 170
75% Q3 163 95% 167
50% Med 160 90% 166
25% Q1 156 10% 152
0% Min 145 5% 149
1% 146.7
Range 26
Q3-Q1 7
Mode 156
SAS システム 58
21:33 Wednesday, November 30, 2005
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Stem Leaf # Boxplot
17 001 3 |
16 55555666666677778 17 |
16 000000000000001112222222222333344444 36 +-----+
15 5555666666666666677778888899999 31 +--+--+
15 012222333333444 15 |
14 578899 6 0
----+----+----+----+----+----+----+-
Multiply Stem.Leaf by 10**+1
SAS システム 81
21:33 Wednesday, November 30, 2005
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Moments
N 219 Sum Wgts 219
Mean 172.1256 Sum 37695.5
Std Dev 5.456524 Variance 29.77366
Skewness -0.12749 Kurtosis 0.112604
USS 6494850 CSS 6490.657
CV 3.170083 Std Mean 0.368718
T:Mean=0 466.8221 Pr>|T| 0.0001
Num ^= 0 219 Num > 0 219
M(Sign) 109.5 Pr>=|M| 0.0001
Sgn Rank 12045 Pr>=|S| 0.0001
SAS システム 83
21:33 Wednesday, November 30, 2005
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 186 99% 184
75% Q3 175 95% 181
50% Med 172 90% 180
25% Q1 168.5 10% 165
0% Min 156 5% 163
1% 160
Range 30
Q3-Q1 6.5
Mode 170
SAS システム 86
21:33 Wednesday, November 30, 2005
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Histogram # Boxplot
187.5+* 2 0
.********** 20 |
.************************* 50 +-----+
172.5+******************************************* 85 *--+--*
.*********************** 45 +-----+
.******** 15 |
157.5+* 2 0
----+----+----+----+----+----+----+----+---
* may represent up to 2 counts
SAS システム 109
21:33 Wednesday, November 30, 2005
Univariate Procedure
Schematic Plots
Variable=SHINTYOU
200 +
|
| 0
180 + |
| | *--+--*
| *--+--* | +-----+
160 + *--+--* 0
| +-----+ 0
| 0
140 +
------------+-----------+-----------+-----------
SEX F M
SAS システム 110
21:33 Wednesday, November 30, 2005
Univariate Procedure
Schematic Plots
Variable=TAIJYUU
|
100 + *
| 0
| *--+--* | *--+--*
50 + *--+--* +-----+
| 0
|
0 +
------------+-----------+-----------+-----------
SEX F M
SAS システム 111
21:33 Wednesday, November 30, 2005
Univariate Procedure
Schematic Plots
Variable=KYOUI
|
150 +
|
| 0
100 + +-----+
| *--0--* *--+--*
| * 0
50 + *
------------+-----------+-----------+-----------
SEX F M
SAS システム 112
21:33 Wednesday, November 30, 2005
Univariate Procedure
Schematic Plots
Variable=KODUKAI
300000 + * *
|
|
200000 + * 0
| 0 0
| 0 |
100000 + +-----+ | |
| *--+--* +-----+ +-----+
| +-----+ *--+--* *--+--*
0 + | +-----+
------------+-----------+-----------+-----------
SEX F M
SAS システム 116
21:33 Wednesday, November 30, 2005
-------------------------------- SEX=F ---------------------------------
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 |** 2 2 1.85 1.85
150 |******* 7 9 6.48 8.33
154 |**************** 16 25 14.81 23.15
158 |**************************** 28 53 25.93 49.07
162 |****************************** 30 83 27.78 76.85
166 |********************* 21 104 19.44 96.30
170 |**** 4 108 3.70 100.00
|
-----+----+----+----+----+----+
5 10 15 20 25 30
Frequency
SAS システム 120
21:33 Wednesday, November 30, 2005
-------------------------------- SEX=M ---------------------------------
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
156 |* 2 2 0.91 0.91
159 |*** 5 7 2.28 3.20
162 |**** 8 15 3.65 6.85
165 |****** 11 26 5.02 11.87
168 |****************** 36 62 16.44 28.31
171 |**************************** 56 118 25.57 53.88
174 |************************ 47 165 21.46 75.34
177 |************* 25 190 11.42 86.76
180 |********** 19 209 8.68 95.43
183 |**** 8 217 3.65 99.09
186 |* 2 219 0.91 100.00
|
-----+----+----+----+----+---
10 20 30 40 50
Frequency
SAS システム 127
21:33 Wednesday, November 30, 2005
SEX SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 | 0 0 0.00 0.00
150 | 0 0 0.00 0.00
154 | 0 0 0.00 0.00
158 | 0 0 0.00 0.00
162 | 0 0 0.00 0.00
166 | 0 0 0.00 0.00
170 | 1 1 0.30 0.30
174 | 0 1 0.00 0.30
178 | 0 1 0.00 0.30
182 | 0 1 0.00 0.30
186 | 0 1 0.00 0.30
|
F 146 | 2 3 0.61 0.91
150 |* 7 10 2.13 3.05
154 |*** 16 26 4.88 7.93
158 |****** 28 54 8.54 16.46
162 |****** 30 84 9.15 25.61
166 |**** 21 105 6.40 32.01
170 |* 4 109 1.22 33.23
174 | 0 109 0.00 33.23
178 | 0 109 0.00 33.23
182 | 0 109 0.00 33.23
186 | 0 109 0.00 33.23
|
M 146 | 0 109 0.00 33.23
150 | 0 109 0.00 33.23
154 | 0 109 0.00 33.23
158 | 2 111 0.61 33.84
162 |*** 13 124 3.96 37.80
166 |***** 23 147 7.01 44.82
170 |************* 63 210 19.21 64.02
174 |************* 64 274 19.51 83.54
178 |****** 32 306 9.76 93.29
182 |**** 18 324 5.49 98.78
186 |* 4 328 1.22 100.00
|
----+---+---+-
20 40 60
Frequency
SAS システム 135
21:33 Wednesday, November 30, 2005
SEX KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
0 | 0 0 0.00 0.00
30000 | 1 1 0.32 0.32
60000 | 0 1 0.00 0.32
90000 | 1 2 0.32 0.63
120000 | 0 2 0.00 0.63
150000 | 0 2 0.00 0.63
180000 | 0 2 0.00 0.63
210000 | 0 2 0.00 0.63
240000 | 0 2 0.00 0.63
270000 | 0 2 0.00 0.63
300000 | 0 2 0.00 0.63
|
F 0 |*** 16 18 5.05 5.68
30000 |******** 41 59 12.93 18.61
60000 |****** 31 90 9.78 28.39
90000 |** 8 98 2.52 30.91
120000 |* 4 102 1.26 32.18
150000 | 2 104 0.63 32.81
180000 | 1 105 0.32 33.12
210000 | 1 106 0.32 33.44
240000 | 0 106 0.00 33.44
270000 | 0 106 0.00 33.44
300000 | 1 107 0.32 33.75
|
M 0 |*********** 57 164 17.98 51.74
30000 |*************** 73 237 23.03 74.76
60000 |****** 28 265 8.83 83.60
90000 |***** 25 290 7.89 91.48
120000 |** 9 299 2.84 94.32
150000 |*** 15 314 4.73 99.05
180000 | 1 315 0.32 99.37
210000 | 1 316 0.32 99.68
240000 | 0 316 0.00 99.68
270000 | 0 316 0.00 99.68
300000 | 1 317 0.32 100.00
|
----+---+---+---
20 40 60
Frequency
/* Lesson 8-02 */
/* File Name = les0802.sas 12/01/05 */

/* Same read as Lesson 8-01, but observations are dropped before they */
/* are written to GAKUSEI when they meet either condition below.      */
data gakusei;
  infile 'all05be.prn' firstobs=2;
  input sex $ shintyou taijyuu kyoui
        jitaku $ kodukai carryer $ tsuuwa;
  /* Exclude observations whose kodukai is 200,000 yen or more. */
  if kodukai>=200000 then delete;
  /* Exclude observations whose sex is neither 'M' nor 'F'. */
  if sex^='M' & sex^='F' then delete;
(以下略)
SAS システム 2
21:33 Wednesday, November 30, 2005
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 323 167.8040248 8.2090438 145.0000000 186.0000000
TAIJYUU 292 58.5849315 9.3510375 35.0000000 100.0000000
KYOUI 105 86.5047619 7.6272838 56.0000000 112.0000000
KODUKAI 311 45720.26 42718.59 0 180000.00
TSUUWA 117 6783.35 4564.70 0 30000.00
---------------------------------------------------------------------
SAS システム 21
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=KODUKAI
Moments
N 311 Sum Wgts 311
Mean 45720.26 Sum 14219000
Std Dev 42718.59 Variance 1.8249E9
Skewness 1.152086 Kurtosis 0.642729
USS 1.216E12 CSS 5.657E11
CV 93.43471 Std Mean 2422.349
T:Mean=0 18.87435 Pr>|T| 0.0001
Num ^= 0 258 Num > 0 258
M(Sign) 129 Pr>=|M| 0.0001
Sgn Rank 16705.5 Pr>=|S| 0.0001
SAS システム 22
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=KODUKAI
Quantiles(Def=5)
100% Max 180000 99% 160000
75% Q3 60000 95% 150000
50% Med 30000 90% 115000
25% Q1 20000 10% 0
0% Min 0 5% 0
1% 0
Range 180000
Q3-Q1 40000
Mode 0
SAS システム 25
21:33 Wednesday, November 30, 2005
Univariate Procedure
Variable=KODUKAI
Histogram # Boxplot
190000+* 1 0
.*** 6 0
.****** 12 0
130000+****** 12 0
.********** 20 |
.***** 10 |
70000+***************** 33 +-----+
.*********************** 45 | + |
.************************************************ 96 *-----*
10000+************************************** 76 |
----+----+----+----+----+----+----+----+----+---
* may represent up to 2 counts
SAS システム 32
21:33 Wednesday, November 30, 2005
--------------------------------- SEX=F --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 106 159.0301887 5.4430938 145.0000000 171.0000000
TAIJYUU 75 48.6000000 4.7386850 35.0000000 59.0000000
KYOUI 39 83.0512821 4.0584462 70.0000000 90.0000000
KODUKAI 103 45339.81 35499.60 0 180000.00
TSUUWA 50 6842.08 4585.62 200.0000000 25000.00
---------------------------------------------------------------------
SAS システム 33
21:33 Wednesday, November 30, 2005
--------------------------------- SEX=M --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 217 172.0898618 5.4612938 156.0000000 186.0000000
TAIJYUU 217 62.0359447 7.9704720 46.0000000 100.0000000
KYOUI 66 88.5454545 8.4892363 56.0000000 112.0000000
KODUKAI 208 45908.65 45954.57 0 165000.00
TSUUWA 67 6739.52 4583.18 0 30000.00
---------------------------------------------------------------------
SAS システム 90
21:33 Wednesday, November 30, 2005
Univariate Procedure
Schematic Plots
Variable=SHINTYOU
200 +
|
| 0
180 + |
| | *--+--*
| | +-----+
160 + *--+--* 0
| +-----+ 0
| 0
140 +
------------+-----------+-----------
SEX F M
SAS システム 93
21:33 Wednesday, November 30, 2005
Univariate Procedure
Schematic Plots
Variable=KODUKAI
|
200000 +
| 0 0
| 0 |
100000 + | |
| +-----+ +-----+
| *--+--* *--+--*
0 + | +-----+
------------+-----------+-----------
SEX F M
SAS システム 104
21:33 Wednesday, November 30, 2005
SEX SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
F 146 | 2 2 0.62 0.62
150 |* 7 9 2.17 2.79
154 |*** 16 25 4.95 7.74
158 |****** 28 53 8.67 16.41
162 |****** 29 82 8.98 25.39
166 |**** 20 102 6.19 31.58
170 |* 4 106 1.24 32.82
174 | 0 106 0.00 32.82
178 | 0 106 0.00 32.82
182 | 0 106 0.00 32.82
186 | 0 106 0.00 32.82
|
M 146 | 0 106 0.00 32.82
150 | 0 106 0.00 32.82
154 | 0 106 0.00 32.82
158 | 2 108 0.62 33.44
162 |*** 13 121 4.02 37.46
166 |***** 23 144 7.12 44.58
170 |************* 63 207 19.50 64.09
174 |************* 63 270 19.50 83.59
178 |****** 31 301 9.60 93.19
182 |**** 18 319 5.57 98.76
186 |* 4 323 1.24 100.00
|
----+---+---+-
20 40 60
Frequency
SAS システム 110
21:33 Wednesday, November 30, 2005
SEX KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
F 0 |****** 12 12 3.86 3.86
20000 |********** 20 32 6.43 10.29
40000 |************** 27 59 8.68 18.97
60000 |************ 24 83 7.72 26.69
80000 |***** 9 92 2.89 29.58
100000 |** 4 96 1.29 30.87
120000 |** 3 99 0.96 31.83
140000 |* 1 100 0.32 32.15
160000 |* 2 102 0.64 32.80
180000 |* 1 103 0.32 33.12
|
M 0 |************************ 48 151 15.43 48.55
20000 |******************** 40 191 12.86 61.41
40000 |********************** 43 234 13.83 75.24
60000 |************ 24 258 7.72 82.96
80000 |****** 12 270 3.86 86.82
100000 |******** 16 286 5.14 91.96
120000 |**** 7 293 2.25 94.21
140000 |** 3 296 0.96 95.18
160000 |******** 15 311 4.82 100.00
180000 | 0 311 0.00 100.00
|
-----+----+----+----+----
10 20 30 40
Frequency
/* Fragment: INFILE settings for reading a comma-separated file into  */
/* data set MON2004. No INPUT statement is shown in this fragment.    */
data mon2004;
/* dlm=',' : fields on each record are separated by commas. */
infile 'd:\home\mon05d.csv' dlm=','
/* firstobs=2 : start reading at the second record            */
/* (presumably the first record is a header line -- confirm). */
firstobs=2
/* truncover : when a record is shorter than INPUT expects, do not  */
/* continue onto the next record; unread variables are set missing. */
truncover;
data mon2004;
infile 'd:\home\mon05e.txt' dlm='09'x
firstobs=2
truncover;