前回まではデータ全体の特徴を調べてきた。データの特性を考慮してグループごとの集計を行うと、今までは分からなかったデータの特徴を把握することができる。また、外れ値を除外して解析する方法についても紹介する。
/* Lesson 8-01 */
/* File Name = les0801.sas 06/10/04 */

/*
 * Reads the survey file into data set GAKUSEI, summarises all
 * observations together, then repeats the same summaries separately
 * for each value of SEX.
 */
data gakusei;
  infile 'all04a.prn'
    firstobs=2;                              /* start reading at the 2nd record of the file */
  input sex $ shintyou taijyuu kyoui
        jitaku $ kodukai carryer $ tsuuwa;   /* "$" marks character variables; the rest are numeric */
run;

proc print data=gakusei(obs=5);              /* list only the first 5 observations as a sanity check */
run;

/* ---- Whole-data summaries ------------------------------------------- */

proc means data=gakusei;                     /* means etc. for every numeric variable */
run;

proc univariate data=gakusei plot;           /* basic statistics plus plots */
  var shintyou taijyuu kyoui kodukai;        /* only for the listed variables */
run;

proc chart data=gakusei;                     /* histogram */
  hbar shintyou taijyuu kyoui kodukai;       /* one horizontal bar chart per listed variable */
run;

/* ---- Summaries by sex ----------------------------------------------- */

/* BY-group processing in the steps below expects the data set to be
   ordered by the BY variable, so sort first. */
proc sort data=gakusei;                      /* sort */
  by sex;                                    /* by sex */
run;

proc means data=gakusei;                     /* compute means */
  by sex;                                    /* separately for each sex */
run;

proc univariate data=gakusei plot;           /* compute basic statistics */
  var shintyou taijyuu kyoui kodukai;        /* only for the listed variables */
  by sex;                                    /* separately for each sex */
run;

proc chart data=gakusei;                     /* histogram */
  hbar shintyou taijyuu kyoui kodukai;       /* only for the listed variables */
  by sex;                                    /* separately for each sex (one chart per BY group) */
run;

proc chart data=gakusei;                     /* histogram */
  hbar shintyou taijyuu kyoui kodukai/group=sex;  /* sexes placed side by side in a single chart */
run;
SAS システム 2
21:57 Wednesday, June 9, 2004
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 285 167.8207018 8.1248549 145.0000000 186.0000000
TAIJYUU 254 58.7303150 9.2226857 35.0000000 100.0000000
KYOUI 93 86.7204301 7.8979436 56.0000000 112.0000000
KODUKAI 272 49810.66 50253.80 0 300000.00
TSUUWA 77 7314.91 5050.78 200.0000000 30000.00
---------------------------------------------------------------------
SAS システム 7
21:57 Wednesday, June 9, 2004
Univariate Procedure
Variable=SHINTYOU
Histogram # Boxplot
187.5+* 2 |
.********* 18 |
.******************** 39 |
.*************************************** 78 +-----+
167.5+***************************** 57 *--+--*
.*********************** 45 +-----+
.************** 27 |
.******* 14 |
147.5+*** 5 |
----+----+----+----+----+----+----+----
* may represent up to 2 counts
SAS システム 8
21:57 Wednesday, June 9, 2004
Univariate Procedure
Variable=SHINTYOU
Normal Probability Plot
187.5+ +++*
| *****+** *
| ********
| ********
167.5+ ******++
| ******+
| ******
| *******
147.5+*++**
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 21
21:57 Wednesday, June 9, 2004
Univariate Procedure
Variable=KODUKAI
Moments
N 272 Sum Wgts 272
Mean 49810.66 Sum 13548500
Std Dev 50253.8 Variance 2.5254E9
Skewness 1.695952 Kurtosis 4.021976
USS 1.359E12 CSS 6.844E11
CV 100.8896 Std Mean 3047.084
T:Mean=0 16.34699 Pr>|T| 0.0001
Num ^= 0 224 Num > 0 224
M(Sign) 112 Pr>=|M| 0.0001
Sgn Rank 12600 Pr>=|S| 0.0001
SAS システム 22
21:57 Wednesday, June 9, 2004
Univariate Procedure
Variable=KODUKAI
Quantiles(Def=5)
100% Max 300000 99% 200000
75% Q3 70000 95% 150000
50% Med 30000 90% 120000
25% Q1 20000 10% 0
0% Min 0 5% 0
1% 0
Range 300000
Q3-Q1 50000
Mode 0
SAS システム 25
21:57 Wednesday, June 9, 2004
Univariate Procedure
Variable=KODUKAI
Histogram # Boxplot
325000+* 2 *
.
.* 2 0
175000+***** 17 0
.******** 31 |
.*************** 58 +-----+
25000+***************************************** 162 *--+--*
----+----+----+----+----+----+----+----+-
* may represent up to 4 counts
SAS システム 26
21:57 Wednesday, June 9, 2004
Univariate Procedure
Variable=KODUKAI
Normal Probability Plot
325000+ *
|
| **
175000+ ********++++
| ******+++++
| +********
25000+* **************************
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 30
21:57 Wednesday, June 9, 2004
KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
0 |************ 62 62 22.79 22.79
30000 |******************** 98 160 36.03 58.82
60000 |********** 49 209 18.01 76.84
90000 |****** 30 239 11.03 87.87
120000 |** 12 251 4.41 92.28
150000 |*** 15 266 5.51 97.79
180000 | 2 268 0.74 98.53
210000 | 2 270 0.74 99.26
240000 | 0 270 0.00 99.26
270000 | 0 270 0.00 99.26
300000 | 2 272 0.74 100.00
|
----+---+---+---+---+
20 40 60 80 100
SAS システム 32
21:57 Wednesday, June 9, 2004
--------------------------------- SEX=F --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 94 159.1893617 5.4552031 145.0000000 171.0000000
TAIJYUU 63 48.4285714 4.6445052 35.0000000 59.0000000
KYOUI 32 82.9375000 4.3547342 70.0000000 90.0000000
KODUKAI 89 50589.89 48581.27 0 300000.00
TSUUWA 35 7062.86 5041.31 200.0000000 25000.00
---------------------------------------------------------------------
SAS システム 33
21:57 Wednesday, June 9, 2004
--------------------------------- SEX=M --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 190 172.0900000 5.4013574 156.0000000 186.0000000
TAIJYUU 190 62.1394737 7.7276505 46.0000000 100.0000000
KYOUI 61 88.7049180 8.6146083 56.0000000 112.0000000
KODUKAI 181 49298.34 51281.57 0 300000.00
TSUUWA 41 7464.59 5158.33 500.0000000 30000.00
---------------------------------------------------------------------
SAS システム 52
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Moments
N 94 Sum Wgts 94
Mean 159.1894 Sum 14963.8
Std Dev 5.455203 Variance 29.75924
Skewness -0.20544 Kurtosis -0.3905
USS 2384845 CSS 2767.609
CV 3.426864 Std Mean 0.562661
T:Mean=0 282.9222 Pr>|T| 0.0001
Num ^= 0 94 Num > 0 94
M(Sign) 47 Pr>=|M| 0.0001
Sgn Rank 2232.5 Pr>=|S| 0.0001
SAS システム 54
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 171 99% 171
75% Q3 163 95% 167
50% Med 160 90% 166
25% Q1 156 10% 152
0% Min 145 5% 149
1% 145
Range 26
Q3-Q1 7
Mode 156
SAS システム 57
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Stem Leaf # Boxplot
17 001 3 |
16 55566666667777 14 |
16 000000000000111222222222333344444 33 +-----+
15 5556666666666677778889999 25 +--+--+
15 01222333333444 14 |
14 58899 5 0
----+----+----+----+----+----+---
Multiply Stem.Leaf by 10**+1
SAS システム 58
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=F ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Normal Probability Plot
172.5+ +*+++*
| *****+*+*+*
| **********+
| ********++
| ********+
147.5+*+++*++**
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 80
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Moments
N 190 Sum Wgts 190
Mean 172.09 Sum 32697.1
Std Dev 5.401357 Variance 29.17466
Skewness -0.0443 Kurtosis 0.062762
USS 5632358 CSS 5514.011
CV 3.138682 Std Mean 0.391856
T:Mean=0 439.1668 Pr>|T| 0.0001
Num ^= 0 190 Num > 0 190
M(Sign) 95 Pr>=|M| 0.0001
Sgn Rank 9072.5 Pr>=|S| 0.0001
SAS システム 82
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Quantiles(Def=5)
100% Max 186 99% 185
75% Q3 175 95% 181
50% Med 172 90% 180
25% Q1 168.5 10% 165.5
0% Min 156 5% 163
1% 160
Range 30
Q3-Q1 6.5
Mode 170
SAS システム 85
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Histogram # Boxplot
187.5+* 2 0
.********* 18 |
.******************** 39 +-----+
172.5+************************************** 76 *--+--*
.********************* 41 +-----+
.******* 13 |
157.5+* 1 0
----+----+----+----+----+----+----+---
* may represent up to 2 counts
SAS システム 86
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=M ---------------------------------
Univariate Procedure
Variable=SHINTYOU
Normal Probability Plot
187.5+ **
| *****+*+**+
| ********++
172.5+ ***********
| *********++
| * ********+
157.5+*++
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 108
21:57 Wednesday, June 9, 2004
Univariate Procedure
Schematic Plots
Variable=SHINTYOU
200 +
|
| 0
180 + |
| | *--+--*
| *--+--* | +-----+
160 + *--+--* |
| +-----+ 0
| 0
140 +
------------+-----------+-----------+-----------
SEX F M
SAS システム 109
21:57 Wednesday, June 9, 2004
Univariate Procedure
Schematic Plots
Variable=TAIJYUU
|
100 + *
| 0
| *--+--* | *--+--*
50 + *--+--* +-----+
| 0
|
0 +
------------+-----------+-----------+-----------
SEX F M
SAS システム 111
21:57 Wednesday, June 9, 2004
Univariate Procedure
Schematic Plots
Variable=KODUKAI
300000 + * *
|
|
200000 + * 0
| 0 |
| 0 |
100000 + +-----+ | |
| *--+--* +--+--+ +-----+
| +-----+ *-----* *--+--*
0 + | +-----+
------------+-----------+-----------+-----------
SEX F M
SAS システム 115
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=F ---------------------------------
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 |* 1 1 1.06 1.06
150 |******* 7 8 7.45 8.51
154 |************** 14 22 14.89 23.40
158 |*********************** 23 45 24.47 47.87
162 |*************************** 27 72 28.72 76.60
166 |******************* 19 91 20.21 96.81
170 |*** 3 94 3.19 100.00
|
-----+----+----+----+----+--
5 10 15 20 25
Frequency
SAS システム 119
21:57 Wednesday, June 9, 2004
-------------------------------- SEX=M ---------------------------------
SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
156 |* 1 1 0.53 0.53
159 |*** 5 6 2.63 3.16
162 |**** 7 13 3.68 6.84
165 |***** 9 22 4.74 11.58
168 |***************** 33 55 17.37 28.95
171 |************************* 50 105 26.32 55.26
174 |******************** 39 144 20.53 75.79
177 |********** 20 164 10.53 86.32
180 |********* 17 181 8.95 95.26
183 |**** 7 188 3.68 98.95
186 |* 2 190 1.05 100.00
|
-----+----+----+----+----+
10 20 30 40 50
Frequency
SAS システム 126
21:57 Wednesday, June 9, 2004
SEX SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
146 | 0 0 0.00 0.00
150 | 0 0 0.00 0.00
154 | 0 0 0.00 0.00
158 | 0 0 0.00 0.00
162 | 0 0 0.00 0.00
166 | 0 0 0.00 0.00
170 | 1 1 0.35 0.35
174 | 0 1 0.00 0.35
178 | 0 1 0.00 0.35
182 | 0 1 0.00 0.35
186 | 0 1 0.00 0.35
|
F 146 | 1 2 0.35 0.70
150 |*** 7 9 2.46 3.16
154 |****** 14 23 4.91 8.07
158 |********* 23 46 8.07 16.14
162 |*********** 27 73 9.47 25.61
166 |******** 19 92 6.67 32.28
170 |* 3 95 1.05 33.33
174 | 0 95 0.00 33.33
178 | 0 95 0.00 33.33
182 | 0 95 0.00 33.33
186 | 0 95 0.00 33.33
|
M 146 | 0 95 0.00 33.33
150 | 0 95 0.00 33.33
154 | 0 95 0.00 33.33
158 | 1 96 0.35 33.68
162 |***** 12 108 4.21 37.89
166 |******** 20 128 7.02 44.91
170 |*********************** 57 185 20.00 64.91
174 |********************** 54 239 18.95 83.86
178 |********** 26 265 9.12 92.98
182 |******* 17 282 5.96 98.95
186 |* 3 285 1.05 100.00
|
----+---+---+---+---+---
10 20 30 40 50
Frequency
SAS システム 134
21:57 Wednesday, June 9, 2004
SEX KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
0 | 0 0 0.00 0.00
30000 | 1 1 0.37 0.37
60000 | 0 1 0.00 0.37
90000 | 1 2 0.37 0.74
120000 | 0 2 0.00 0.74
150000 | 0 2 0.00 0.74
180000 | 0 2 0.00 0.74
210000 | 0 2 0.00 0.74
240000 | 0 2 0.00 0.74
270000 | 0 2 0.00 0.74
300000 | 0 2 0.00 0.74
|
F 0 |****** 14 16 5.15 5.88
30000 |************** 35 51 12.87 18.75
60000 |********** 25 76 9.19 27.94
90000 |** 6 82 2.21 30.15
120000 |** 4 86 1.47 31.62
150000 |* 2 88 0.74 32.35
180000 | 1 89 0.37 32.72
210000 | 1 90 0.37 33.09
240000 | 0 90 0.00 33.09
270000 | 0 90 0.00 33.09
300000 | 1 91 0.37 33.46
|
M 0 |******************* 48 139 17.65 51.10
30000 |************************* 62 201 22.79 73.90
60000 |********** 24 225 8.82 82.72
90000 |********* 23 248 8.46 91.18
120000 |*** 8 256 2.94 94.12
150000 |***** 13 269 4.78 98.90
180000 | 1 270 0.37 99.26
210000 | 1 271 0.37 99.63
240000 | 0 271 0.00 99.63
270000 | 0 271 0.00 99.63
300000 | 1 272 0.37 100.00
|
----+---+---+---+---+---+-
10 20 30 40 50 60
Frequency
/* Lesson 8-02 */
/* File Name = les0802.sas 06/10/04 */

/*
 * Reads the survey file into data set GAKUSEI, dropping outliers and
 * observations with an unusable SEX value before any analysis is run.
 */
data gakusei;
  infile 'all04a.prn'
    firstobs=2;                              /* start reading at the 2nd record of the file */
  input sex $ shintyou taijyuu kyoui
        jitaku $ kodukai carryer $ tsuuwa;   /* "$" marks character variables; the rest are numeric */
  /* Exclude observations where kodukai is 200,000 or more.
     A missing kodukai does not satisfy ">=", so those observations are kept. */
  if kodukai>=200000 then delete;
  /* Exclude observations whose sex is neither 'M' nor 'F'
     (e.g. a missing or mistyped value). */
  if sex^='M' & sex^='F' then delete;
/* (The remainder of the program is omitted in the original notes.) */
SAS システム 2
23:50 Wednesday, June 9, 2004
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 280 167.7889286 8.1558422 145.0000000 186.0000000
TAIJYUU 249 58.8052209 9.2589719 35.0000000 100.0000000
KYOUI 90 86.8333333 8.0017554 56.0000000 112.0000000
KODUKAI 266 46712.41 43796.32 0 180000.00
TSUUWA 76 7279.58 5074.75 200.0000000 30000.00
---------------------------------------------------------------------
SAS システム 21
23:50 Wednesday, June 9, 2004
Univariate Procedure
Variable=KODUKAI
Moments
N 266 Sum Wgts 266
Mean 46712.41 Sum 12425500
Std Dev 43796.32 Variance 1.9181E9
Skewness 1.105688 Kurtosis 0.470363
USS 1.089E12 CSS 5.083E11
CV 93.75737 Std Mean 2685.325
T:Mean=0 17.39544 Pr>|T| 0.0001
Num ^= 0 218 Num > 0 218
M(Sign) 109 Pr>=|M| 0.0001
Sgn Rank 11935.5 Pr>=|S| 0.0001
SAS システム 22
23:50 Wednesday, June 9, 2004
Univariate Procedure
Variable=KODUKAI
Quantiles(Def=5)
100% Max 180000 99% 163000
75% Q3 65000 95% 150000
50% Med 30000 90% 120000
25% Q1 20000 10% 0
0% Min 0 5% 0
1% 0
Range 180000
Q3-Q1 45000
Mode 0
SAS システム 25
23:50 Wednesday, June 9, 2004
Univariate Procedure
Variable=KODUKAI
Histogram # Boxplot
190000+* 1 0
.*** 6 0
.***** 10 0
130000+****** 11 |
.********** 19 |
.**** 8 |
70000+*************** 29 +-----+
.******************* 37 | + |
.**************************************** 80 *-----*
10000+********************************* 65 |
----+----+----+----+----+----+----+----+
* may represent up to 2 counts
SAS システム 26
23:50 Wednesday, June 9, 2004
Univariate Procedure
Variable=KODUKAI
Normal Probability Plot
190000+ *
| **** *
| ***** ++++
130000+ *** +++++
| ****++++
| **+++
70000+ +****
| ++*****
| *********
10000+* *****************
+----+----+----+----+----+----+----+----+----+----+
-2 -1 0 +1 +2
SAS システム 31
23:50 Wednesday, June 9, 2004
--------------------------------- SEX=F --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 92 159.0847826 5.4594597 145.0000000 171.0000000
TAIJYUU 61 48.4590164 4.7172512 35.0000000 59.0000000
KYOUI 30 82.9666667 4.4989143 70.0000000 90.0000000
KODUKAI 87 46005.75 37561.62 0 180000.00
TSUUWA 35 7062.86 5041.31 200.0000000 25000.00
---------------------------------------------------------------------
SAS システム 32
23:50 Wednesday, June 9, 2004
--------------------------------- SEX=M --------------------------------
Variable N Mean Std Dev Minimum Maximum
---------------------------------------------------------------------
SHINTYOU 188 172.0484043 5.4060322 156.0000000 186.0000000
TAIJYUU 188 62.1622340 7.7656800 46.0000000 100.0000000
KYOUI 60 88.7666667 8.6736825 56.0000000 112.0000000
KODUKAI 179 47055.87 46621.92 0 165000.00
TSUUWA 41 7464.59 5158.33 500.0000000 30000.00
---------------------------------------------------------------------
SAS システム 89
23:50 Wednesday, June 9, 2004
Univariate Procedure
Schematic Plots
Variable=SHINTYOU
200 +
|
| 0
180 + |
| | *--+--*
| | +-----+
160 + *--+--* |
| +-----+ 0
| 0
140 +
------------+-----------+-----------
SEX F M
SAS システム 92
23:50 Wednesday, June 9, 2004
Univariate Procedure
Schematic Plots
Variable=KODUKAI
|
200000 +
| 0 |
| 0 |
100000 + | |
| +-----+ +-----+
| *--+--* *--+--*
0 + | +-----+
------------+-----------+-----------
SEX F M
SAS システム 103
23:50 Wednesday, June 9, 2004
SEX SHINTYOU Cum. Cum.
Midpoint Freq Freq Percent Percent
|
F 146 | 1 1 0.36 0.36
150 |*** 7 8 2.50 2.86
154 |****** 14 22 5.00 7.86
158 |********* 23 45 8.21 16.07
162 |********** 26 71 9.29 25.36
166 |******* 18 89 6.43 31.79
170 |* 3 92 1.07 32.86
174 | 0 92 0.00 32.86
178 | 0 92 0.00 32.86
182 | 0 92 0.00 32.86
186 | 0 92 0.00 32.86
|
M 146 | 0 92 0.00 32.86
150 | 0 92 0.00 32.86
154 | 0 92 0.00 32.86
158 | 1 93 0.36 33.21
162 |***** 12 105 4.29 37.50
166 |******** 20 125 7.14 44.64
170 |*********************** 57 182 20.36 65.00
174 |********************* 53 235 18.93 83.93
178 |********** 25 260 8.93 92.86
182 |******* 17 277 6.07 98.93
186 |* 3 280 1.07 100.00
|
----+---+---+---+---+---
10 20 30 40 50
Frequency
SAS システム 109
23:50 Wednesday, June 9, 2004
SEX KODUKAI Cum. Cum.
Midpoint Freq Freq Percent Percent
|
F 0 |****** 12 12 4.51 4.51
20000 |******** 16 28 6.02 10.53
40000 |*********** 22 50 8.27 18.80
60000 |********** 19 69 7.14 25.94
80000 |**** 7 76 2.63 28.57
100000 |** 4 80 1.50 30.08
120000 |** 3 83 1.13 31.20
140000 |* 1 84 0.38 31.58
160000 |* 2 86 0.75 32.33
180000 |* 1 87 0.38 32.71
|
M 0 |********************* 41 128 15.41 48.12
20000 |***************** 34 162 12.78 60.90
40000 |****************** 36 198 13.53 74.44
60000 |********** 20 218 7.52 81.95
80000 |****** 11 229 4.14 86.09
100000 |******** 15 244 5.64 91.73
120000 |*** 6 250 2.26 93.98
140000 |* 2 252 0.75 94.74
160000 |******* 14 266 5.26 100.00
180000 | 0 266 0.00 100.00
|
-----+----+----+----+-
10 20 30 40
Frequency
/* Fragment: INFILE options for reading a comma-separated (CSV) file.
   The INPUT statement is not shown here.
   NOTE(review): without the DSD option, consecutive commas are treated
   as a single delimiter - confirm the file has no empty fields. */
data mon2004;
infile 'd:\home\mon_all8d.csv' dlm=','   /* fields are separated by commas */
firstobs=2                               /* start reading at the 2nd record */
truncover;                               /* if a record ends early, set the remaining variables to missing
                                            instead of continuing onto the next record */
/* Fragment: INFILE options for reading a tab-delimited text file.
   The INPUT statement is not shown here. */
data mon2004;
infile 'd:\home\mon_all8d.txt' dlm='09'x /* '09'x is the hexadecimal constant for the tab character */
firstobs=2                               /* start reading at the 2nd record */
truncover;                               /* if a record ends early, set the remaining variables to missing
                                            instead of continuing onto the next record */
/* Format procedure output at 72 columns per line and 20 lines per page. */
options linesize=72 pagesize=20;
# Shell command (not SAS code): run the program file les9999.sas with the sas executable.
# NOTE(review): presumably a batch run that writes a .log and a .lst file next to the program - confirm for the local installation.
sas les9999.sas