/* Example: read a comma-separated, Shift-JIS encoded file with list input. */
data example2021;
  infile 'StatM21/foo1.csv'
         firstobs=2           /* begin reading at the 2nd record (presumably skips a header line) */
         dlm=',' dsd          /* comma delimiter; DSD treats consecutive delimiters as missing values */
         missover truncover   /* NOTE(review): two short-record options are given together; confirm which one is intended */
         encoding=sjis        /* the file is encoded in Shift-JIS */
         termstr=crlf         /* records end with CR+LF */
  ;
  /* "name : informat" is modified list input: delimited fields read with an explicit width. */
  input No      $
        Univ    : $30.
        SName   : $40.
        Faculty : $50.
        Dept    : $50.
        Center1 : $8.
        Center2 : $8.
        Sel1    : $8.
        Sel2    : $8.
        Book1   : $10.
        Book2   : $10.
        Vol0
        VolS
        VolT
        ZenKou  $
        ScoreS
        ScoreT
        KoKouSi
  ;
/* Example: the same kind of INFILE statement for a tab-delimited text file. */
data example2021;
  infile 'StatM21/foo2.txt'
         firstobs=2           /* begin reading at the 2nd record */
         dlm='09'x            /* hexadecimal 09 = the TAB character */
         missover truncover   /* NOTE(review): two short-record options are given together; confirm which one is intended */
         encoding=sjis        /* the file is encoded in Shift-JIS */
         termstr=crlf         /* records end with CR+LF */
  ;
/* Example: INFILE statement with an explicit logical record length. */
data math;
  infile 'StatM21/foo3.csv'
         firstobs=2      /* begin reading at the 2nd record */
         dlm=',' dsd     /* comma delimiter; DSD treats consecutive delimiters as missing values */
         lrecl=230       /* logical record length of 230 */
  ;
/* Example: read fixed column positions (column input) from each record. */
data math;
  infile 'StatM21/foo4.csv'
         firstobs=2      /* begin reading at the 2nd record */
         dlm=',' dsd     /* NOTE(review): delimiter options look unused here, since INPUT below uses column positions; confirm */
         lrecl=230       /* logical record length of 230 */
         truncover       /* a record shorter than the requested columns does not make INPUT read the next line */
  ;
  /* variable  start-end columns ($ marks a character variable) */
  input UketsukeID   1-6
        JyukenID     7-11
        BirthDay    13-20
        Area      $ 32-41
        s_scor01   103-104
        s_scor02   105-106
        s_scor03   107-108
  ;
/* Lesson 13-02 */
/* File Name = les1302.sas 01/18/22 */

/* Listing layout: left-aligned output, 78 columns per line, 30 lines per page; English locale. */
options nocenter linesize=78 pagesize=30 locale='en_US';
/* options locale='ja_JP'; */

/* Route procedure output to a text file; NEW replaces the file instead of appending to it. */
proc printto print='StatM21/les1302-Results.txt' new;
run;
/* Read the student survey CSV into GAKUSEI and unify inconsistent carrier names. */
data gakusei;
  infile 'StatM21/StudAll21d.csv'
         firstobs=9 dlm=',' dsd missover   /* data start at record 9; comma-delimited */
         encoding=sjis termstr=crlf;       /* Shift-JIS text, CR+LF line endings */
  input sex $ shintyou taijyuu kyoui
        jitaku : $10. kodukai carryer $ tsuuwa;
  /* if shintyou="." or taijyuu="." or kyoui="." then delete; */

  /* Unify inconsistent spellings of the carrier name. */
  /* NOTE(review): carryer is read with "$" and no informat, so it has the default
     length of 8; the 10-character value "au+YMobile" assigned below is therefore
     stored as "au+YMobi". Confirm whether a longer length is wanted. */
  if carryer="DoCoMo" then carryer="docomo";
  if carryer="DoCoMo+w" then carryer="docomo+W";
  if carryer="Vodafone" then carryer="Softbank";
  if carryer="vodafone" then carryer="Softbank";
  if carryer="softbank" then carryer="Softbank";
  if carryer="au+willc" then carryer="au+YMobile";
  if carryer="Willcom" then carryer="YMobile";
  if carryer="DDIp" then carryer="YMobile";
run;

/* Print the first five observations as a quick check of the import. */
proc print data=gakusei(obs=5);
run;
title '*** 通常の頻度集計、クロス集計(アルファベット順になる) ***';

/* One-way frequency tables. No ORDER= option: levels are listed in alphabetical order. */
proc freq data=gakusei;
  tables sex jitaku carryer;
run;

/* Two-way cross tabulations. No ORDER= option: levels are listed in alphabetical order. */
proc freq data=gakusei;
  tables sex*jitaku;
  tables sex*carryer;
  tables jitaku*carryer;
run;
title '*** 頻度の大きい順に表示 ***';

/* One-way frequency tables, levels listed in descending order of frequency. */
proc freq data=gakusei order=freq;
  tables sex jitaku carryer;
run;

/* Two-way cross tabulations, levels listed in descending order of frequency. */
proc freq data=gakusei order=freq;
  tables sex*jitaku;
  tables sex*carryer;
  tables jitaku*carryer;
run;
title '*** 頻度の大きい順に表示(頻度のみ) ***';

/* One-way tables in descending order of frequency; */
/* NOPERCENT / NOROW / NOCOL leave only the frequency counts. */
proc freq data=gakusei order=freq;
  tables sex jitaku carryer / nopercent norow nocol;
run;

/* Two-way tables in descending order of frequency, frequency counts only. */
proc freq data=gakusei order=freq;
  tables sex*jitaku / nopercent norow nocol;
  tables sex*carryer / nopercent norow nocol;
  tables jitaku*carryer / nopercent norow nocol;
run;
*** 通常の頻度集計、クロス集計(アルファベット順にな 58
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Cumulative Cumulative
sex Frequency Percent Frequency Percent
--------------------------------------------------------
F 141 29.81 141 29.81
M 332 70.19 473 100.00
Frequency Missing = 125
Cumulative Cumulative
jitaku Frequency Percent Frequency Percent
--------------------------------------------------------------
下宿生 185 34.84 185 34.84
自宅生 346 65.16 531 100.00
Frequency Missing = 67
*** 通常の頻度集計、クロス集計(アルファベット順にな 59
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Cumulative Cumulative
carryer Frequency Percent Frequency Percent
-------------------------------------------------------------
J-PHONE 10 6.71 10 6.71
KDDI 1 0.67 11 7.38
No 5 3.36 16 10.74
OCN 1 0.67 17 11.41
Softbank 22 14.77 39 26.17
UQ-mobil 1 0.67 40 26.85
YMobile 3 2.01 43 28.86
au 39 26.17 82 55.03
au+YMobi 1 0.67 83 55.70
docomo 65 43.62 148 99.33
docomo+w 1 0.67 149 100.00
Frequency Missing = 449
*** 通常の頻度集計、クロス集計(アルファベット順にな 60
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Table of sex by jitaku
sex jitaku
Frequency|
Percent |
Row Pct |
Col Pct |下宿 |自宅 | Total
|生 |生 |
---------+--------+--------+
F | 39 | 83 | 122
| 9.35 | 19.90 | 29.26
| 31.97 | 68.03 |
| 26.17 | 30.97 |
---------+--------+--------+
M | 110 | 185 | 295
| 26.38 | 44.36 | 70.74
| 37.29 | 62.71 |
| 73.83 | 69.03 |
---------+--------+--------+
Total 149 268 417
35.73 64.27 100.00
Frequency Missing = 181
*** 通常の頻度集計、クロス集計(アルファベット順にな 61
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Table of sex by carryer
sex carryer
Frequency|
Percent |
Row Pct |
Col Pct |J-PHONE |KDDI |No |OCN |Softbank|UQ-mobil| Total
---------+--------+--------+--------+--------+--------+--------+
F | 4 | 0 | 1 | 0 | 10 | 0 | 56
| 2.74 | 0.00 | 0.68 | 0.00 | 6.85 | 0.00 | 38.36
| 7.14 | 0.00 | 1.79 | 0.00 | 17.86 | 0.00 |
| 44.44 | 0.00 | 20.00 | . | 45.45 | . |
---------+--------+--------+--------+--------+--------+--------+
M | 5 | 1 | 4 | 0 | 12 | 0 | 90
| 3.42 | 0.68 | 2.74 | 0.00 | 8.22 | 0.00 | 61.64
| 5.56 | 1.11 | 4.44 | 0.00 | 13.33 | 0.00 |
| 55.56 | 100.00 | 80.00 | . | 54.55 | . |
---------+--------+--------+--------+--------+--------+--------+
Total 9 1 5 0 22 0 146
6.16 0.68 3.42 0.00 15.07 0.00 100.00
(Continued)
*** 通常の頻度集計、クロス集計(アルファベット順にな 62
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Table of sex by carryer
sex carryer
Frequency|
Percent |
Row Pct |
Col Pct |YMobile |au |au+YMobi|docomo |docomo+w| Total
---------+--------+--------+--------+--------+--------+
F | 2 | 12 | 1 | 26 | 0 | 56
| 1.37 | 8.22 | 0.68 | 17.81 | 0.00 | 38.36
| 3.57 | 21.43 | 1.79 | 46.43 | 0.00 |
| 66.67 | 30.77 | 100.00 | 40.00 | 0.00 |
---------+--------+--------+--------+--------+--------+
M | 1 | 27 | 0 | 39 | 1 | 90
| 0.68 | 18.49 | 0.00 | 26.71 | 0.68 | 61.64
| 1.11 | 30.00 | 0.00 | 43.33 | 1.11 |
| 33.33 | 69.23 | 0.00 | 60.00 | 100.00 |
---------+--------+--------+--------+--------+--------+
Total 3 39 1 65 1 146
2.05 26.71 0.68 44.52 0.68 100.00
Frequency Missing = 452
≪中略≫
*** 頻度の大きい順に表示 *** 65
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Cumulative Cumulative
sex Frequency Percent Frequency Percent
--------------------------------------------------------
M 332 70.19 332 70.19
F 141 29.81 473 100.00
Frequency Missing = 125
Cumulative Cumulative
jitaku Frequency Percent Frequency Percent
--------------------------------------------------------------
自宅生 346 65.16 346 65.16
下宿生 185 34.84 531 100.00
Frequency Missing = 67
*** 頻度の大きい順に表示 *** 66
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Cumulative Cumulative
carryer Frequency Percent Frequency Percent
-------------------------------------------------------------
docomo 65 43.62 65 43.62
au 39 26.17 104 69.80
Softbank 22 14.77 126 84.56
J-PHONE 10 6.71 136 91.28
No 5 3.36 141 94.63
YMobile 3 2.01 144 96.64
KDDI 1 0.67 145 97.32
OCN 1 0.67 146 97.99
UQ-mobil 1 0.67 147 98.66
au+YMobi 1 0.67 148 99.33
docomo+w 1 0.67 149 100.00
Frequency Missing = 449
*** 頻度の大きい順に表示 *** 67
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Table of sex by jitaku
sex jitaku
Frequency|
Percent |
Row Pct |
Col Pct |自宅 |下宿 | Total
|生 |生 |
---------+--------+--------+
M | 185 | 110 | 295
| 44.36 | 26.38 | 70.74
| 62.71 | 37.29 |
| 69.03 | 73.83 |
---------+--------+--------+
F | 83 | 39 | 122
| 19.90 | 9.35 | 29.26
| 68.03 | 31.97 |
| 30.97 | 26.17 |
---------+--------+--------+
Total 268 149 417
64.27 35.73 100.00
Frequency Missing = 181
*** 頻度の大きい順に表示 *** 68
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Table of sex by carryer
sex carryer
Frequency|
Percent |
Row Pct |
Col Pct |docomo |au |Softbank|J-PHONE |No |YMobile | Total
---------+--------+--------+--------+--------+--------+--------+
M | 39 | 27 | 12 | 5 | 4 | 1 | 90
| 26.71 | 18.49 | 8.22 | 3.42 | 2.74 | 0.68 | 61.64
| 43.33 | 30.00 | 13.33 | 5.56 | 4.44 | 1.11 |
| 60.00 | 69.23 | 54.55 | 55.56 | 80.00 | 33.33 |
---------+--------+--------+--------+--------+--------+--------+
F | 26 | 12 | 10 | 4 | 1 | 2 | 56
| 17.81 | 8.22 | 6.85 | 2.74 | 0.68 | 1.37 | 38.36
| 46.43 | 21.43 | 17.86 | 7.14 | 1.79 | 3.57 |
| 40.00 | 30.77 | 45.45 | 44.44 | 20.00 | 66.67 |
---------+--------+--------+--------+--------+--------+--------+
Total 65 39 22 9 5 3 146
44.52 26.71 15.07 6.16 3.42 2.05 100.00
(Continued)
≪中略≫
*** 頻度の大きい順に表示(頻度のみ) *** 72
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Cumulative
sex Frequency Frequency
------------------------------
M 332 332
F 141 473
Frequency Missing = 125
Cumulative
jitaku Frequency Frequency
------------------------------------
自宅生 346 346
下宿生 185 531
Frequency Missing = 67
*** 頻度の大きい順に表示(頻度のみ) *** 73
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Cumulative
carryer Frequency Frequency
-----------------------------------
docomo 65 65
au 39 104
Softbank 22 126
J-PHONE 10 136
No 5 141
YMobile 3 144
KDDI 1 145
OCN 1 146
UQ-mobil 1 147
au+YMobi 1 148
docomo+w 1 149
Frequency Missing = 449
*** 頻度の大きい順に表示(頻度のみ) *** 74
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Table of sex by jitaku
sex jitaku
Frequency|自宅 |下宿 | Total
|生 |生 |
---------+--------+--------+
M | 185 | 110 | 295
---------+--------+--------+
F | 83 | 39 | 122
---------+--------+--------+
Total 268 149 417
Frequency Missing = 181
*** 頻度の大きい順に表示(頻度のみ) *** 75
Tuesday, February 1, 2022 10:46:37 AM
The FREQ Procedure
Table of sex by carryer
sex carryer
Frequency|docomo |au |Softbank|J-PHONE |No |YMobile | Total
---------+--------+--------+--------+--------+--------+--------+
M | 39 | 27 | 12 | 5 | 4 | 1 | 90
---------+--------+--------+--------+--------+--------+--------+
F | 26 | 12 | 10 | 4 | 1 | 2 | 56
---------+--------+--------+--------+--------+--------+--------+
Total 65 39 22 9 5 3 146
(Continued)
≪後略≫
≪前略≫
/* Excerpt from the gakusei data step: unify inconsistent spellings of the carrier name. */
/* Each statement rewrites one exact (case-sensitive) spelling to its canonical form. */
if carryer="DoCoMo" then carryer="docomo";
if carryer="DoCoMo+w" then carryer="docomo+W";
if carryer="Vodafone" then carryer="Softbank";
if carryer="vodafone" then carryer="Softbank";
if carryer="softbank" then carryer="Softbank";
if carryer="au+willc" then carryer="au+YMobile";
if carryer="Willcom" then carryer="YMobile";
if carryer="DDIp" then carryer="YMobile";
≪後略≫
≪前略≫
title '*** 頻度の大きい順に表示 ***';

/* One-way tables; ORDER=FREQ lists the levels in descending order of frequency. */
proc freq data=gakusei order=freq;
  tables sex jitaku carryer;
run;

/* Two-way tables; the three requests are given in a single TABLES statement. */
proc freq data=gakusei order=freq;
  tables sex*jitaku
         sex*carryer
         jitaku*carryer;
run;
≪後略≫
≪前略≫
title '*** 頻度の大きい順に表示(頻度のみ) ***';

/* One-way tables in descending order of frequency, frequency counts only. */
proc freq data=gakusei order=freq;
  tables sex jitaku carryer
         / nopercent norow nocol;
run;

/* Two-way tables; the options after "/" apply to every request in the statement. */
proc freq data=gakusei order=freq;
  tables sex*jitaku
         sex*carryer
         jitaku*carryer
         / nopercent norow nocol;
run;
≪後略≫
≪前略≫
title '*** 3重クロス集計 ***';

/* Three-way cross tabulation: one jitaku-by-carryer table for each level of sex. */
proc freq data=gakusei;
  tables sex * jitaku * carryer;
run;
≪前略≫
/* Correlation coefficients; the result is presented in matrix form. */
proc corr data=gakusei;
  var shintyou taijyuu kyoui
      kodukai tsuuwa;
run;
title "Scatterplot Matrix";

/* Scatterplot matrix of the variables listed in the MATRIX statement. */
proc sgscatter data=gakusei;
  matrix shintyou taijyuu kyoui
         kodukai tsuuwa;
run;
title "Scatterplot Matrix grouped sex";

/* Scatterplot matrix of the listed variables; GROUP= names the variable */
/* used to distinguish the observations (here: sex). */
proc sgscatter data=gakusei;
  matrix shintyou taijyuu kyoui
         kodukai tsuuwa
         / group=sex;
run;
回帰分析の中で「残差が正規分布に従うことを仮定して理論が構成されている」と紹介した。また、統計検定の中では「分布が正規分布に従っているか」によって、検定手法が異なることも紹介した。これらの「正規分布に従っているか」を調べるための手法としてQ-Qプロットがあり、「斜め右上がりの直線からの乖離具合で判断できる」と説明した。どのような理由・原理からこの手法が導出されているかについて疑問(興味)を持った人も居たのではないかと思うので、動画を交えて簡単に説明しておく。
この手法は、平均を0に、分散を1に標準化した2つの分布を比較するものであり、基準の分布を標準正規分布(平均0,分散1)に取り、
調べたい変量の分布を対象分布として そのズレをプロットしたものである。 大まかな手順は以下の通り。 なお、確率側を比較するP-Pプロットもある。
ちなみに、Qはquantile(分位点)、Pはprobability(確率)の頭文字を示している。