### Render the whole dataset as a styled HTML table with kableExtra
### and save it to "output.html".
library(kableExtra)

df <- read.csv("students.csv")

output <- df %>%
  kbl(format = "html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "bordered", "responsive")
  ) %>%
  ### Row 0 addresses the header row in kableExtra (see row_spec docs).
  row_spec(0, bold = TRUE, align = "c", color = "white", background = "#1d6c00") %>%
  kable_classic(full_width = TRUE, html_font = "arial")

save_kable(output, file = "output.html")
Các lệnh R thường dùng khi thực hiện thống kê mô tả
1 Tình huống thường gặp
Bạn có file dữ liệu `df` gồm rất nhiều hàng và cột (trong bài này ta sử dụng dummy dataset để làm ví dụ minh họa). Việc đầu tiên cần làm là kiểm tra đặc điểm dữ liệu để đánh giá tổng quát toàn bộ dataset, nhằm có một hình dung cụ thể về dữ liệu trước khi thực hiện các bước phân tích sâu hơn.
Để tạo ra 1 file HTML table show full dataset, bạn sử dụng package kableExtra
1. View
df <- read.csv("students.csv")
head(df, n = 30) ### show the first 30 rows of df
no stud.id name gender age height weight religion nc.score semester major minor score1 score2 online.tutorial graduated salary
1 1 833917 Gonzales, Christina Female 19 160 64.8 Muslim 1.91 1st Political Science Social Sciences NA NA 0 0 NA
2 2 898539 Lozano, T'Hani Female 19 172 73.0 Other 1.56 2nd Social Sciences Mathematics and Statistics NA NA 0 0 NA
3 3 379678 Williams, Hanh Female 22 168 70.6 Protestant 1.24 3rd Social Sciences Mathematics and Statistics 45 46 0 0 NA
4 4 807564 Nem, Denzel Male 19 183 79.7 Other 1.37 2nd Environmental Sciences Mathematics and Statistics NA NA 0 0 NA
5 5 383291 Powell, Heather Female 21 175 71.4 Catholic 1.46 1st Environmental Sciences Mathematics and Statistics NA NA 0 0 NA
6 6 256074 Perez, Jadrian Male 19 189 85.8 Catholic 1.34 2nd Political Science Mathematics and Statistics NA NA 0 0 NA
7 7 754591 Clardy, Anita Female 21 156 65.9 Protestant 1.11 2nd Political Science Social Sciences NA NA 0 0 NA
8 8 146494 Allen, Rebecca Marie Female 21 167 65.7 Other 2.03 3rd Political Science Economics and Finance 58 62 0 0 NA
9 9 723584 Tracy, Robert Male 18 195 94.4 Other 1.29 3rd Economics and Finance Environmental Sciences 57 67 0 0 NA
10 10 314281 Nimmons, Laura Female 18 165 66.0 Orthodox 1.19 2nd Environmental Sciences Mathematics and Statistics NA NA 0 0 NA
11 11 200803 Lang, Mackenzie Female 22 162 66.8 Other 1.04 4th Economics and Finance Environmental Sciences 62 61 1 1 45254.11
12 12 444907 Rodriguez, Brianna Female 18 172 66.8 Other 3.81 3rd Environmental Sciences Economics and Finance 76 82 0 0 NA
13 13 354271 Covar Orendain, Christopher Male 23 185 84.6 Orthodox 1.00 4th Environmental Sciences Mathematics and Statistics 71 76 1 1 40552.79
14 14 317812 Lopez, Monique Female 20 158 64.4 Catholic 2.50 6th Environmental Sciences Social Sciences 66 70 1 1 27007.03
15 15 604115 Davis, Shagun Female 19 157 66.3 Orthodox 1.92 2nd Economics and Finance Political Science NA NA 0 0 NA
16 16 889551 Adams, Jose Male 20 172 73.9 Other 3.61 4th Mathematics and Statistics Political Science 87 91 1 0 NA
17 17 350040 Hines, Haileigh Female 22 156 61.7 Other 2.27 6th Political Science Biology 57 54 0 1 33969.16
18 18 240279 Daugherty, Jesus Male 22 182 82.1 Catholic 1.42 1st Economics and Finance Environmental Sciences NA NA 0 0 NA
19 19 865835 Roybal, Ebony Female 21 162 69.2 Catholic 1.32 3rd Political Science Environmental Sciences 69 46 1 0 NA
20 20 137196 Baysinger, Tanisha Female 22 168 70.9 Protestant 2.33 2nd Environmental Sciences Political Science NA NA 0 0 NA
21 21 708242 Phillips, Laiba Female 20 167 68.5 Other 1.79 4th Biology Economics and Finance 77 80 1 0 NA
22 22 499002 Culbertson, Deshawn Male 37 175 70.4 Protestant 1.97 2nd Political Science Environmental Sciences NA NA 0 0 NA
23 23 873149 O Reilly, Joshua Male 19 164 70.3 Protestant 1.68 2nd Political Science Environmental Sciences NA NA 0 0 NA
24 24 807361 Johnson, Stephanie Female 38 155 67.0 Catholic 2.30 2nd Environmental Sciences Biology NA NA 0 0 NA
25 25 531029 Mix, Aziel Male 23 183 81.8 Catholic 2.11 4th Economics and Finance Environmental Sciences 69 65 0 0 NA
26 26 970589 Gonzalez, Dixie Female 26 145 54.0 Other 1.14 1st Biology Environmental Sciences NA NA 0 0 NA
27 27 250298 Clark, Janelle Female 25 161 66.8 Other 1.45 3rd Social Sciences Economics and Finance 45 37 1 0 NA
28 28 763393 Woolsey, Bronson Male 24 182 80.1 Protestant 1.09 5th Economics and Finance Social Sciences 61 73 1 1 50617.64
29 29 544433 Diawara, Erica Female 54 169 71.4 Protestant 1.75 2nd Political Science Environmental Sciences NA NA 0 0 NA
30 30 252935 Lord, Benjamin Male 22 172 69.6 Protestant 3.94 3rd Mathematics and Statistics Social Sciences 89 90 1 0 NA
2 Các lệnh kiểm tra dataset thường dùng
dim(df) ### Check how many rows and columns the dataset has
[1] 8239 17
str(df) ### Overview of the dataset's structure
'data.frame': 8239 obs. of 17 variables:
$ no : int 1 2 3 4 5 6 7 8 9 10 ...
$ stud.id : int 833917 898539 379678 807564 383291 256074 754591 146494 723584 314281 ...
$ name : chr "Gonzales, Christina" "Lozano, T'Hani" "Williams, Hanh" "Nem, Denzel" ...
$ gender : chr "Female" "Female" "Female" "Male" ...
$ age : int 19 19 22 19 21 19 21 21 18 18 ...
$ height : int 160 172 168 183 175 189 156 167 195 165 ...
$ weight : num 64.8 73 70.6 79.7 71.4 85.8 65.9 65.7 94.4 66 ...
$ religion : chr "Muslim" "Other" "Protestant" "Other" ...
$ nc.score : num 1.91 1.56 1.24 1.37 1.46 1.34 1.11 2.03 1.29 1.19 ...
$ semester : chr "1st" "2nd" "3rd" "2nd" ...
$ major : chr "Political Science" "Social Sciences" "Social Sciences" "Environmental Sciences" ...
$ minor : chr "Social Sciences" "Mathematics and Statistics" "Mathematics and Statistics" "Mathematics and Statistics" ...
$ score1 : int NA NA 45 NA NA NA NA 58 57 NA ...
$ score2 : int NA NA 46 NA NA NA NA 62 67 NA ...
$ online.tutorial: int 0 0 0 0 0 0 0 0 0 0 ...
$ graduated : int 0 0 0 0 0 0 0 0 0 0 ...
$ salary : num NA NA NA NA NA NA NA NA NA NA ...
summary(df) ### Summarise the characteristics of each column in the dataset
no stud.id name gender age height weight religion nc.score semester major minor score1 score2 online.tutorial graduated salary
Min. : 1 Min. :110250 Length:8239 Length:8239 Min. :18.00 Min. :135.0 Min. : 51.4 Length:8239 Min. :1.000 Length:8239 Length:8239 Length:8239 Min. :30.00 Min. :31.0 Min. :0.0000 Min. :0.0000 Min. :11444
1st Qu.:2060 1st Qu.:331223 Class :character Class :character 1st Qu.:20.00 1st Qu.:163.0 1st Qu.: 66.5 Class :character 1st Qu.:1.460 Class :character Class :character Class :character 1st Qu.:58.00 1st Qu.:59.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:35207
Median :4120 Median :545132 Mode :character Mode :character Median :21.00 Median :171.0 Median : 71.8 Mode :character Median :2.040 Mode :character Mode :character Mode :character Median :70.00 Median :71.0 Median :0.0000 Median :0.0000 Median :41672
Mean :4120 Mean :548911 Mean :22.54 Mean :171.4 Mean : 73.0 Mean :2.166 Mean :68.17 Mean :69.5 Mean :0.3862 Mean :0.2128 Mean :42522
3rd Qu.:6180 3rd Qu.:770808 3rd Qu.:23.00 3rd Qu.:180.0 3rd Qu.: 78.5 3rd Qu.:2.780 3rd Qu.:78.00 3rd Qu.:80.0 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:49373
Max. :8239 Max. :989901 Max. :64.00 Max. :206.0 Max. :116.0 Max. :4.000 Max. :97.00 Max. :98.0 Max. :1.0000 Max. :1.0000 Max. :75597
NA's :3347 NA's :3347 NA's :6486
Vì giá trị `NA` (missing values) chỉ thể hiện ở numeric vector mà không thể hiện ở character vector, nên kết quả của lệnh `summary()` chỉ để tham khảo sơ bộ khi đánh giá về tình trạng `NA` của bộ dữ liệu.
### This call confirms whether the dataset contains any `NA` at all.
### FALSE means there is no `NA` anywhere; TRUE means at least one `NA` is present.
any(is.na(df))
[1] TRUE
2.1 Kiểm tra kỹ class từng cột
sapply(df, class) ### Check the class of each column.
no stud.id name gender age height weight religion nc.score semester major minor score1 score2 online.tutorial graduated salary
"integer" "integer" "character" "character" "integer" "integer" "numeric" "character" "numeric" "character" "character" "character" "integer" "integer" "integer" "integer" "numeric"
Ta chuyển toàn bộ các cột ở class `integer` sang class `numeric` (nếu cần thiết). Ở đây mình làm ví dụ minh họa để có code mẫu áp dụng cho các trường hợp tương tự.
### Collect the class of every column into a named vector
class_tung_cot <- sapply(df, class, simplify = TRUE, USE.NAMES = TRUE)
### Find the positions of the columns whose class is `integer`
integer_index_cot <- grep(
  pattern = "integer", ignore.case = FALSE, fixed = TRUE, x = class_tung_cot
)
### Extract the names of the `integer` columns
ten_cot_integer <- names(class_tung_cot)[integer_index_cot]
### Convert all of these `integer` columns to `numeric`.
### List-style indexing (`df[cols]`) is used instead of `df[, cols]` so the
### selection stays a data frame even when only one column matches; with
### `df[, cols]` a single match collapses to a vector and lapply() would then
### iterate over its elements instead of over columns.
df[ten_cot_integer] <- lapply(df[ten_cot_integer], as.numeric)
### Re-check: every former `integer` column should now report `numeric`
sapply(df, class, simplify = TRUE, USE.NAMES = TRUE)
no stud.id name gender age height weight religion nc.score semester major minor score1 score2 online.tutorial graduated salary
"numeric" "numeric" "character" "character" "numeric" "numeric" "numeric" "character" "numeric" "character" "character" "character" "numeric" "numeric" "numeric" "numeric" "numeric"
2.2 Xác nhận mỗi dòng (observation, row) là một quan sát riêng biệt
Để tăng mức độ tự tin khi thao tác với dữ liệu, bạn cần xác nhận là mỗi dòng (ở đây là mỗi sinh viên) thì dữ liệu là duy nhất, không có lặp lại. Việc này rất quan trọng để tránh bị trùng lắp (trong trường hợp dataset ở dạng semi-long) để ta tìm cách trải dữ liệu ra làm sao thu được dạng true long hoặc true wide, tức là mỗi một dòng là một quan sát riêng biệt.
### Datasets usually have an ID column that serves as the basis for tracking records;
### if there is no ID column, create one to make later processing easier (if needed).
### Check whether the student ID column contains repeated values:
### unique() returns every distinct value that appears in the ID column,
### length() counts the total length of that vector.
length(unique(df$stud.id))
[1] 8239
Như vậy ta xác nhận xem tổng số hàng của dataset `df` có bằng với tổng số giá trị ID riêng biệt ở cột `stud.id` hay không. Nếu là `TRUE` thì chắc chắn là mỗi một dòng là một sinh viên riêng biệt.
### TRUE when the number of rows equals the number of distinct student IDs.
identical(nrow(df), length(unique(df$stud.id)))
[1] TRUE
Cách 2 là ta dùng lệnh duplicated()
### duplicated() returns a logical vector with one entry per value of stud.id:
### TRUE marks a value that repeats an earlier one, FALSE marks no repetition.
check_duplicated <- duplicated(df$stud.id)
### all() returns TRUE only if every value of !check_duplicated is TRUE,
### which means there are no duplicates.
all(!check_duplicated)
[1] TRUE
Cách 3 là ta dùng lệnh anyDuplicated()
### For convenience use this call: a result of 0 means there are no duplicates.
anyDuplicated(df$stud.id)
[1] 0
2.3 Dùng lệnh `table()` để kiểm tra giá trị `NA` ở từng cột
2.3.1 Tham số gender
ở vị trí hàng, tham số major
ở vị trí cột
Lệnh `table()` là một lệnh rất mạnh trong R. Ta nên dùng tham số `useNA = "ifany"` nhằm thể hiện luôn số lượng `NA` nếu có trong vector đang kiểm tra.
options(width = 120)
### Tabulate every column separately, skipping `no`, `stud.id` and `name`,
### which do not need an `NA` check. useNA = "ifany" adds an `<NA>` count to a
### column's table whenever that column has missing values.
### List-style indexing (`df[mask]`) keeps the selection a data frame even if
### only one column remains, so lapply() always iterates over columns.
check_na <- lapply(
  X = df[!(names(df) %in% c("no", "stud.id", "name"))],
  FUN = table,
  useNA = "ifany"
)
check_na
$gender
Female Male
4110 4129
$age
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
259 987 1497 1729 1508 948 485 252 106 39 21 15 9 11 13 11 12 15 9 12 8 10 7 18
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
16 14 17 11 9 15 5 11 12 10 9 6 14 16 10 12 9 11 16 10 9 13 3
$height
135 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
1 2 4 3 9 8 8 15 24 17 32 38 56 49 71 70 106 106 157 137 155 177 236 177 246 200 279 209 254 234
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
287 246 293 241 294 224 298 227 292 198 269 210 237 189 243 185 183 140 166 112 146 80 89 57 63 39 43 38 24 12
198 199 200 201 202 203 206
8 5 7 6 4 2 2
$weight
51.4 54 54.3 54.6 54.7 54.8 55 55.2 55.3 55.4 55.5 55.7 55.9 56.1 56.2 56.3 56.4 56.5 56.6 56.7
1 1 1 1 1 1 2 1 1 2 1 1 2 5 5 1 4 5 2 3
56.8 56.9 57 57.1 57.2 57.3 57.4 57.5 57.6 57.7 57.8 57.9 58 58.1 58.2 58.3 58.4 58.5 58.6 58.7
2 3 4 2 2 2 2 4 5 4 7 6 7 3 8 7 4 10 8 9
58.8 58.9 59 59.1 59.2 59.3 59.4 59.5 59.6 59.7 59.8 59.9 60 60.1 60.2 60.3 60.4 60.5 60.6 60.7
8 13 5 7 5 8 8 10 12 14 17 13 9 17 11 12 16 14 12 15
60.8 60.9 61 61.1 61.2 61.3 61.4 61.5 61.6 61.7 61.8 61.9 62 62.1 62.2 62.3 62.4 62.5 62.6 62.7
20 16 11 19 11 22 28 16 24 18 15 30 24 28 20 19 19 24 36 20
62.8 62.9 63 63.1 63.2 63.3 63.4 63.5 63.6 63.7 63.8 63.9 64 64.1 64.2 64.3 64.4 64.5 64.6 64.7
30 25 37 23 34 35 40 34 25 32 33 26 28 20 31 24 30 39 29 36
64.8 64.9 65 65.1 65.2 65.3 65.4 65.5 65.6 65.7 65.8 65.9 66 66.1 66.2 66.3 66.4 66.5 66.6 66.7
43 36 41 28 34 41 31 30 35 38 49 41 47 52 39 38 32 33 39 36
66.8 66.9 67 67.1 67.2 67.3 67.4 67.5 67.6 67.7 67.8 67.9 68 68.1 68.2 68.3 68.4 68.5 68.6 68.7
36 47 30 57 35 43 36 34 34 54 38 30 42 37 36 51 34 44 48 40
68.8 68.9 69 69.1 69.2 69.3 69.4 69.5 69.6 69.7 69.8 69.9 70 70.1 70.2 70.3 70.4 70.5 70.6 70.7
49 37 36 40 46 34 38 33 38 36 38 37 40 34 42 34 25 23 47 48
70.8 70.9 71 71.1 71.2 71.3 71.4 71.5 71.6 71.7 71.8 71.9 72 72.1 72.2 72.3 72.4 72.5 72.6 72.7
43 32 42 32 31 27 37 32 54 41 38 35 31 41 30 44 30 29 29 33
72.8 72.9 73 73.1 73.2 73.3 73.4 73.5 73.6 73.7 73.8 73.9 74 74.1 74.2 74.3 74.4 74.5 74.6 74.7
51 28 32 41 37 34 41 29 32 36 36 31 34 30 30 49 37 30 29 35
74.8 74.9 75 75.1 75.2 75.3 75.4 75.5 75.6 75.7 75.8 75.9 76 76.1 76.2 76.3 76.4 76.5 76.6 76.7
32 26 30 31 28 23 18 41 35 30 33 30 24 38 22 29 25 27 22 22
76.8 76.9 77 77.1 77.2 77.3 77.4 77.5 77.6 77.7 77.8 77.9 78 78.1 78.2 78.3 78.4 78.5 78.6 78.7
38 30 30 28 25 24 26 28 34 28 26 30 25 24 31 21 18 24 14 32
78.8 78.9 79 79.1 79.2 79.3 79.4 79.5 79.6 79.7 79.8 79.9 80 80.1 80.2 80.3 80.4 80.5 80.6 80.7
30 22 27 32 25 22 26 18 26 28 11 18 21 25 29 19 20 19 25 24
80.8 80.9 81 81.1 81.2 81.3 81.4 81.5 81.6 81.7 81.8 81.9 82 82.1 82.2 82.3 82.4 82.5 82.6 82.7
21 26 27 16 23 25 21 21 20 14 19 16 19 27 20 11 22 15 17 24
82.8 82.9 83 83.1 83.2 83.3 83.4 83.5 83.6 83.7 83.8 83.9 84 84.1 84.2 84.3 84.4 84.5 84.6 84.7
19 15 18 19 13 14 13 14 19 11 14 16 12 17 15 8 12 12 14 11
84.8 84.9 85 85.1 85.2 85.3 85.4 85.5 85.6 85.7 85.8 85.9 86 86.1 86.2 86.3 86.4 86.5 86.6 86.7
17 18 10 12 12 15 15 10 18 17 14 7 12 10 10 8 10 15 11 12
86.8 86.9 87 87.1 87.2 87.3 87.4 87.5 87.6 87.7 87.8 87.9 88 88.1 88.2 88.3 88.4 88.5 88.6 88.7
9 14 6 8 6 9 6 6 8 8 6 8 13 8 8 6 7 10 11 5
88.8 88.9 89 89.1 89.2 89.3 89.4 89.5 89.6 89.7 89.8 89.9 90 90.1 90.2 90.3 90.4 90.5 90.6 90.7
8 8 8 10 8 12 3 6 8 11 11 7 11 8 9 7 6 4 5 7
90.8 90.9 91 91.1 91.2 91.3 91.4 91.5 91.6 91.7 91.8 91.9 92 92.1 92.2 92.3 92.4 92.5 92.6 92.7
4 10 6 5 4 4 5 5 4 4 4 7 8 4 6 1 2 2 1 5
92.8 92.9 93 93.1 93.2 93.3 93.4 93.5 93.6 93.7 93.8 94 94.1 94.2 94.3 94.4 94.5 94.6 94.7 94.8
3 5 3 4 3 4 2 1 1 3 4 1 9 2 4 6 2 2 1 1
94.9 95 95.1 95.2 95.3 95.4 95.7 95.8 95.9 96 96.1 96.2 96.3 96.4 96.5 96.6 96.7 96.8 96.9 97
2 3 1 6 2 2 2 2 2 4 4 1 3 2 3 3 3 3 2 2
97.2 97.4 97.6 97.8 97.9 98 98.1 98.2 98.3 98.4 98.6 98.7 98.8 99.1 99.2 99.4 99.8 99.9 100 100.2
2 4 2 3 4 1 3 1 1 1 5 1 2 3 2 1 1 1 1 1
100.4 100.7 101.1 101.2 101.5 101.8 102.1 102.2 102.5 103.1 103.2 103.6 104.6 104.8 105.3 105.5 105.7 106.2 106.3 109.3
1 1 1 3 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 1
109.9 110.5 113.6 116
1 1 1 1
$religion
Catholic Muslim Orthodox Other Protestant
2797 330 585 2688 1839
$nc.score
1 1.01 1.02 1.03 1.04 1.05 1.06 1.07 1.08 1.09 1.1 1.11 1.12 1.13 1.14 1.15 1.16 1.17 1.18 1.19 1.2 1.21 1.22 1.23
27 41 46 57 33 50 35 47 45 44 48 40 37 59 55 36 45 50 72 46 48 43 49 61
1.24 1.25 1.26 1.27 1.28 1.29 1.3 1.31 1.32 1.33 1.34 1.35 1.36 1.37 1.38 1.39 1.4 1.41 1.42 1.43 1.44 1.45 1.46 1.47
47 37 32 46 39 60 39 50 56 24 37 55 32 43 49 29 39 49 44 42 44 42 41 39
1.48 1.49 1.5 1.51 1.52 1.53 1.54 1.55 1.56 1.57 1.58 1.59 1.6 1.61 1.62 1.63 1.64 1.65 1.66 1.67 1.68 1.69 1.7 1.71
35 39 29 43 47 34 43 36 38 50 30 29 32 44 30 46 42 43 33 33 35 44 28 37
1.72 1.73 1.74 1.75 1.76 1.77 1.78 1.79 1.8 1.81 1.82 1.83 1.84 1.85 1.86 1.87 1.88 1.89 1.9 1.91 1.92 1.93 1.94 1.95
41 33 33 45 35 35 21 36 30 28 41 30 33 28 32 28 28 34 30 40 27 26 31 52
1.96 1.97 1.98 1.99 2 2.01 2.02 2.03 2.04 2.05 2.06 2.07 2.08 2.09 2.1 2.11 2.12 2.13 2.14 2.15 2.16 2.17 2.18 2.19
26 31 51 40 32 34 29 31 31 35 28 28 23 39 30 27 39 29 19 30 32 33 27 41
2.2 2.21 2.22 2.23 2.24 2.25 2.26 2.27 2.28 2.29 2.3 2.31 2.32 2.33 2.34 2.35 2.36 2.37 2.38 2.39 2.4 2.41 2.42 2.43
22 22 47 34 32 16 27 46 32 25 34 25 34 27 35 19 17 31 30 29 28 28 23 18
2.44 2.45 2.46 2.47 2.48 2.49 2.5 2.51 2.52 2.53 2.54 2.55 2.56 2.57 2.58 2.59 2.6 2.61 2.62 2.63 2.64 2.65 2.66 2.67
35 42 23 27 35 26 20 27 24 16 32 20 44 18 34 29 27 15 26 42 25 21 23 18
2.68 2.69 2.7 2.71 2.72 2.73 2.74 2.75 2.76 2.77 2.78 2.79 2.8 2.81 2.82 2.83 2.84 2.85 2.86 2.87 2.88 2.89 2.9 2.91
23 17 36 33 20 31 30 26 23 15 25 24 20 26 25 23 24 19 24 20 12 36 31 25
2.92 2.93 2.94 2.95 2.96 2.97 2.98 2.99 3 3.01 3.02 3.03 3.04 3.05 3.06 3.07 3.08 3.09 3.1 3.11 3.12 3.13 3.14 3.15
20 21 19 18 24 18 15 16 22 21 15 25 25 11 32 27 27 18 23 14 17 20 26 21
3.16 3.17 3.18 3.19 3.2 3.21 3.22 3.23 3.24 3.25 3.26 3.27 3.28 3.29 3.3 3.31 3.32 3.33 3.34 3.35 3.36 3.37 3.38 3.39
31 16 18 15 15 9 15 27 13 15 17 10 27 18 20 17 16 16 15 25 24 17 24 17
3.4 3.41 3.42 3.43 3.44 3.45 3.46 3.47 3.48 3.49 3.5 3.51 3.52 3.53 3.54 3.55 3.56 3.57 3.58 3.59 3.6 3.61 3.62 3.63
17 10 11 13 17 12 16 13 16 11 24 18 11 21 17 10 10 21 21 13 12 13 15 12
3.64 3.65 3.66 3.67 3.68 3.69 3.7 3.71 3.72 3.73 3.74 3.75 3.76 3.77 3.78 3.79 3.8 3.81 3.82 3.83 3.84 3.85 3.86 3.87
11 16 10 7 12 8 23 18 14 5 12 10 17 8 14 5 18 14 17 6 9 20 15 6
3.88 3.89 3.9 3.91 3.92 3.93 3.94 3.95 3.96 3.97 3.98 3.99 4
20 12 12 11 9 12 17 9 7 11 8 10 11
$semester
>6th 1st 2nd 3rd 4th 5th 6th
303 1709 1638 1641 1368 876 704
$major
Biology Economics and Finance Environmental Sciences Mathematics and Statistics
1597 1324 1626 1225
Political Science Social Sciences
1455 1012
$minor
Biology Economics and Finance Environmental Sciences Mathematics and Statistics
1318 1382 1318 1446
Political Science Social Sciences
1387 1388
$score1
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
2 2 5 3 2 6 10 15 19 23 46 32 54 54 56 60 60 64 62 56 57 66 78 79
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
64 67 72 79 79 58 62 71 85 97 78 103 124 132 132 156 148 168 158 162 161 175 139 141
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 <NA>
130 98 81 91 60 65 51 73 77 81 94 122 86 82 29 26 10 10 2 2 3347
$score2
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
1 3 3 1 8 9 10 19 25 25 30 29 58 72 45 53 68 54 54 72 53 48 72 80
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
73 77 86 77 65 72 74 70 87 79 103 101 102 100 122 141 130 171 134 168 141 153 143 145
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 <NA>
140 116 102 98 76 61 63 80 76 70 79 84 92 87 62 46 35 14 2 3 3347
$online.tutorial
0 1
5057 3182
$graduated
0 1
6486 1753
$salary
11444.14165 13300.1062 14081.09871 14709.40754 17552.54847 18571.24357 19481.33381 19636.58837 19855.6943 20223.34086
1 1 1 1 1 1 1 1 1 1
20336.12153 20443.2513 20482.20954 20511.15096 20690.64749 20773.12519 21000.25041 21281.503 21453.99451 21485.6209
1 1 1 1 1 1 1 1 1 1
21582.80074 21722.33876 22060.65687 22313.32636 22652.15099 22710.2407 22804.08213 22901.28089 22915.87392 22972.67788
1 1 1 1 1 1 1 1 1 1
23039.80161 23044.39984 23090.67376 23175.82345 23307.70447 23391.64004 23648.55651 23717.3565 23849.32402 23866.7016
1 1 1 1 1 1 1 1 1 1
23869.57307 23961.68748 23967.55056 24104.96887 24143.27217 24185.77732 24288.80828 24297.1551 24315.05925 24332.58702
1 1 1 1 1 1 1 1 1 1
24355.95244 24363.68573 24382.6483 24394.5899 24409.72846 24424.33223 24492.36269 24694.91265 24819.14294 24873.16374
1 1 1 1 1 1 1 1 1 1
25082.14482 25164.41043 25217.08047 25272.70248 25272.72813 25350.69013 25420.01946 25445.58294 25455.66907 25591.85062
1 1 1 1 1 1 1 1 1 1
25607.82079 25658.82841 25706.95935 25758.87532 25782.02 25883.02897 25979.0748 25998.48077 26018.60986 26031.96786
1 1 1 1 1 1 1 1 1 1
26061.58737 26089.71721 26097.73519 26124.30672 26235.13457 26253.70748 26265.34574 26269.01718 26291.32302 26452.4497
1 1 1 1 1 1 1 1 1 1
26482.22047 26489.30624 26604.10957 26658.86178 26710.79146 26721.84073 26730.7756 26744.0631 26882.32256 26897.66202
1 1 1 1 1 1 1 1 1 1
26950.55841 26972.6075 27007.03029 27016.8521 27154.28596 27244.06207 27312.48846 27338.02506 27408.49267 27440.24314
1 1 1 1 1 1 1 1 1 1
27455.66092 27488.454 27505.51553 27507.42631 27569.09856 27606.4034 27632.08151 27686.08064 27821.99011 27884.85499
1 1 1 1 1 1 1 1 1 1
27893.25871 27956.28844 28015.62354 28017.87427 28046.20864 28058.50724 28098.74637 28125.03134 28128.37423 28159.27279
1 1 1 1 1 1 1 1 1 1
28235.06109 28281.25028 28294.23853 28321.35307 28338.17215 28442.80425 28453.06441 28506.10855 28555.67784 28672.84826
1 1 1 1 1 1 1 1 1 1
28707.48701 28707.85312 28711.7134 28783.84467 28790.9292 28820.16429 28822.61661 28824.82601 28887.59297 28927.89842
1 1 1 1 1 1 1 1 1 1
28929.07149 29021.96127 29039.69103 29043.64255 29071.65175 29178.47035 29185.29687 29195.49695 29216.61781 29216.86448
1 1 1 1 1 1 1 1 1 1
29316.83188 29321.81134 29325.86874 29393.59825 29401.35898 29430.51967 29496.8661 29531.19894 29538.80082 29543.59877
1 1 1 1 1 1 1 1 1 1
29550.7916 29555.93689 29626.86038 29635.66814 29667.75535 29675.84394 29703.1622 29757.08056 29758.25654 29783.7593
1 1 1 1 1 1 1 1 1 1
29802.21886 29832.21787 29854.7992 29891.67194 29934.00939 29999.35187 30008.16028 30025.74652 30038.21786 30042.73682
1 1 1 1 1 1 1 1 1 1
30062.70983 30086.76481 30095.65611 30183.9305 30195.58528 30253.17107 30276.0621 30280.57596 30326.90761 30327.48519
1 1 1 1 1 1 1 1 1 1
30365.70963 30375.19115 30395.29749 30413.95931 30447.87237 30461.9908 30484.0274 30554.73486 30591.34458 30606.19832
1 1 1 1 1 1 1 1 1 1
30611.50718 30617.08188 30618.60966 30638.37092 30656.10145 30676.6659 30741.23729 30761.86021 30796.15716 30849.91589
1 1 1 1 1 1 1 1 1 1
30853.20719 30854.41782 30866.1173 30873.06356 30902.89282 30908.98677 30911.36097 30972.76399 31011.93188 31025.94829
1 1 1 1 1 1 1 1 1 1
31151.41714 31177.51552 31188.57334 31219.13759 31224.46177 31237.39267 31266.91863 31325.99874 31331.65571 31335.97737
1 1 1 1 1 1 1 1 1 1
31341.46754 31363.24736 31381.29497 31386.58362 31405.47536 31483.59427 31484.79639 31494.32082 31497.09413 31506.68346
1 1 1 1 1 1 1 1 1 1
31538.50718 31539.26559 31607.77123 31646.2595 31660.87118 31703.38063 31723.23986 31731.98312 31749.80683 31763.75495
1 1 1 1 1 1 1 1 1 1
31764.44396 31790.11085 31806.74024 31806.91802 31812.04946 31822.60265 31824.94161 31873.98132 31898.84896 31908.25594
1 1 1 1 1 1 1 1 1 1
31908.45573 31931.74274 31936.89969 31948.58033 31959.17716 31971.22504 31982.90479 32029.74485 32038.03264 32038.40928
1 1 1 1 1 1 1 1 1 1
32055.08294 32103.91396 32154.94012 32177.6822 32194.59225 32206.84722 32208.92012 32334.04416 32342.95721 32371.50152
1 1 1 1 1 1 1 1 1 1
32390.24164 32394.79347 32429.08784 32462.41976 32488.97969 32519.22408 32536.23681 32539.14609 32556.75871 32655.82463
1 1 1 1 1 1 1 1 1 1
32676.80867 32680.60721 32704.01887 32820.90948 32835.09584 32838.25195 32850.00387 32884.3786 32890.65602 32902.34894
1 1 1 1 1 1 1 1 1 1
32916.5963 32943.40901 32966.9137 32996.38521 32999.50223 33021.16964 33034.90655 33058.21697 33068.84865 33077.8687
1 1 1 1 1 1 1 1 1 1
33108.64524 33125.22973 33150.02538 33200.27493 33208.00874 33216.11898 33234.78298 33259.70308 33259.90273 33291.47892
1 1 1 1 1 1 1 1 1 1
33300.31626 33305.36083 33318.22653 33318.23329 33318.30457 33354.01753 33354.29491 33362.65241 33365.41896 33369.05117
1 1 1 1 1 1 1 1 1 1
33422.54445 33463.88449 33472.28144 33476.60003 33493.16088 33511.19814 33530.29802 33565.73896 33604.44821 33606.85339
1 1 1 1 1 1 1 1 1 1
33641.73103 33648.37545 33688.11549 33692.99327 33715.9733 33726.58383 33796.96704 33831.46012 33836.95779 33901.3568
1 1 1 1 1 1 1 1 1 1
33902.00043 33908.17913 33910.84453 33924.94839 33955.21418 33960.12977 33965.70434 33969.15927 33974.63196 34034.80029
1 1 1 1 1 1 1 1 1 1
34054.64039 34055.2891 34077.58553 34094.27401 34103.23368 34116.80988 34136.25733 34144.87573 34148.56559 34168.17036
1 1 1 1 1 1 1 1 1 1
34179.75291 34190.67565 34200.5439 34201.5756 34211.74855 34227.49216 34237.20181 34279.01824 34287.96037 34293.40071
1 1 1 1 1 1 1 1 1 1
34299.46323 34317.3152 34318.02375 34319.47275 34347.31307 34371.44642 34373.99397 34397.78413 34398.15416 34420.48481
1 1 1 1 1 1 1 1 1 1
34444.63995 34460.44442 34462.50761 34467.64314 34468.46827 34485.16661 34491.28028 34492.07721 34519.54699 34601.07419
1 1 1 1 1 1 1 1 1 1
34664.57999 34686.49258 34717.7747 34749.58508 34756.13351 34775.0559 34791.43357 34812.38777 34830.771 34831.01943
1 1 1 1 1 1 1 1 1 1
34840.97433 34849.21479 34855.80353 34869.5075 34870.66553 34964.05908 34967.70439 34979.13595 35062.55921 35065.14453
1 1 1 1 1 1 1 1 1 1
35082.323 35091.48662 35106.01099 35106.96186 35145.79146 35175.69774 35191.66276 35199.86371 35206.5416 35216.88758
1 1 1 1 1 1 1 1 1 1
35258.98358 35260.71163 35270.19209 35294.46948 35353.25946 35374.68036 35407.27312 35410.7879 35433.09215 35507.82884
1 1 1 1 1 1 1 1 1 1
35538.88678 35548.53811 35553.61874 35564.0409 35583.43344 35598.88169 35605.86614 35631.14194 35632.14079 35643.44008
1 1 1 1 1 1 1 1 1 1
35681.97943 35712.8248 35727.41282 35746.22035 35813.74391 35816.24021 35818.65844 35833.90469 35840.89853 35881.20894
1 1 1 1 1 1 1 1 1 1
35881.7362 35908.91648 35929.52307 35930.28718 35939.26382 35945.22833 35987.41162 36015.89082 36017.50497 36031.24722
1 1 1 1 1 1 1 1 1 1
36032.46063 36040.20735 36055.36176 36094.97353 36103.71248 36180.22814 36187.22169 36200.03876 36204.30096 36207.6944
1 1 1 1 1 1 1 1 1 1
36218.48178 36227.44376 36243.86856 36258.60256 36274.87806 36275.91016 36279.91057 36283.25729 36291.58279 36304.55712
1 1 1 1 1 1 1 1 1 1
36348.38069 36351.36267 36377.39393 36409.50602 36428.77991 36433.02889 36439.33122 36451.77061 36472.83298 36488.98065
1 1 1 1 1 1 1 1 1 1
36508.23864 36520.27723 36534.18511 36558.48708 36589.21292 36599.27042 36600.49192 36601.07958 36630.73921 36683.05599
1 1 1 1 1 1 1 1 1 1
36688.41687 36708.23711 36708.75312 36732.51005 36750.08713 36758.02055 36781.29734 36797.79246 36805.50707 36829.37158
1 1 1 1 1 1 1 1 1 1
36838.43802 36840.84932 36845.37157 36856.27919 36870.6885 36911.26186 36922.66206 36929.50731 36960.79584 36965.31809
1 1 1 1 1 1 1 1 1 1
36969.78829 36984.60232 37008.17845 37009.07694 37014.01454 37018.58814 37028.64398 37036.28314 37064.63282 37075.77619
1 1 1 1 1 1 1 1 1 1
37119.17681 37135.93025 37143.85523 37149.11559 37214.34066 37223.99238 37239.17407 37240.11334 37245.86514 37258.96295
1 1 1 1 1 1 1 1 1 1
37275.09048 37306.14735 37312.85357 37363.1056 37364.35355 37391.87237 37396.5711 37415.03543 37437.30722 37468.47498
1 1 1 1 1 1 1 1 1 1
37529.56415 37542.42627 37554.0138 37569.62228 37583.91963 37620.86426 37625.68727 37634.1613 37644.86654 37653.16526
1 1 1 1 1 1 1 1 1 1
37656.94755 37668.89026 37680.55458 37691.92767 37709.17649 37709.72956 37712.89447 37730.31338 37747.91379 37760.04765
1 1 1 1 1 1 1 1 1 1
37782.86471 37790.93953 37798.28405 37803.51321 37803.78855 37806.24041 37807.10625 37809.76469 37812.50445 37819.83009
1 1 1 1 1 1 1 1 1 1
37829.95556 37841.53825 37850.91489 37854.6558 37855.8305 37894.44361 37923.76655 37929.52093 37929.93318 37934.97271
1 1 1 1 1 1 1 1 1 1
37946.70042 37966.33893 37967.02449 37976.07387 37999.59905 38008.97844 38009.95662 38015.31069 38048.29284 38075.26284
1 1 1 1 1 1 1 1 1 1
38082.58337 38134.57404 38155.90835 38159.56134 38182.86915 38188.89195 38211.14289 38292.14116 38296.16185 38303.13324
1 1 1 1 1 1 1 1 1 1
38304.95905 38327.2137 38368.90506 38400.05111 38402.67248 38405.23134 38408.59012 38422.71358 38432.83749 38433.26868
1 1 1 1 1 1 1 1 1 1
38471.68297 38476.91567 38479.94408 38483.72599 38491.65312 38512.63134 38513.05168 38530.37834 38591.54131 38608.39478
1 1 1 1 1 1 1 1 1 1
38612.89308 38655.82558 38721.82904 38738.0142 38746.5298 38746.63601 38750.2014 38758.77897 38764.39933 38768.82232
1 1 1 1 1 1 1 1 1 1
38798.39086 38815.94897 38817.70155 38820.48395 38824.15411 38884.3708 38887.78262 38889.87033 38906.20896 38925.60078
1 1 1 1 1 1 1 1 1 1
38932.04137 38989.61387 38993.24336 39034.08137 39036.8838 39037.89243 39040.71433 39052.77015 39075.85557 39084.70943
1 1 1 1 1 1 1 1 1 1
39086.28914 39092.53699 39101.93989 39111.9216 39124.14494 39134.74489 39135.67584 39145.21302 39156.68995 39159.6583
1 1 1 1 1 1 1 1 1 1
39199.1059 39204.67334 39224.06219 39239.56407 39239.6384 39260.58911 39262.46618 39298.00728 39299.62456 39301.02369
1 1 1 1 1 1 1 1 1 1
39337.20943 39337.90389 39342.52267 39358.56336 39358.78748 39368.90364 39403.4831 39413.48667 39415.44692 39436.54925
1 1 1 1 1 1 1 1 1 1
39442.80394 39448.24826 39451.46699 39461.7776 39467.88316 39492.44865 39496.52535 39562.52429 39587.61419 39612.47567
1 1 1 1 1 1 1 1 1 1
39623.56836 39634.22711 39645.70904 39647.68348 39653.85778 39687.65583 39703.74488 39710.79766 39725.70278 39758.05095
1 1 1 1 1 1 1 1 1 1
39765.06147 39789.56925 39805.78196 39814.40857 39830.88252 39840.36564 39843.84615 39862.09827 39900.29481 39910.86477
1 1 1 1 1 1 1 1 1 1
39920.09645 39931.42447 39939.49943 39958.03454 39981.22984 39982.5565 39994.14221 40007.98851 40032.78371 40064.43476
1 1 1 1 1 1 1 1 1 1
40087.25358 40093.45597 40112.04183 40135.76291 40137.80003 40151.71675 40167.98815 40171.32868 40188.57795 40202.64158
1 1 1 1 1 1 1 1 1 1
40218.46047 40249.48242 40252.95865 40277.57971 40285.02515 40303.74928 40309.10782 40317.79387 40318.84742 40343.2821
1 1 1 1 1 1 1 1 1 1
40355.90281 40379.90448 40447.78416 40453.83153 40460.52043 40466.69455 40473.54825 40480.79397 40489.48767 40495.26216
1 1 1 1 1 1 1 1 1 1
40498.29963 40540.12462 40552.79024 40562.7449 40574.10528 40590.53916 40593.39145 40598.59933 40606.4072 40637.39521
1 1 1 1 1 1 1 1 1 1
40679.82675 40719.79525 40778.37525 40779.20844 40790.06098 40803.80341 40826.44775 40846.99347 40859.55946 40868.55442
1 1 1 1 1 1 1 1 1 1
40882.83535 40894.89823 40899.31612 40905.46136 40913.35322 40925.29349 40934.06357 40944.50895 40958.45002 40959.17313
1 1 1 1 1 1 1 1 1 1
40961.23435 40964.10746 40974.77415 40987.51579 40995.69762 41006.01915 41012.75618 41028.24134 41028.80829 41035.02319
1 1 1 1 1 1 1 1 1 1
41041.5986 41079.42318 41093.03288 41124.25846 41132.29294 41133.10063 41134.36724 41134.95944 41138.70542 41158.08154
1 1 1 1 1 1 1 1 1 1
41163.65455 41163.78567 41193.70531 41199.30543 41224.81874 41235.47522 41239.15434 41247.5699 41263.86408 41265.26403
1 1 1 1 1 1 1 1 1 1
41273.98056 41282.13491 41309.06957 41315.55972 41317.02792 41336.17 41367.11528 41373.01415 41374.54103 41374.97696
1 1 1 1 1 1 1 1 1 1
41383.92801 41386.80089 41387.47997 41387.99385 41391.62004 41393.98842 41414.74231 41430.15745 41431.49812 41445.23411
1 1 1 1 1 1 1 1 1 1
41453.51129 41463.21278 41494.31415 41504.57271 41541.09399 41555.68023 41569.68486 41594.78235 41598.14986 41601.20197
1 1 1 1 1 1 1 1 1 1
41610.77638 41618.86097 41623.61592 41627.1209 41670.38903 41670.71269 41672.00975 41685.63174 41727.90328 41776.47472
1 1 1 1 1 1 1 1 1 1
41806.47756 41816.70065 41847.04556 41878.59507 41919.05975 41919.44976 41924.62904 41950.103 41981.27482 41990.73045
1 1 1 1 1 1 1 1 1 1
42017.83852 42054.72994 42062.54593 42064.67956 42084.25547 42090.61653 42108.0002 42176.90829 42177.15496 42202.39262
1 1 1 1 1 1 1 1 1 1
42211.40605 42217.08519 42222.20509 42233.96254 42234.52306 42235.56698 42244.67913 42245.09512 42246.50833 42283.71753
1 1 1 1 1 1 1 1 1 1
42295.64525 42310.09137 42338.83137 42346.58878 42353.98884 42364.12 42388.6001 42407.2755 42422.63502 42463.4793
1 1 1 1 1 1 1 1 1 1
42484.9275 42495.44398 42531.45773 42533.40038 42536.12991 42539.95084 42545.82742 42548.76059 42555.18759 42573.30908
1 1 1 1 1 1 1 1 1 1
42592.82771 42593.26038 42604.54716 42606.34723 42615.12521 42622.51332 42631.29645 42664.7906 42742.79185 42755.31622
1 1 1 1 1 1 1 1 1 1
42788.59677 42793.1963 42801.21504 42826.17211 42835.74549 42852.91129 42854.58019 42855.00328 42888.75207 42894.0207
1 1 1 1 1 1 1 1 1 1
42928.9405 42951.06119 42971.28923 42994.05327 42995.88798 43000.507 43007.86425 43015.46148 43021.78636 43024.9237
1 1 1 1 1 1 1 1 1 1
43027.55258 43028.07285 43039.05417 43050.49922 43052.21978 43059.20861 43064.4653 43066.60909 43082.77822 43101.36476
1 1 1 1 1 1 1 1 1 1
43142.38266 43181.70886 43190.19697 43191.15122 43191.39645 43241.23902 43245.18656 43246.98294 43250.65428 43281.9246
1 1 1 1 1 1 1 1 1 1
43284.17184 43287.31884 43293.39305 43295.42155 43313.02204 43315.01952 43318.00804 43318.40768 43323.64078 43325.31193
1 1 1 1 1 1 1 1 1 1
43332.83744 43370.37957 43379.48381 43379.77831 43434.40255 43441.641 43496.36866 43508.56802 43516.37662 43528.95806
1 1 1 1 1 1 1 1 1 1
43552.60124 43564.49437 43571.96907 43634.64981 43636.97087 43667.41565 43724.38626 43748.99516 43755.4504 43756.57926
1 1 1 1 1 1 1 1 1 1
43768.99866 43781.33299 43791.21202 43794.00942 43796.67688 43807.59238 43816.50376 43819.49922 43867.783 43894.14821
1 1 1 1 1 1 1 1 1 1
43904.91335 43921.70054 43925.07972 43964.70403 43989.75161 44010.67494 44027.96788 44032.1149 44099.95094 44147.76652
1 1 1 1 1 1 1 1 1 1
44151.2529 44242.27158 44261.35389 44279.06479 44295.98545 44334.98519 44358.89951 44367.34382 44368.14756 44368.51359
1 1 1 1 1 1 1 1 1 1
44368.67011 44385.84288 44396.80246 44409.08068 44423.38722 44449.71048 44467.70153 44482.70705 44487.22841 44492.96741
1 1 1 1 1 1 1 1 1 1
44503.61241 44517.47406 44530.39356 44555.02997 44599.50592 44605.60052 44618.9726 44651.21782 44691.30455 44697.61169
1 1 1 1 1 1 1 1 1 1
44700.41408 44701.60568 44709.66952 44725.97032 44728.78126 44766.72386 44776.9159 44782.26235 44812.41359 44822.052
1 1 1 1 1 1 1 1 1 1
44826.96789 44828.78437 44828.80865 44874.86232 44884.18296 44895.669 44917.8196 44923.88591 44975.0933 44983.99524
1 1 1 1 1 1 1 1 1 1
44989.32738 45001.67137 45007.18747 45017.37392 45018.67952 45042.30058 45047.54041 45054.9538 45055.00962 45120.14398
1 1 1 1 1 1 1 1 1 1
45135.45188 45141.11415 45141.81278 45158.136 45172.30226 45176.64529 45191.63036 45224.23314 45241.25776 45254.10802
1 1 1 1 1 1 1 1 1 1
45258.61402 45258.91466 45268.73454 45274.22673 45277.32161 45281.81173 45286.13245 45294.83994 45295.39127 45355.23086
1 1 1 1 1 1 1 1 1 1
45369.55993 45394.0123 45426.25 45477.97661 45504.19175 45511.57112 45540.26933 45545.79232 45582.76214 45583.58072
1 1 1 1 1 1 1 1 1 1
45614.38544 45625.09346 45676.76977 45676.85895 45678.74949 45685.01664 45699.25134 45702.88957 45712.31886 45733.5924
1 1 1 1 1 1 1 1 1 1
45797.36622 45818.55306 45850.63516 45852.63737 45861.4107 45893.71595 45900.13446 45901.44917 45918.99757 45926.40532
1 1 1 1 1 1 1 1 1 1
45942.99839 45993.38553 46008.15235 46025.53693 46025.85353 46062.84287 46063.93827 46075.27427 46079.66196 46116.22173
1 1 1 1 1 1 1 1 1 1
46116.8013 46127.31167 46130.95409 46139.353 46155.08952 46161.77792 46213.64478 46238.41095 46242.93284 46246.96766
1 1 1 1 1 1 1 1 1 1
46284.13748 46293.44405 46306.61599 46318.75284 46320.61033 46328.7049 46336.96071 46348.55883 46398.33851 46434.88134
1 1 1 1 1 1 1 1 1 1
46446.95929 46452.67436 46461.91714 46509.33139 46548.1477 46557.45072 46559.12993 46574.02426 46575.08113 46576.10439
1 1 1 1 1 1 1 1 1 1
46576.68115 46578.71272 46585.74359 46625.11116 46635.62773 46653.1039 46714.36995 46778.79353 46786.20716 46833.99851
1 1 1 1 1 1 1 1 1 1
46847.07892 46858.67068 46884.35283 46884.99053 46886.9353 46889.48254 46897.26797 46910.33749 46921.10248 46924.16602
1 1 1 1 1 1 1 1 1 1
46945.1741 46950.25198 46959.94396 46974.98243 47000.86575 47107.35419 47119.77885 47172.47153 47209.53661 47211.38772
1 1 1 1 1 1 1 1 1 1
47227.08298 47244.03658 47246.8062 47249.99062 47258.15093 47262.56619 47308.50043 47310.78984 47326.79144 47351.42722
1 1 1 1 1 1 1 1 1 1
47360.78293 47385.20507 47411.78912 47416.05879 47417.82869 47427.7951 47442.15682 47444.1487 47490.25317 47547.65208
1 1 1 1 1 1 1 1 1 1
47553.09478 47556.43909 47577.44261 47596.60773 47613.40818 47665.89303 47675.66337 47683.23657 47723.10304 47732.0466
1 1 1 1 1 1 1 1 1 1
47760.48273 47791.2197 47803.06667 47815.62218 47842.43633 47857.85541 47871.75373 47902.18725 47959.00645 47968.09999
1 1 1 1 1 1 1 1 1 1
47977.18725 47998.39682 48013.40457 48036.54779 48089.85329 48097.64417 48102.53104 48118.37091 48126.35525 48129.27732
1 1 1 1 1 1 1 1 1 1
48164.23128 48190.83336 48230.52783 48288.53006 48335.51625 48360.52231 48380.89957 48393.19056 48426.91185 48445.14113
1 1 1 1 1 1 1 1 1 1
48449.52772 48453.00423 48456.78746 48482.89362 48508.07265 48552.40533 48573.8517 48650.98376 48653.14679 48667.06794
1 1 1 1 1 1 1 1 1 1
48710.43154 48735.87871 48746.04396 48747.96861 48763.23599 48772.66215 48784.55228 48830.7323 48882.08317 48904.29012
1 1 1 1 1 1 1 1 1 1
48935.74115 48950.84288 48969.94787 48991.98355 48997.25119 49004.72712 49042.90145 49050.85828 49068.50218 49069.77774
1 1 1 1 1 1 1 1 1 1
49079.84406 49091.01011 49103.71669 49111.42749 49111.49115 49115.40783 49121.87015 49220.86576 49223.29964 49229.35336
1 1 1 1 1 1 1 1 1 1
49238.03702 49317.3985 49322.92839 49340.01328 49372.54091 49391.3757 49406.77509 49455.71771 49474.96574 49476.01035
1 1 1 1 1 1 1 1 1 1
49485.36939 49525.19765 49528.29047 49554.67915 49555.80823 49604.97441 49620.96813 49672.61226 49704.76729 49716.08996
1 1 1 1 1 1 1 1 1 1
49766.16166 49767.53874 49769.16984 49773.36109 49777.2153 49780.1771 49832.64746 49863.9024 49900.84249 49930.48873
1 1 1 1 1 1 1 1 1 1
49940.00059 49940.55353 49966.24335 49976.93715 49997.4875 50001.27282 50011.69106 50014.29006 50075.85001 50078.32817
1 1 1 1 1 1 1 1 1 1
50083.24151 50086.63515 50138.74108 50142.0207 50164.01044 50206.14187 50225.82045 50233.98463 50317.87484 50329.83591
1 1 1 1 1 1 1 1 1 1
50342.21241 50347.466 50350.42033 50360.62885 50363.35115 50365.09827 50373.10604 50392.36888 50402.74704 50416.15894
1 1 1 1 1 1 1 1 1 1
50474.56517 50474.89975 50496.18194 50508.15632 50533.83751 50617.64187 50670.71403 50704.7586 50732.37162 50739.36604
1 1 1 1 1 1 1 1 1 1
50756.55062 50763.65584 50765.31994 50790.13025 50827.34401 50860.48985 50861.45751 50865.40862 50883.82848 50910.60536
1 1 1 1 1 1 1 1 1 1
50910.6527 50913.03849 50949.08421 50965.81845 50988.39313 51017.43604 51032.83841 51050.36066 51107.29907 51126.52593
1 1 1 1 1 1 1 1 1 1
51132.67759 51206.52279 51222.99786 51239.72342 51332.4885 51353.46231 51367.37446 51401.33122 51416.20041 51439.25574
1 1 1 1 1 1 1 1 1 1
51465.42035 51467.27875 51513.53842 51518.15402 51528.68171 51552.63966 51564.03762 51568.10564 51584.53481 51687.75054
1 1 1 1 1 1 1 1 1 1
51708.81392 51715.69491 51752.34452 51828.65795 51836.25116 51866.92575 51898.02678 51907.27065 51907.97786 51963.77968
1 1 1 1 1 1 1 1 1 1
51975.48306 52046.95926 52062.54889 52083.60762 52102.9318 52111.61058 52183.85437 52229.81531 52238.89879 52259.07091
1 1 1 1 1 1 1 1 1 1
52275.27675 52316.11724 52335.11136 52343.76033 52366.89913 52383.91404 52416.59145 52433.73347 52452.11209 52541.23721
1 1 1 1 1 1 1 1 1 1
52582.45634 52587.05873 52631.1504 52645.28448 52675.52192 52683.89721 52691.50434 52703.29722 52721.56178 52738.10404
1 1 1 1 1 1 1 1 1 1
52856.89077 52875.73972 52930.7619 52979.80498 52997.80195 53051.92513 53064.78751 53074.26246 53086.63097 53113.69606
1 1 1 1 1 1 1 1 1 1
53116.83645 53178.73999 53205.78752 53242.38134 53268.11753 53282.24825 53294.84259 53297.19095 53300.02335 53304.27651
1 1 1 1 1 1 1 1 1 1
53314.99498 53319.9143 53339.86814 53341.43769 53344.13124 53371.55151 53418.19194 53462.19231 53470.06457 53471.52212
1 1 1 1 1 1 1 1 1 1
53485.50589 53511.77025 53527.42558 53565.89762 53575.58092 53632.22382 53637.18229 53682.70811 53704.74968 53800.46341
1 1 1 1 1 1 1 1 1 1
53834.89125 53876.87551 53905.96363 53908.5995 53928.99216 53973.05863 53988.03246 54008.33866 54022.73737 54077.54939
1 1 1 1 1 1 1 1 1 1
54083.05555 54141.45947 54145.98421 54206.22179 54234.84892 54267.01194 54335.24165 54404.83541 54410.43923 54416.31771
1 1 1 1 1 1 1 1 1 1
54441.25209 54455.96631 54494.9356 54543.16171 54590.09034 54595.39574 54631.11424 54647.32078 54677.30235 54680.65283
1 1 1 1 1 1 1 1 1 1
54803.98934 54851.41237 54885.28547 55056.01612 55057.93856 55068.7269 55082.50656 55139.19051 55191.82065 55273.77456
1 1 1 1 1 1 1 1 1 1
55300.01931 55484.64273 55623.29642 55656.78031 55660.20416 55664.49541 55693.3573 55726.0961 55794.58278 55800.95979
1 1 1 1 1 1 1 1 1 1
55860.90432 55882.64197 55888.36651 55899.4617 55938.63945 55980.19599 55993.91371 56015.67544 56036.51648 56110.42814
1 1 1 1 1 1 1 1 1 1
56147.73311 56157.26195 56181.52023 56182.71546 56208.93648 56216.77893 56251.62873 56261.95399 56301.17992 56308.85373
1 1 1 1 1 1 1 1 1 1
56317.58332 56494.33642 56532.11849 56535.24363 56558.16074 56640.59104 56717.13858 56720.38726 56720.61968 56722.28247
1 1 1 1 1 1 1 1 1 1
56732.15944 56756.0606 56853.81379 56869.77624 56899.71485 56954.08833 56960.16603 56975.03642 56996.09438 57010.1622
1 1 1 1 1 1 1 1 1 1
57029.39386 57047.99091 57083.24832 57091.46248 57131.61137 57164.60067 57196.49659 57247.82914 57318.04885 57318.4414
1 1 1 1 1 1 1 1 1 1
57332.58523 57403.47717 57415.39154 57420.1856 57427.14105 57479.75785 57480.48312 57527.17434 57559.89058 57628.75709
1 1 1 1 1 1 1 1 1 1
57710.71909 57765.70759 57805.1414 57841.89486 57849.34349 57897.6534 57905.8291 57924.64244 57928.76564 57948.65112
1 1 1 1 1 1 1 1 1 1
57994.47274 58024.40176 58071.53528 58080.50014 58110.74705 58113.05305 58133.20978 58178.58433 58316.01052 58380.59321
1 1 1 1 1 1 1 1 1 1
58449.33294 58511.54873 58533.49751 58568.91908 58612.23611 58612.84805 58613.80008 58614.71954 58670.84946 58683.51872
1 1 1 1 1 1 1 1 1 1
58692.96293 58760.81747 58824.49903 58913.76002 58970.41264 58973.79878 59001.48731 59288.03173 59404.19598 59472.34268
1 1 1 1 1 1 1 1 1 1
59480.842 59566.77875 59622.89682 59798.4239 59879.061 59885.00012 59920.48058 60085.76449 60101.44384 60177.53783
1 1 1 1 1 1 1 1 1 1
60187.9547 60216.82003 60407.31727 60414.20593 60492.32148 60502.36646 60576.84445 60675.75398 60675.79303 60679.98793
1 1 1 1 1 1 1 1 1 1
60847.56004 60958.21349 60996.3089 61023.75202 61067.56777 61102.02675 61260.51153 61382.06131 61384.54935 61432.2084
1 1 1 1 1 1 1 1 1 1
61615.84752 61663.58283 61778.79067 61819.84592 61828.73586 61853.83195 61861.8622 62038.35297 62052.20232 62091.43961
1 1 1 1 1 1 1 1 1 1
62192.25594 62307.04322 62360.01908 62478.72933 62858.83411 62887.0343 63154.33225 63183.62447 63199.06624 63375.37993
1 1 1 1 1 1 1 1 1 1
63426.3478 63633.9008 64012.32086 64050.00926 64052.1951 64271.11979 64486.40644 64525.11155 64539.88869 64548.11004
1 1 1 1 1 1 1 1 1 1
64592.71068 64707.81886 65013.0088 65107.53227 65353.38112 65389.89613 65397.21855 65422.04962 65441.30143 65635.9975
1 1 1 1 1 1 1 1 1 1
65776.22733 65956.82189 66365.12731 66416.6068 66473.13653 66607.29432 66636.72062 66922.11848 67230.09536 67278.11292
1 1 1 1 1 1 1 1 1 1
67357.92615 67362.35569 67379.49375 67893.55998 68324.0096 68576.68656 68980.28115 68985.59811 69348.60687 69351.41417
1 1 1 1 1 1 1 1 1 1
69518.73751 69521.13573 70094.84888 70324.83769 70744.63869 70931.61002 71467.89992 71537.22292 71612.7081 72787.28668
1 1 1 1 1 1 1 1 1 1
73980.21462 75530.38563 75596.79344 <NA>
1 1 1 6486
Kết quả trả về là bảng tần số của từng cột; nếu cột có giá trị `NA` (như trường hợp cột `salary`) thì số lượng `NA` sẽ hiện ra ở dòng cuối. Trong trường hợp kết quả có quá nhiều cột và ta chỉ quan tâm tách ra những cột nào có giá trị `NA`, đồng thời muốn biết có bao nhiêu `NA` trong mỗi cột đó (dù là kiểu `character` hay `numeric`), thì ta sẽ làm thêm một bước kiểm tra sau.
### For every element of the list `check_na`, keep only the entry whose name
### is `NA`, i.e. the count of `NA` values for that column of `df`.
### Elements without an `NA` entry are returned with length zero.
check_1 <- lapply(check_na, function(x) {
  x[which(is.na(names(x)))]
})
### Helper used to detect empty list elements: returns TRUE only when `x` is
### of integer type and has length zero, FALSE otherwise.
is.integer0 <- function(x) {
  is.integer(x) && length(x) == 0L
}
### Flag each element of `check_1` that is empty (no `NA` count to report).
check_2 <- lapply(check_1, is.integer0)

### Keep only the columns that actually contain missing values.
check_1[!unlist(check_2)]
$score1
<NA>
3347
$score2
<NA>
3347
$salary
<NA>
6486
2.4 Thể hiện giá trị NA
trên toàn bộ dataset qua heatmap
2.4.1 Cách 1: Sử dụng package mice
library(mice)
### Tabulate (and plot) the missing-data patterns of `df`;
### column names are rotated in the plot so that they stay readable.
mice::md.pattern(df, plot = TRUE, rotate.names = TRUE)
no stud.id name gender age height weight religion nc.score semester major minor online.tutorial graduated score1 score2 salary
1753 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
3139 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
3347 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 3
0 0 0 0 0 0 0 0 0 0 0 0 0 0 3347 3347 6486 13180
Kết quả này đọc như sau: có 1753 sinh viên có đủ thông tin ở tất cả các cột; 3139 sinh viên có đủ thông tin, chỉ trừ cột `salary` là có missing value; 3347 sinh viên có đủ thông tin, chỉ trừ ba cột `score1`, `score2` và `salary`. Có tổng cộng 13180 giá trị `NA` trong toàn bộ dataset. Cụ thể cách phân tích kết quả missing value theo package `mice` được trình bày ở đây.
2.4.2 Cách 2: Sử dụng package VIM
Sử dụng đồ thị này để thể hiện tỷ lệ % giá trị NA
trên toàn dataset.
library(VIM)
### Aggregate plot of the missing values in `df`.
### `labels` must come from `df` itself: `names(data)` would refer to the base
### function `data` and evaluate to NULL instead of the column names.
aggr_plot <- VIM::aggr(df,
                       col = c("navyblue", "red"),
                       numbers = TRUE,
                       sortVars = TRUE,
                       labels = names(df),
                       cex.axis = 1,
                       gap = 3,
                       ylab = c("Histogram of missing data", "Pattern"))
Variables sorted by number of missings:
Variable Count
salary 0.7872315
score1 0.4062386
score2 0.4062386
no 0.0000000
stud.id 0.0000000
name 0.0000000
gender 0.0000000
age 0.0000000
height 0.0000000
weight 0.0000000
religion 0.0000000
nc.score 0.0000000
semester 0.0000000
major 0.0000000
minor 0.0000000
online.tutorial 0.0000000
graduated 0.0000000
Ta có thể so sánh từng cặp chỉ tiêu với nhau thông qua đồ thị này, với các biện luận tham khảo ở đây [2].
### Margin plots for each pair of the three columns that contain `NA` values.
VIM::marginplot(df[, c("salary", "score1")])
VIM::marginplot(df[, c("salary", "score2")])
VIM::marginplot(df[, c("score1", "score2")])
3 Các lệnh group và summary dữ liệu
Ở thời điểm này, ta đã có cái nhìn tổng quát về bộ dữ liệu. Lúc này việc phân tích thống kê mô tả sẽ đi theo câu hỏi nghiên cứu/chủ đề mà bạn quan tâm để làm cơ sở chọn ra những biến/cột/variable phân tích mô tả cụ thể hơn. Mình chọn quan tâm về số lượng nam và nữ ở cột giới tính gender
theo học các ngành major
trong bộ dữ liệu này.
3.1 Áp dụng các lệnh tạo bảng summary
3.1.1 Sử dụng lệnh table()
cho hai tham số
### Tham số `gender` ở vị trí hàng, tham số `major` ở vị trí cột
table(df$gender, df$major, useNA = "ifany")
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 959 461 745 276 978 691
Male 638 863 881 949 477 321
Nếu dùng lệnh table()
cho ba tham số sẽ tạo ra dạng array 3 chiều, chẻ ra thành từng matrix 2 chiều.
### Tham số `religion` ở vị trí thứ 3 sẽ được tách ra tương ứng các matrix giữa `gender` và `major`
table(df$gender, df$major, df$religion, useNA = "ifany")
, , = Catholic
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 322 162 244 95 307 226
Male 223 301 313 344 161 99
, , = Muslim
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 47 14 35 8 45 27
Male 22 28 38 32 21 13
, , = Orthodox
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 71 32 62 24 68 52
Male 29 55 70 59 38 25
, , = Other
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 313 157 229 86 328 246
Male 221 273 280 296 143 116
, , = Protestant
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 206 96 175 63 230 140
Male 143 206 180 218 114 68
3.1.2 Add margins vào table
Tạo bảng summary cho hai tham số gender
và major
### Two-way frequency table: gender in rows, major in columns.
gender_major <- table(df$gender, df$major, useNA = "ifany")

### Name the two dimensions so the printed table carries headers,
### then append a `sum` margin along every dimension.
names(dimnames(gender_major)) <- c("Gender", "Major")
addmargins(A = gender_major,
           margin = seq_along(dim(gender_major)),
           FUN = sum,
           quiet = TRUE)
Major
Gender Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 959 461 745 276 978 691 4110
Male 638 863 881 949 477 321 4129
sum 1597 1324 1626 1225 1455 1012 8239
Tạo bảng summary cho ba tham số gender
và major
và religion
### Three-way frequency table: gender x major, one slice per religion.
gender_major_religion <- table(df$gender, df$major, df$religion, useNA = "ifany")

### Name the three dimensions, then append a `sum` margin along each of them
### (this also adds an extra `Religion = sum` slice).
names(dimnames(gender_major_religion)) <- c("Gender", "Major", "Religion")
addmargins(A = gender_major_religion,
           margin = seq_along(dim(gender_major_religion)),
           FUN = sum,
           quiet = TRUE)
, , Religion = Catholic
Major
Gender Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 322 162 244 95 307 226 1356
Male 223 301 313 344 161 99 1441
sum 545 463 557 439 468 325 2797
, , Religion = Muslim
Major
Gender Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 47 14 35 8 45 27 176
Male 22 28 38 32 21 13 154
sum 69 42 73 40 66 40 330
, , Religion = Orthodox
Major
Gender Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 71 32 62 24 68 52 309
Male 29 55 70 59 38 25 276
sum 100 87 132 83 106 77 585
, , Religion = Other
Major
Gender Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 313 157 229 86 328 246 1359
Male 221 273 280 296 143 116 1329
sum 534 430 509 382 471 362 2688
, , Religion = Protestant
Major
Gender Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 206 96 175 63 230 140 910
Male 143 206 180 218 114 68 929
sum 349 302 355 281 344 208 1839
, , Religion = sum
Major
Gender Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 959 461 745 276 978 691 4110
Male 638 863 881 949 477 321 4129
sum 1597 1324 1626 1225 1455 1012 8239
3.1.3 Sử dụng lệnh prop.table()
### Lệnh này trả về tỷ lệ giữa nam và nữ trong cùng vector `gender`
prop.table(table(df$gender), margin = NULL)
Female Male
0.4988469 0.5011531
### Nếu có 2 tham số trở lên thì các bạn chú ý tham số margin nhé
prop.table(table(df$gender, df$major, useNA = "ifany"), margin = NULL)
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 0.11639762 0.05595339 0.09042360 0.03349921 0.11870373 0.08386940
Male 0.07743658 0.10474572 0.10693045 0.11518388 0.05789538 0.03896104
### Proportions relative to the grand total (margin = NULL),
### with `sum` margins added along both dimensions.
prop_all <- prop.table(table(df$gender, df$major, useNA = "ifany"), margin = NULL)
addmargins(A = prop_all,
           margin = seq_along(dim(prop_all)),
           FUN = sum,
           quiet = TRUE)
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 0.11639762 0.05595339 0.09042360 0.03349921 0.11870373 0.08386940 0.49884695
Male 0.07743658 0.10474572 0.10693045 0.11518388 0.05789538 0.03896104 0.50115305
sum 0.19383420 0.16069911 0.19735405 0.14868309 0.17659910 0.12283044 1.00000000
### Proportions within each row (margin = 1, every gender sums to 1),
### with `sum` margins added along both dimensions.
prop_by_row <- prop.table(table(df$gender, df$major, useNA = "ifany"), margin = 1)
addmargins(A = prop_by_row,
           margin = seq_along(dim(prop_by_row)),
           FUN = sum,
           quiet = TRUE)
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 0.23333333 0.11216545 0.18126521 0.06715328 0.23795620 0.16812652 1.00000000
Male 0.15451683 0.20900945 0.21336885 0.22983773 0.11552434 0.07774279 1.00000000
sum 0.38785017 0.32117490 0.39463406 0.29699102 0.35348054 0.24586932 2.00000000
### Proportions within each column (margin = 2, every major sums to 1),
### with `sum` margins added along both dimensions.
prop_by_col <- prop.table(table(df$gender, df$major, useNA = "ifany"), margin = 2)
addmargins(A = prop_by_col,
           margin = seq_along(dim(prop_by_col)),
           FUN = sum,
           quiet = TRUE)
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences sum
Female 0.6005009 0.3481873 0.4581796 0.2253061 0.6721649 0.6828063 2.9871452
Male 0.3994991 0.6518127 0.5418204 0.7746939 0.3278351 0.3171937 3.0128548
sum 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 6.0000000
3.2 Áp dụng các lệnh summary trong package dplyr
library(dplyr)
### For every gender x religion group: mean, median, standard deviation,
### group size, maximum, minimum and sum of `age`.
summary_all <- df |>
  dplyr::group_by(gender, religion) |>
  dplyr::summarise(trung_binh_age = mean(age),
                   trung_vi_age = median(age),
                   do_lech_chuan_age = sd(age),
                   so_luong = n(),
                   max_age = max(age),
                   min_age = min(age),
                   tong_so_tuoi = sum(age))

summary_all
# A tibble: 10 × 9
# Groups: gender [2]
gender religion trung_binh_age trung_vi_age do_lech_chuan_age so_luong max_age min_age tong_so_tuoi
<chr> <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 Female Catholic 22.4 21 5.65 1356 63 18 30435
2 Female Muslim 22.3 21 6.14 176 62 18 3932
3 Female Orthodox 22.5 21 5.95 309 63 18 6961
4 Female Other 22.6 21 6.27 1359 64 18 30653
5 Female Protestant 23.0 21 7.23 910 64 18 20933
6 Male Catholic 22.4 21 5.69 1441 63 18 32290
7 Male Muslim 22.3 21 4.56 154 55 18 3438
8 Male Orthodox 22.6 21 6.03 276 63 18 6232
9 Male Other 22.8 21 6.52 1329 63 18 30240
10 Male Protestant 22.2 21 5.17 929 63 18 20606
library(dplyr)
### For every major: number of distinct majors inside the group and number of
### students enrolled; print all rows of the result.
df |>
  dplyr::group_by(major) |>
  dplyr::summarise(so_luong_mon = n_distinct(major),
                   so_nguoi_theo_hoc = n()) |>
  print(n = Inf)
# A tibble: 6 × 3
major so_luong_mon so_nguoi_theo_hoc
<chr> <int> <int>
1 Biology 1 1597
2 Economics and Finance 1 1324
3 Environmental Sciences 1 1626
4 Mathematics and Statistics 1 1225
5 Political Science 1 1455
6 Social Sciences 1 1012
library(dplyr)
### Number of students for every major x gender combination (long format).
df_major_gender <- df |>
  dplyr::group_by(major, gender) |>
  dplyr::summarise(so_nguoi_theo_hoc = n())

### Print every row of the summary.
df_major_gender |> print(n = Inf)
# A tibble: 12 × 3
# Groups: major [6]
major gender so_nguoi_theo_hoc
<chr> <chr> <int>
1 Biology Female 959
2 Biology Male 638
3 Economics and Finance Female 461
4 Economics and Finance Male 863
5 Environmental Sciences Female 745
6 Environmental Sciences Male 881
7 Mathematics and Statistics Female 276
8 Mathematics and Statistics Male 949
9 Political Science Female 978
10 Political Science Male 477
11 Social Sciences Female 691
12 Social Sciences Male 321
# df |> dplyr::group_by(major, minor) |>
# dplyr::summarise(so_luong_mon = n_distinct(major),
# so_nguoi_theo_hoc = n()) |>
# print(n = Inf)
#
# df |> dplyr::group_by(major, minor) |>
# dplyr::summarise(so_luong_mon = n_distinct(minor),
# so_nguoi_theo_hoc = n())|>
# print(n = Inf)
4 Vẽ đồ thị mô tả dữ liệu
4.1 Vẽ đồ thị cột
4.1.1 Đồ thị cột side-by-side
Nếu dataset ở dạng như sau thì khi ta vẽ đồ thị cột sẽ dùng theo dạng formula y ~ x1 + x2
df_major_gender
# A tibble: 12 × 3
# Groups: major [6]
major gender so_nguoi_theo_hoc
<chr> <chr> <int>
1 Biology Female 959
2 Biology Male 638
3 Economics and Finance Female 461
4 Economics and Finance Male 863
5 Environmental Sciences Female 745
6 Environmental Sciences Male 881
7 Mathematics and Statistics Female 276
8 Mathematics and Statistics Male 949
9 Political Science Female 978
10 Political Science Male 477
11 Social Sciences Female 691
12 Social Sciences Male 321
### Side-by-side bar chart drawn from the long-format summary through the
### formula interface: the bars of both genders are grouped by major.
barplot(so_nguoi_theo_hoc ~ gender + major,
        data = df_major_gender,
        beside = TRUE,
        width = 1,
        # hatched fill, colours and angles recycled across the bars
        col = c("blue", "red"),
        density = 20,
        angle = c(45, 135),
        # title and axis labels
        main = "Số lượng sinh viên theo học ở các chuyên ngành khác nhau",
        xlab = "",
        ylab = "Sinh viên",
        las = 1,
        # axis limits with the "i" (internal) axis style
        # xaxt = "n",
        xaxs = "i",
        xlim = c(0.5, 18.5),
        yaxs = "i",
        ylim = c(0, 1000))
### Legend using the same colours and hatching as the bars.
legend("topright",
       title = "Chú thích",
       legend = c("Nữ", "Nam"),
       fill = c("blue", "red"),
       col = c("blue", "red"),
       density = 20,
       angle = c(45, 135))
box()
Nếu dataset ở dạng table
hay matrix
thì ta vẽ trực tiếp bằng lệnh barplot()
với chính table đó.
### Contingency table (gender x major) used directly as bar heights below.
student_gender_major <- table(df$gender, df$major)
student_gender_major
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 959 461 745 276 978 691
Male 638 863 881 949 477 321
### Side-by-side bar chart drawn directly from the gender x major table.
barplot(height = student_gender_major,
        beside = TRUE,
        width = 1,
        # hatched fill, colours and angles recycled across the bars
        col = c("blue", "red"),
        density = 20,
        angle = c(45, 135),
        # title and axis labels
        main = "Số lượng sinh viên theo học ở các chuyên ngành khác nhau",
        xlab = "",
        ylab = "Sinh viên",
        las = 1,
        # axis limits with the "i" (internal) axis style
        # xaxt = "n",
        xaxs = "i",
        xlim = c(0.5, 18.5),
        yaxs = "i",
        ylim = c(0, 1000))
### Legend using the same colours and hatching as the bars.
legend("topright",
       title = "Chú thích",
       legend = c("Nữ", "Nam"),
       fill = c("blue", "red"),
       col = c("blue", "red"),
       density = 20,
       angle = c(45, 135))
box()
4.1.2 Đồ thị cột stacked barchart
### Stacked bar chart (beside = FALSE): one bar per major, genders stacked.
barplot(height = student_gender_major,
        beside = FALSE,
        width = 1,
        space = 0.2,
        # hatched fill, colours and angles recycled across the segments
        col = c("blue", "red"),
        density = 20,
        angle = c(45, 135),
        # title and axis labels
        main = "Số lượng sinh viên theo học ở các chuyên ngành khác nhau",
        xlab = "",
        ylab = "Sinh viên",
        las = 1,
        # axis limits with the "i" (internal) axis style
        # xaxt = "n",
        xaxs = "i",
        xlim = c(0, 7.4),
        yaxs = "i",
        ylim = c(0, 2000))
par("usr")
[1] 0.0 7.4 0.0 2000.0
### Legend for the stacked bar chart, same colours and hatching as the bars,
### followed by a frame around the plot region.
legend("topright",
       title = "Chú thích",
       legend = c("Nữ", "Nam"),
       fill = c("blue", "red"),
       col = c("blue", "red"),
       density = 20,
       angle = c(45, 135))
box()
4.1.3 Đồ thị cột percent stacked barchart
### Share of each gender within every major (margin = 2: columns sum to 1).
percent_1 <- prop.table(table(df$gender, df$major), margin = 2)
percent_1
Biology Economics and Finance Environmental Sciences Mathematics and Statistics Political Science Social Sciences
Female 0.6005009 0.3481873 0.4581796 0.2253061 0.6721649 0.6828063
Male 0.3994991 0.6518127 0.5418204 0.7746939 0.3278351 0.3171937
### Percent stacked bar chart: every bar has total height 1.
barplot(height = percent_1,
        beside = FALSE,
        width = 1,
        space = 0.2,
        # hatched fill, colours and angles recycled across the segments
        col = c("blue", "red"),
        density = 20,
        angle = c(45, 135),
        # title and axis labels
        main = "Tỷ lệ sinh viên theo học ở các chuyên ngành khác nhau",
        xlab = "",
        ylab = "Sinh viên (%)",
        las = 1,
        # axis limits with the "i" (internal) axis style
        # xaxt = "n",
        xaxs = "i",
        xlim = c(0, 7.4),
        yaxs = "i",
        ylim = c(0, 1))
# par("usr")
### Horizontal legend positioned by user coordinates; y = 1.16 lies above the
### y-axis limit of 1, so xpd = TRUE is needed to draw outside the plot region.
legend(x = 6.461,
       y = 1.16,
       title = "Chú thích",
       legend = c("Nữ", "Nam"),
       fill = c("blue", "red"),
       col = c("blue", "red"),
       density = 20,
       angle = c(45, 135),
       horiz = TRUE,
       xpd = TRUE)
box(which = "plot", col = "black")
# box(which = "figure", col = "red")
4.2 Vẽ đồ thị đường
Ta quan tâm về salary
và age
xem có mối tương quan như thế nào.
### Build a clean subset: keep three columns and drop every row with an `NA`.
df_salary <- df[, c("age", "gender", "salary")]
df_salary <- na.omit(df_salary)

### Sort by age, then by salary within the same age.
df_salary <- df_salary |> dplyr::arrange(age, salary)

head(df_salary, n = 30)
age gender salary
1 18 Female 21000.25
2 18 Female 22313.33
3 18 Female 25979.07
4 18 Female 29178.47
5 18 Female 29550.79
6 18 Female 30062.71
7 18 Female 31266.92
8 18 Female 32038.41
9 18 Female 32943.41
10 18 Female 32966.91
11 18 Male 33908.18
12 18 Male 34460.44
13 18 Female 34492.08
14 18 Female 35507.83
15 18 Male 36031.25
16 18 Female 36589.21
17 18 Female 37245.87
18 18 Male 37691.93
19 18 Male 38422.71
20 18 Female 38738.01
21 18 Male 38764.40
22 18 Male 39298.01
23 18 Male 39994.14
24 18 Male 40495.26
25 18 Male 40590.54
26 18 Male 40899.32
27 18 Female 40964.11
28 18 Male 41816.70
29 18 Male 42388.60
30 18 Female 42539.95
### Large `scipen` penalty so axis values are printed in fixed notation.
options(scipen = 1e9)
### Save the current graphical parameters so they can be restored afterwards.
oldpar <- par(no.readonly = TRUE)
par(mar = c(6, 8, 4, 4))
par(mgp = c(4, 1, 0))
### Salary against age, points joined by lines (type = "o").
plot(x = df_salary$age,
     y = df_salary$salary,
     type = "o",
     col = "darkblue",
     xlim = c(0, 80),
     ylim = c(0, 80000),
     las = 1,
     xaxs = "i",
     yaxs = "i",
     xlab = "Age",
     ylab = "Salary",
     main = paste0("Tương quan giữa tuổi và tiền lương (n = ",
                   dim(df_salary)[1], " sinh viên)"),
     lwd = 1,
     lty = 1,
     bty = "o")
### Restore the graphical parameters saved above.
par(oldpar)
4.3 Vẽ đồ thị hộp
Ta cắt vector age
theo các độ tuổi khác nhau để thuận tiện vẽ đồ thị hộp.
### Bin `age` into labelled groups (intervals are right-closed).
### The last break is `Inf` so that the "> 60" group is really open-ended;
### with an upper break of 70, any age above 70 would silently become `NA`.
df_salary$group_age <- cut(x = df_salary$age,
                           breaks = c(0, 20, 30, 40, 50, 60, Inf),
                           labels = c("≤ 20",
                                      "20 < age ≤ 30",
                                      "30 < age ≤ 40",
                                      "40 < age ≤ 50",
                                      "50 < age ≤ 60",
                                      "> 60"))
### Sort by age, then by salary within the same age.
df_salary <- df_salary |> dplyr::arrange(age, salary)

### Number of rows in every age group.
as.data.frame(table(df_salary$group_age))
Var1 Freq
1 ≤ 20 577
2 20 < age ≤ 30 1093
3 30 < age ≤ 40 24
4 40 < age ≤ 50 25
5 50 < age ≤ 60 27
6 > 60 7
# base R sampling
# sample_x <- sample(1:nrow(df_salary), size = 30)
# df_salary[sample_x, ] -> df_sample
# dplyr sampling (simple random sample)
# dplyr::sample_n(tbl = df_salary, size = 30, replace = FALSE) |> dplyr::arrange(age, salary)
set.seed(1)
# dplyr sampling (random sample drawn within each level of the grouping factor):
# 5% of the rows of every age group, without replacement
sample_df <- df_salary |>
  dplyr::group_by(group_age) |>
  dplyr::sample_frac(size = 0.05, replace = FALSE) |>
  dplyr::arrange(age, salary)

as.data.frame(sample_df)
age gender salary group_age
1 18 Male 47490.25 ≤ 20
2 18 Male 48013.40 ≤ 20
3 19 Female 34791.43 ≤ 20
4 19 Male 36439.33 ≤ 20
5 19 Male 37929.52 ≤ 20
6 19 Female 38884.37 ≤ 20
7 19 Male 45582.76 ≤ 20
8 19 Female 46959.94 ≤ 20
9 19 Male 57527.17 ≤ 20
10 20 Female 23391.64 ≤ 20
11 20 Female 25706.96 ≤ 20
12 20 Female 29626.86 ≤ 20
13 20 Female 30656.10 ≤ 20
14 20 Female 33318.23 ≤ 20
15 20 Female 33606.85 ≤ 20
16 20 Male 35062.56 ≤ 20
17 20 Female 37653.17 ≤ 20
18 20 Male 38009.96 ≤ 20
19 20 Male 40151.72 ≤ 20
20 20 Male 41386.80 ≤ 20
21 20 Male 46127.31 ≤ 20
22 20 Male 47244.04 ≤ 20
23 20 Male 48036.55 ≤ 20
24 20 Female 49620.97 ≤ 20
25 20 Male 49773.36 ≤ 20
26 20 Male 53282.25 ≤ 20
27 20 Male 54234.85 ≤ 20
28 20 Male 58110.75 ≤ 20
29 20 Male 61819.85 ≤ 20
30 21 Female 26950.56 20 < age ≤ 30
31 21 Female 27884.85 20 < age ≤ 30
32 21 Male 29430.52 20 < age ≤ 30
33 21 Female 34371.45 20 < age ≤ 30
34 21 Female 34398.15 20 < age ≤ 30
35 21 Male 37554.01 20 < age ≤ 30
36 21 Male 41594.78 20 < age ≤ 30
37 21 Male 43989.75 20 < age ≤ 30
38 21 Female 45241.26 20 < age ≤ 30
39 21 Female 48089.85 20 < age ≤ 30
40 21 Male 48426.91 20 < age ≤ 30
41 21 Male 48573.85 20 < age ≤ 30
42 21 Male 52433.73 20 < age ≤ 30
43 21 Male 54008.34 20 < age ≤ 30
44 21 Male 54141.46 20 < age ≤ 30
45 21 Male 57994.47 20 < age ≤ 30
46 21 Male 61778.79 20 < age ≤ 30
47 21 Male 65441.30 20 < age ≤ 30
48 22 Female 30908.99 20 < age ≤ 30
49 22 Male 32394.79 20 < age ≤ 30
50 22 Male 33924.95 20 < age ≤ 30
51 22 Male 35881.21 20 < age ≤ 30
52 22 Male 37028.64 20 < age ≤ 30
53 22 Male 37668.89 20 < age ≤ 30
54 22 Female 38655.83 20 < age ≤ 30
55 22 Female 40466.69 20 < age ≤ 30
56 22 Male 41263.86 20 < age ≤ 30
57 22 Female 43552.60 20 < age ≤ 30
58 22 Female 45055.01 20 < age ≤ 30
59 22 Female 45254.11 20 < age ≤ 30
60 22 Male 48747.97 20 < age ≤ 30
61 22 Male 49068.50 20 < age ≤ 30
62 22 Male 49079.84 20 < age ≤ 30
63 22 Male 50988.39 20 < age ≤ 30
64 22 Male 51568.11 20 < age ≤ 30
65 22 Male 61260.51 20 < age ≤ 30
66 23 Male 33259.90 20 < age ≤ 30
67 23 Male 34103.23 20 < age ≤ 30
68 23 Male 43190.20 20 < age ≤ 30
69 23 Female 43287.32 20 < age ≤ 30
70 23 Male 45685.02 20 < age ≤ 30
71 23 Male 46434.88 20 < age ≤ 30
72 23 Male 47249.99 20 < age ≤ 30
73 23 Male 47871.75 20 < age ≤ 30
74 23 Male 49455.72 20 < age ≤ 30
75 24 Male 41309.07 20 < age ≤ 30
76 24 Female 52259.07 20 < age ≤ 30
77 24 Male 53462.19 20 < age ≤ 30
78 24 Male 54680.65 20 < age ≤ 30
79 24 Male 58133.21 20 < age ≤ 30
80 25 Male 39111.92 20 < age ≤ 30
81 25 Male 39765.06 20 < age ≤ 30
82 26 Female 31484.80 20 < age ≤ 30
83 26 Female 48830.73 20 < age ≤ 30
84 26 Male 75530.39 20 < age ≤ 30
85 39 Male 30375.19 30 < age ≤ 40
86 44 Male 55860.90 40 < age ≤ 50
87 56 Male 37119.18 50 < age ≤ 60
### Save the current graphical parameters so they can be restored afterwards.
oldpar <- par(no.readonly = TRUE)
par(mar = c(6, 8, 4, 4))
par(mgp = c(4, 1, 0))
### One box of salary per age group, one colour per group.
boxplot(formula = salary ~ group_age,
        data = df_salary,
        col = rainbow(n = 6),
        las = 1,
        xlab = "Nhóm tuổi",
        ylab = "Mức lương",
        main = "Đồ thị thể hiện mức lương theo các nhóm tuổi")
par(oldpar)
### Save the current graphical parameters so they can be restored afterwards.
oldpar <- par(no.readonly = TRUE)
par(mar = c(6, 12, 4, 4))
par(mgp = c(4, 1, 0))
### Horizontal boxes of salary for every age group x gender combination.
### With lex.order = FALSE the six age groups of the first gender come first,
### hence six "cyan" boxes followed by six "coral" boxes.
boxplot(formula = salary ~ group_age + gender,
        data = df_salary,
        col = c(rep("cyan", 6), rep("coral", 6)),
        las = 1,
        xlab = "Mức lương",
        ylab = "",
        horizontal = TRUE,
        sep = "-",
        lex.order = FALSE,
        # names = c(letters[1:12]),
        main = "Đồ thị thể hiện mức lương theo các nhóm tuổi và giới tính")
par(oldpar)
### Save the current graphical parameters so they can be restored afterwards.
oldpar <- par(no.readonly = TRUE)
par(mar = c(6, 12, 4, 4))
par(mgp = c(4, 1, 0))
### Same boxes, but with lex.order = TRUE the two genders of one age group are
### adjacent, hence the alternating "cyan"/"coral" colours.
boxplot(formula = salary ~ group_age + gender,
        data = df_salary,
        col = rep(c("cyan", "coral"), 6),
        las = 1,
        xlab = "Mức lương",
        ylab = "",
        horizontal = TRUE,
        sep = "-",
        lex.order = TRUE,
        # names = c(letters[1:12]),
        main = "Đồ thị thể hiện mức lương theo các nhóm tuổi và giới tính")
par(oldpar)
### For every gender x age-group combination: mean, median, standard
### deviation, maximum and minimum of `salary`, plus the group size.
summary_salary <- df_salary |>
  dplyr::group_by(gender, group_age) |>
  dplyr::summarise(trung_binh_salary = mean(salary),
                   trung_vi_salary = median(salary),
                   do_lech_chuan_salary = sd(salary),
                   max_salary = max(salary),
                   min_salary = min(salary),
                   so_luong = n())

as.data.frame(summary_salary)
gender group_age trung_binh_salary trung_vi_salary do_lech_chuan_salary max_salary min_salary so_luong
1 Female ≤ 20 35559.89 35470.46 7704.974 54590.09 19636.59 232
2 Female 20 < age ≤ 30 36167.63 35987.41 7718.234 63154.33 11444.14 413
3 Female 30 < age ≤ 40 37033.79 34200.54 5025.071 43000.51 31806.92 7
4 Female 40 < age ≤ 50 39263.15 38688.85 9980.751 52691.50 23044.40 10
5 Female 50 < age ≤ 60 36698.65 35282.33 9103.857 53565.90 20690.65 8
6 Female > 60 35965.51 33467.47 6982.298 46130.95 30796.16 4
7 Male ≤ 20 46524.48 46306.62 9662.660 73980.21 14081.10 345
8 Male 20 < age ≤ 30 46510.91 45709.30 9695.876 75596.79 18571.24 680
9 Male 30 < age ≤ 40 46259.95 49103.72 8761.094 56853.81 27821.99 17
10 Male 40 < age ≤ 50 47586.86 47360.78 8112.779 58316.01 34491.28 15
11 Male 50 < age ≤ 60 50107.44 52875.74 10326.033 70094.85 35727.41 19
12 Male > 60 44729.42 39413.49 11353.582 57765.71 37009.08 3
4.4 Vẽ nhiều đồ thị con trong một hình
Vẽ đồ thị giữa height
và weight
### Subset of the columns used for the height/weight plots.
df_height <- df[, c("age", "gender", "major", "height", "weight", "salary")]

head(df_height, n = 30)
age gender major height weight salary
1 19 Female Political Science 160 64.8 NA
2 19 Female Social Sciences 172 73.0 NA
3 22 Female Social Sciences 168 70.6 NA
4 19 Male Environmental Sciences 183 79.7 NA
5 21 Female Environmental Sciences 175 71.4 NA
6 19 Male Political Science 189 85.8 NA
7 21 Female Political Science 156 65.9 NA
8 21 Female Political Science 167 65.7 NA
9 18 Male Economics and Finance 195 94.4 NA
10 18 Female Environmental Sciences 165 66.0 NA
11 22 Female Economics and Finance 162 66.8 45254.11
12 18 Female Environmental Sciences 172 66.8 NA
13 23 Male Environmental Sciences 185 84.6 40552.79
14 20 Female Environmental Sciences 158 64.4 27007.03
15 19 Female Economics and Finance 157 66.3 NA
16 20 Male Mathematics and Statistics 172 73.9 NA
17 22 Female Political Science 156 61.7 33969.16
18 22 Male Economics and Finance 182 82.1 NA
19 21 Female Political Science 162 69.2 NA
20 22 Female Environmental Sciences 168 70.9 NA
21 20 Female Biology 167 68.5 NA
22 37 Male Political Science 175 70.4 NA
23 19 Male Political Science 164 70.3 NA
24 38 Female Environmental Sciences 155 67.0 NA
25 23 Male Economics and Finance 183 81.8 NA
26 26 Female Biology 145 54.0 NA
27 25 Female Social Sciences 161 66.8 NA
28 24 Male Economics and Finance 182 80.1 50617.64
29 54 Female Political Science 169 71.4 NA
30 22 Male Mathematics and Statistics 172 69.6 NA
### Save the current graphical parameters so they can be restored afterwards.
oldpar <- par(no.readonly = TRUE)
par(mar = c(6, 8, 4, 4))
par(mgp = c(4, 1, 0))
### Scatter plot of height against weight for all rows of `df_height`.
plot(x = df_height$weight,
     y = df_height$height,
     type = "p",
     col = "darkgreen",
     pch = 1,
     cex = 0.5,
     xlim = c(0, 120),
     ylim = c(0, 250),
     las = 1,
     xaxs = "i",
     yaxs = "i",
     xlab = "Cân nặng (kg)",
     ylab = "Chiều cao (cm)",
     main = paste0("Tương quan giữa chiều cao và cân nặng (n = ",
                   dim(df_height)[1], " sinh viên)"),
     lwd = 1,
     lty = 1,
     bty = "o")
par(oldpar)
Tô màu theo gender
# Rebuild the subset, this time carrying `religion` instead of `major`.
df_height <- df[, c("age", "gender", "religion", "height", "weight", "salary")]

# Factor whose integer codes are used below to pick a colour per point.
col_1 <- factor(df_height$gender)

# Save the current graphics parameters so they can be restored after plotting.
oldpar <- par(no.readonly = TRUE)
par(mar = c(6, 8, 4, 4))
# par(mgp = c(4, 1, 0))

# Scatter plot of height against weight, coloured by gender: the first factor
# level is drawn in "blue", the second in "red".
# NOTE(review): with the Female/Male values visible in the printed data the
# levels sort as Female, Male, which matches the legend order below; confirm
# that `gender` holds no other values.
plot(
  x = df_height$weight,
  y = df_height$height,
  type = "p",
  col = c("blue", "red")[col_1],
  pch = 1,
  cex = 0.5,
  xlim = c(0, 120),
  ylim = c(0, 250),
  las = 1,
  xaxs = "i",
  yaxs = "i",
  xlab = "Cân nặng (kg)",
  ylab = "Chiều cao (cm)",
  main = paste0(
    "Tương quan giữa chiều cao và cân nặng (n = ",
    nrow(df_height), " sinh viên)"
  ),
  lwd = 1,
  lty = 1,
  bty = "o"
)

# Legend using the same colour order as the plot call above.
legend(
  x = "bottomleft",
  y = NULL,
  title = "Chú thích",
  legend = c("Nữ", "Nam"),
  # horiz = TRUE,
  col = c("blue", "red"),
  # fill = c("blue", "red"),
  pch = 1,
  xpd = TRUE
)

# Put the graphics parameters back the way they were.
par(oldpar)
4.4.1 Sử dụng package lattice vẽ đồ thị scatterplot
library(lattice)

# Trellis scatter plot of height against weight, with one panel per
# gender x religion combination and points coloured by gender.
lattice::xyplot(
  x = height ~ weight | gender + religion,
  groups = gender,
  col = c("blue", "red"),
  origin = 0,
  xlim = c(30, 130),
  ylim = c(50, 250),
  xlab = "Cân nặng (kg)",
  ylab = "Chiều cao (cm)",
  main = paste0(
    "Tương quan giữa chiều cao và cân nặng theo giới tính và tôn giáo (n = ",
    nrow(df_height), " sinh viên)"
  ),
  data = df_height
)
4.4.2 Vẽ đồ thị nhiều biến
# Select the analysis columns and drop every row that has a missing value in
# any of them.
df_clean <- df[, c("age", "gender", "religion", "height", "weight", "salary")]
df_clean <- na.omit(df_clean)

# Bin age into labelled groups. cut() uses right-closed intervals by default,
# i.e. (0, 20], (20, 30], ..., (60, 70].
df_clean$group_age <- cut(
  x = df_clean$age,
  breaks = c(0, 20, 30, 40, 50, 60, 70),
  labels = c(
    "≤ 20",
    "20 < age ≤ 30",
    "30 < age ≤ 40",
    "40 < age ≤ 50",
    "50 < age ≤ 60",
    "> 60"
  )
)

# Sort the rows by ascending age.
df_clean <- df_clean |> dplyr::arrange(age)

# Number of rows in each age group.
as.data.frame(table(df_clean$group_age))
Var1 Freq
1 ≤ 20 577
2 20 < age ≤ 30 1093
3 30 < age ≤ 40 24
4 40 < age ≤ 50 25
5 50 < age ≤ 60 27
6 > 60 7
# Attach lattice for the trellis plot that follows.
library(lattice)

# Show the first six rows of the cleaned data.
utils::head(df_clean, n = 6L)
age gender religion height weight salary group_age
1 18 Male Protestant 178 74.3 51564.04 ≤ 20
2 18 Male Catholic 190 91.4 43332.84 ≤ 20
3 18 Male Protestant 186 84.2 64525.11 ≤ 20
4 18 Male Catholic 187 84.4 37691.93 ≤ 20
5 18 Male Catholic 169 68.9 38422.71 ≤ 20
6 18 Male Other 192 92.2 34460.44 ≤ 20
# Prefer fixed over scientific notation when printing numbers.
options(scipen = 10)

# Body mass index: weight (kg) divided by squared height (cm converted to m).
df_clean$bmi <- df_clean$weight / (df_clean$height * 0.01)^2

# Religion as a factor; its levels supply the legend text below.
df_clean$religion <- factor(df_clean$religion)
labels <- levels(df_clean$religion)

# Trellis scatter plot of BMI against salary, one panel per
# age group x gender, with colour and symbol varying by religion.
lattice::xyplot(
  x = bmi ~ salary | group_age + gender,
  groups = religion,
  col = adjustcolor(
    col = c("red", "black", "darkgreen", "blue", "purple"),
    alpha.f = 0.5
  ),
  pch = c(15, 17, 18, 19, 8),
  origin = 0,
  ylim = c(15, 31),
  xlim = c(0, 80000),
  xlab = "Salary (USD)",
  ylab = "BMI index",
  main = paste0(
    "Phân bố mức lương theo chỉ số BMI, giới tính và nhóm tuổi (n = ",
    nrow(df_clean), " sinh viên)"
  ),
  # Hand-built legend: same symbols and colours as the points, but opaque.
  key = list(
    space = "top",
    columns = 5,
    title = "Chú thích",
    points = list(
      pch = c(15, 17, 18, 19, 8),
      col = adjustcolor(
        col = c("red", "black", "darkgreen", "blue", "purple"),
        alpha.f = 1
      )
    ),
    text = list(labels)
  ),
  # Spelled out in full; the original `scale =` relied on partial matching.
  scales = list(alternating = 3, rot = 0),
  panel = function(...) {
    # Reference lines are drawn first so the points are plotted on top.
    lattice::panel.abline(h = 30, col = "red", lty = 2)
    lattice::panel.abline(h = 18.5, col = "red", lty = 2)
    lattice::panel.abline(v = 20000, col = "blue", lty = 2)
    lattice::panel.abline(v = 60000, col = "blue", lty = 2)
    lattice::panel.xyplot(...)
  },
  data = df_clean
)
4.4.3 Vẽ đồ thị histogram
# Density-scaled histogram of student height; the returned "histogram" object
# is kept so its breaks and counts can be inspected below.
height_data <- hist(
  df$height,
  col = "lightyellow",
  main = "Histogram of students' height",
  xlab = "Height (cm)",
  ylab = "Density",
  xlim = c(120, 220),
  ylim = c(0, 0.04),
  probability = TRUE
)

# Overlay the kernel density estimate. na.rm = TRUE matches the mean(), sd(),
# min() and max() calls below; density() stops on missing values otherwise.
lines(
  density(df$height, na.rm = TRUE),
  col = "red",
  lty = 1,
  lwd = 2
)

# Evenly spaced grid over the observed height range.
day_so <- seq(
  from = min(df$height, na.rm = TRUE),
  to = max(df$height, na.rm = TRUE),
  length.out = 1000
)

# Overlay the normal density with the sample mean and standard deviation.
# NOTE(review): curve() binds the name given in `xname` to its own x grid
# while evaluating `expr`, so the `day_so` vector defined above does not
# appear to be what is evaluated here; confirm whether it is still needed.
curve(
  expr = dnorm(
    day_so,
    mean = mean(df$height, na.rm = TRUE),
    sd = sd(df$height, na.rm = TRUE)
  ),
  type = "l",
  add = TRUE,
  lwd = 2,
  xname = "day_so",
  col = "blue",
  lty = "dotted"
)

# Legend line types mirror the overlays: solid (1) for the kernel density and
# dotted (3) for the normal curve.
legend(
  x = "topright",
  y = NULL,
  legend = c("kernel density", "normal curve"),
  col = c("red", "blue"),
  lwd = 2,
  lty = c(1, 3)
)

# Print the histogram object.
height_data
$breaks
[1] 135 140 145 150 155 160 165 170 175 180 185 190 195 200 205 210
$counts
[1] 7 43 167 402 862 1111 1314 1284 1206 940 593 240 56 12 2
$density
[1] 1.699235e-04 1.043816e-03 4.053890e-03 9.758466e-03 2.092487e-02 2.696929e-02 3.189707e-02 3.116883e-02 2.927540e-02 2.281830e-02 1.439495e-02 5.825950e-03 1.359388e-03 2.912975e-04 4.854958e-05
$mids
[1] 137.5 142.5 147.5 152.5 157.5 162.5 167.5 172.5 177.5 182.5 187.5 192.5 197.5 202.5 207.5
$xname
[1] "df$height"
$equidist
[1] TRUE
attr(,"class")
[1] "histogram"