|
| 1 | +--- |
| 2 | +author: Derek Devnich |
| 3 | +title: Programming in R |
| 4 | +--- |
| 5 | + |
1 | 6 | - [<span class="toc-section-number">1</span> Fundamentals (Week 1)](#fundamentals-week-1) |
2 | 7 | - [<span class="toc-section-number">1.1</span> Introduction to RStudio](#introduction-to-rstudio) |
3 | 8 | - [<span class="toc-section-number">1.2</span> Introduction to R](#introduction-to-r) |
@@ -859,20 +864,6 @@ See /scripts/curriculum.Rmd |
859 | 864 | } |
860 | 865 | ``` |
861 | 866 |
|
862 | | -5. Vectorize your tests |
863 | | - |
864 | | - ``` r |
865 | | - x <- 1:4 |
866 | | - |
867 | | - if (any(x < 2)) { |
868 | | - print("Some x less than 2") |
869 | | - } |
870 | | - |
871 | | - if (all(x < 2)){ |
872 | | - print("All x less than 2") |
873 | | - } |
874 | | - ``` |
875 | | - |
876 | 867 | ### Review Subsetting section |
877 | 868 |
|
878 | 869 | Subsetting is frequently an alternative to if-else statements in R |
@@ -936,7 +927,21 @@ x + z |
936 | 927 | all(a) |
937 | 928 | ``` |
938 | 929 |
|
939 | | -2. Can you detect missing data? |
| 930 | +2. Vectorize your tests |
| 931 | +
|
| 932 | + ``` r |
| 933 | + x <- 1:4 |
| 934 | +
|
| 935 | + if (any(x < 2)) { |
| 936 | + print("Some x less than 2") |
| 937 | + } |
| 938 | +
|
| 939 | + if (all(x < 2)){ |
| 940 | + print("All x less than 2") |
| 941 | + } |
| 942 | + ``` |
| 943 | +
|
| 944 | +3. Can you detect missing data? |
940 | 945 |
|
941 | 946 | ``` r |
942 | 947 | nan_vec <- c(1, 3, NaN) |
@@ -1222,77 +1227,63 @@ See data/curriculum.Rmd |
1222 | 1227 |
|
1223 | 1228 | ## Reading and writing data |
1224 | 1229 |
|
1225 | | -### Create sample data sets and write them to the \`processed\` directory |
| 1230 | +### How to find files |
1226 | 1231 |
|
1227 | | -1. Preliminaries |
| 1232 | +Get all regional files |
1228 | 1233 |
|
1229 | | - ``` r |
1230 | | - if (!dir.exists("../processed")) { |
1231 | | - dir.create("../processed") |
1232 | | - } |
| 1234 | +``` r |
 | 1235 | +## Get matching files from the current directory |
| 1236 | +dir(path = ".", pattern = "gapminder_gdp.*.csv") |
| 1237 | +``` |
1233 | 1238 |
|
1234 | | - north_america <- c("Canada", "Mexico", "United States") |
1235 | | - ``` |
| 1239 | +### Read files using a for loop |
1236 | 1240 |
|
1237 | | -2. Version 1: Use `calcGDP` function |
| 1241 | +1. Read each file into a data frame and add it to a list |
1238 | 1242 |
|
1239 | 1243 | ``` r |
1240 | | - for (year in unique(gapminder$year)) { |
1241 | | - df <- calcGDP(gapminder, year = year, country = north_america) |
| 1244 | + ## Create an empty list |
| 1245 | + df_list <- list() |
1242 | 1246 |
|
1243 | | - ## Generate a file name. This will fail if "processed" doesn't exist |
1244 | | - fname <- paste("../processed/north_america_", as.character(year), ".csv", sep = "") |
| 1247 | + # Get list of files to read |
| 1248 | + file_names <- dir(path = "../data", pattern = "gapminder_gdp.*.csv") |
1245 | 1249 |
|
1246 | | - ## Write the file |
1247 | | - write.csv(x = df, file = fname, row.names = FALSE) |
| 1250 | + # Read files into data frames |
| 1251 | + for (f in file_names){ |
 | 1252 | + df_list[[f]] <- read.csv(file = file.path("../data", f), stringsAsFactors = TRUE) |
1248 | 1253 | } |
1249 | 1254 | ``` |
1250 | 1255 |
|
1251 | | -3. (Optional) Version 2: Bypass `calcGDP` function |
| 1256 | +2. Check our data for compatibility |
1252 | 1257 |
|
1253 | 1258 | ``` r |
1254 | | - for (year in unique(gapminder$year)) { |
1255 | | - df <- gapminder[gapminder$year == year, ] |
1256 | | - df <- df[df$country %in% north_america, ] |
1257 | | - fname <- paste("processed/north_america_", as.character(year), ".csv", sep="") |
1258 | | - write.csv(x = df, file = fname, row.names = FALSE) |
| 1259 | + # Check data frame dimensions |
| 1260 | + for (name in names(df_list)) { |
| 1261 | + print(name) |
| 1262 | + print(dim(df_list[[name]])) |
1259 | 1263 | } |
1260 | | - ``` |
1261 | | - |
1262 | | -### How to find files |
1263 | | - |
1264 | | -``` r |
1265 | | -## Get matching files from the `processed` subdirectory |
1266 | | -dir(path = "../processed", pattern = "north_america_[1-9]*.csv") |
1267 | | -``` |
1268 | 1264 |
|
1269 | | -### Read files using a for loop |
| 1265 | + # What's going on with the Americas? |
| 1266 | + for (name in names(df_list)) { |
| 1267 | + print(name) |
| 1268 | + print(dim(df_list[[name]])) |
| 1269 | + print(colnames(df_list[[name]])) |
| 1270 | + } |
| 1271 | + ``` |
1270 | 1272 |
|
1271 | | -1. Read each file into a data frame and add it to a list |
| 1273 | +3. Drop the continent column for Americas |
1272 | 1274 |
|
1273 | 1275 | ``` r |
1274 | | - ## Create an empty list |
1275 | | - df_list <- list() |
1276 | | - |
1277 | | - ## Get the locations of the matching files |
1278 | | - file_names <- dir(path = "../processed", pattern = "north_america_[1-9]*.csv") |
1279 | | - file_paths <- file.path("../processed", file_names) |
1280 | | - |
1281 | | - for (f in file_paths){ |
1282 | | - df_list[[f]] <- read.csv(f, stringsAsFactors = TRUE) |
1283 | | - } |
| 1276 | + americas <- df_list[["gapminder_gdp_americas.csv"]] |
| 1277 | + df_list[["gapminder_gdp_americas.csv"]] <- americas[, ! colnames(americas) %in% c("continent")] |
1284 | 1278 | ``` |
1285 | 1279 |
|
1286 | | -2. Access the list items to view the individual data frames |
| 1280 | +4. Concatenate data frames |
1287 | 1281 |
|
1288 | 1282 | ``` r |
1289 | | - length(df_list) |
1290 | | - names(df_list) |
1291 | | - lapply(df_list, length) |
1292 | | - df_list[["north_america_1952.csv"]] |
| 1283 | + df <- do.call(rbind, df_list) |
1293 | 1284 | ``` |
1294 | 1285 |
|
1295 | | -### Read files using apply |
| 1286 | +### (Optional) Read files using apply |
1296 | 1287 |
|
1297 | 1288 | 1. Instead of a for loop that handles each file individually, use a single vectorized function. |
1298 | 1289 |
|
|
0 commit comments