Submission

Submit your R script as a .R file (or .Rmd if using R Markdown) to Brightspace

Please make sure your submission includes your name and the assignment number in the filename

Grading

You should always follow best coding practices (see Intro to R module 1), but especially for assignment submissions.

To receive full credit for each assignment

  • Wherever possible prioritize the use of tidyverse functions
  • Please make sure each problem has its own header so that I can easily navigate to your answers
  • Ensure you have comments that explain what you are doing
  • Long code chunks should be broken up with spaces and comments to explain what is happening at each step
  • Object names should be lowercase and short but descriptive enough that they aren’t confused with other objects (For example data1 and data2 are not good names for dataframes you are working with)
  • Just because your code runs doesn’t mean it did what you think it did, always check your data/objects to ensure any functions were performed correctly (there are several ways to do this)

1 Remove NAs

  • Read in the turtles_tidy data
  • In the same code chunk remove all rows with NAs
  • Assign this new data to the environment as “turtles_no_na”
# read in the tidy turtles data and clean it in a single pipeline
turtles_no_na <- read_csv('data/processed/turtles_tidy.csv') %>% 
  
  # store sex as a factor and relabel the level 'fem' as 'female'
  mutate(sex = as.factor(sex),
         sex = recode(sex, 
                       fem = 'female')) %>% 
  
  # remove every row that has an NA in any column; drop_na() is the
  # tidyverse (tidyr) counterpart of na.omit() and, unlike na.omit(),
  # does not attach an 'na.action' attribute to the result
  drop_na()

# print the cleaned data to check that no NAs remain
turtles_no_na
## # A tibble: 15 × 5
##      tag sex    c_length h_width weight
##    <dbl> <fct>     <dbl>   <dbl>  <dbl>
##  1    10 male       41      7.15    7.6
##  2    11 female     46.4    8.18   11  
##  3     3 female     42.8    7.32    8.6
##  4     4 male       40      6.6     6.5
##  5     5 female     45      8.05   10.9
##  6    12 female     44      7.55    8.9
##  7     6 female     40      6.53    6.2
##  8     9 male       35      5.74    3.9
##  9    17 female     35.1    6.04    4.5
## 10    19 male       42.3    6.77    7.8
## 11    22 female     48.1    8.55   12.8
## 12   105 male       44      7.1     9  
## 13    14 male       43      6.6     7.2
## 14     7 female     48      8.67   13.5
## 15   104 male       44      7.35    9

2 Make a new variable

Using the turtles_no_na data, make a new variable called “size_class” based on the “weight” variable using case_when() whereby

  • weights less than 4 are juvenile

  • weights greater than 7 are adult

  • weights between 4 and 7 are subadult

(There are multiple ways to do this which is why there are multiple printouts, but they will yield the same answer)

# classify each turtle by weight: < 4 juvenile, > 7 adult, otherwise subadult
turtles_no_na <- turtles_no_na %>% 
  mutate(size_class = case_when(
    weight < 4 ~ 'juvenile',
    weight > 7 ~ 'adult',
    # .default is the catch-all for rows matched by no condition above;
    # it replaces the older `TRUE ~` spelling (dplyr >= 1.1.0)
    .default = 'subadult'
  ))

# check the new variable
turtles_no_na$size_class
##  [1] "adult"    "adult"    "adult"    "subadult" "adult"    "adult"   
##  [7] "subadult" "juvenile" "subadult" "adult"    "adult"    "adult"   
## [13] "adult"    "adult"    "adult"
# alternatively, spell out the subadult range instead of using a catch-all
turtles_no_na <- mutate(
  turtles_no_na,
  size_class = case_when(
    weight < 4 ~ 'juvenile',
    weight > 7 ~ 'adult',
    weight >= 4 & weight <= 7 ~ 'subadult'
  )
)

# check the new variable
turtles_no_na$size_class
##  [1] "adult"    "adult"    "adult"    "subadult" "adult"    "adult"   
##  [7] "subadult" "juvenile" "subadult" "adult"    "adult"    "adult"   
## [13] "adult"    "adult"    "adult"

3 Replace values with NA

In the turtles_tidy data (not the turtles_no_na data) replace ALL variable values (except the tag column) for tags 104 and 105 with NAs. To compare your answer, I have printed the last few rows only to show the change to tags 104 & 105

Hint: you will need to create a vector that identifies the tag numbers you want to replace, and use mutate() with one or more other functions to do this

# import the tidied turtle measurements from the processed-data folder
turtles_tidy <- read_csv(file = 'data/processed/turtles_tidy.csv')
## Rows: 21 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sex
## dbl (4): tag, c_length, h_width, weight
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# tags whose measurements we do not trust
bad_tags <- c(104, 105)

# blank out each non-tag column, one column at a time, for the bad tags
turtles_tidy <- mutate(
  turtles_tidy,
  sex      = replace(sex, tag %in% bad_tags, NA),
  c_length = replace(c_length, tag %in% bad_tags, NA),
  h_width  = replace(h_width, tag %in% bad_tags, NA),
  weight   = replace(weight, tag %in% bad_tags, NA)
)

# the last rows include tags 104 and 105, so check them here
tail(turtles_tidy)
## # A tibble: 6 × 5
##     tag sex    c_length h_width weight
##   <dbl> <chr>     <dbl>   <dbl>  <dbl>
## 1    22 female     48.1    8.55  12.8 
## 2   105 <NA>       NA     NA     NA   
## 3    14 male       43      6.6    7.2 
## 4     7 female     48      8.67  13.5 
## 5     1 <NA>       29.2    5.1    2.38
## 6   104 <NA>       NA     NA     NA
# or... let across() apply the same replacement to each listed column


turtles_tidy <- turtles_tidy %>% 
  mutate(
    across(
      c(sex, c_length, h_width, weight),
      ~ replace(.x, tag %in% bad_tags, NA)
    )
  )

tail(turtles_tidy)
## # A tibble: 6 × 5
##     tag sex    c_length h_width weight
##   <dbl> <chr>     <dbl>   <dbl>  <dbl>
## 1    22 female     48.1    8.55  12.8 
## 2   105 <NA>       NA     NA     NA   
## 3    14 male       43      6.6    7.2 
## 4     7 female     48      8.67  13.5 
## 5     1 <NA>       29.2    5.1    2.38
## 6   104 <NA>       NA     NA     NA
# and without naming the columns: select everything except tag
turtles_tidy <- turtles_tidy %>% 
  mutate(
    across(
      .cols = !tag,
      .fns = ~ replace(.x, tag %in% bad_tags, NA)
    )
  )

# and finally without making a vector for the tags

turtles_tidy %>%
  mutate(
    # apply the same change to every column except tag
    across(
      !tag,
      # set the value to NA where the tag is 104 or 105, otherwise keep it
      ~ if_else(tag %in% c(104, 105), NA, .x)
    )
  )
## # A tibble: 21 × 5
##      tag sex    c_length h_width weight
##    <dbl> <chr>     <dbl>   <dbl>  <dbl>
##  1    10 male       41      7.15   7.6 
##  2    11 female     46.4    8.18  11   
##  3     2 <NA>       24.3    4.42   1.65
##  4    15 <NA>       28.7    4.89   2.18
##  5    16 <NA>       32      5.37   3   
##  6     3 female     42.8    7.32   8.6 
##  7     4 male       40      6.6    6.5 
##  8     5 female     45      8.05  10.9 
##  9    12 female     44      7.55   8.9 
## 10    13 <NA>       28      4.85   1.97
## # ℹ 11 more rows

4 Pivot data

Use the below code to read in the Soils data from the carData package

# load the Soils example data set from the carData package

soil <- carData::Soils
  • print the first few lines of data in “soil”

  • Pivot the data so that columns Ca - Na are contained in one column called nutrients (again there are several possible solutions, but two that I expect people to use)

# see what variables it contains by printing the first six rows
head(soil, n = 6)
##   Group Contour Depth Gp Block   pH     N Dens   P    Ca   Mg    K   Na Conduc
## 1     1     Top  0-10 T0     1 5.40 0.188 0.92 215 16.35 7.65 0.72 1.14   1.09
## 2     1     Top  0-10 T0     2 5.65 0.165 1.04 208 12.25 5.15 0.71 0.94   1.35
## 3     1     Top  0-10 T0     3 5.14 0.260 0.95 300 13.02 5.68 0.68 0.60   1.41
## 4     1     Top  0-10 T0     4 5.14 0.169 1.10 248 11.92 7.88 1.09 1.01   1.64
## 5     2     Top 10-30 T1     1 5.14 0.164 1.12 174 14.17 8.12 0.70 2.17   1.85
## 6     2     Top 10-30 T1     2 5.10 0.094 1.22 129  8.55 6.92 0.81 2.67   3.18
# use tidyr to stack the four nutrient columns into name/value pairs

soil_nutrient <- soil %>% 
  pivot_longer(
    cols = c(Ca, Mg, K, Na),
    names_to = 'nutrient',
    values_to = 'value'
  )

soil_nutrient
## # A tibble: 192 × 12
##    Group Contour Depth Gp    Block    pH     N  Dens     P Conduc nutrient value
##    <fct> <fct>   <fct> <fct> <fct> <dbl> <dbl> <dbl> <int>  <dbl> <chr>    <dbl>
##  1 1     Top     0-10  T0    1      5.4  0.188  0.92   215   1.09 Ca       16.4 
##  2 1     Top     0-10  T0    1      5.4  0.188  0.92   215   1.09 Mg        7.65
##  3 1     Top     0-10  T0    1      5.4  0.188  0.92   215   1.09 K         0.72
##  4 1     Top     0-10  T0    1      5.4  0.188  0.92   215   1.09 Na        1.14
##  5 1     Top     0-10  T0    2      5.65 0.165  1.04   208   1.35 Ca       12.2 
##  6 1     Top     0-10  T0    2      5.65 0.165  1.04   208   1.35 Mg        5.15
##  7 1     Top     0-10  T0    2      5.65 0.165  1.04   208   1.35 K         0.71
##  8 1     Top     0-10  T0    2      5.65 0.165  1.04   208   1.35 Na        0.94
##  9 1     Top     0-10  T0    3      5.14 0.26   0.95   300   1.41 Ca       13.0 
## 10 1     Top     0-10  T0    3      5.14 0.26   0.95   300   1.41 Mg        5.68
## # ℹ 182 more rows
# alternatively, select the nutrient columns as a range (Ca through Na)
soil_nutrient <- soil %>% 
  pivot_longer(
    cols = Ca:Na,
    names_to = 'nutrient',
    values_to = 'value'
  )

soil_nutrient
## # A tibble: 192 × 12
##    Group Contour Depth Gp    Block    pH     N  Dens     P Conduc nutrient value
##    <fct> <fct>   <fct> <fct> <fct> <dbl> <dbl> <dbl> <int>  <dbl> <chr>    <dbl>
##  1 1     Top     0-10  T0    1      5.4  0.188  0.92   215   1.09 Ca       16.4 
##  2 1     Top     0-10  T0    1      5.4  0.188  0.92   215   1.09 Mg        7.65
##  3 1     Top     0-10  T0    1      5.4  0.188  0.92   215   1.09 K         0.72
##  4 1     Top     0-10  T0    1      5.4  0.188  0.92   215   1.09 Na        1.14
##  5 1     Top     0-10  T0    2      5.65 0.165  1.04   208   1.35 Ca       12.2 
##  6 1     Top     0-10  T0    2      5.65 0.165  1.04   208   1.35 Mg        5.15
##  7 1     Top     0-10  T0    2      5.65 0.165  1.04   208   1.35 K         0.71
##  8 1     Top     0-10  T0    2      5.65 0.165  1.04   208   1.35 Na        0.94
##  9 1     Top     0-10  T0    3      5.14 0.26   0.95   300   1.41 Ca       13.0 
## 10 1     Top     0-10  T0    3      5.14 0.26   0.95   300   1.41 Mg        5.68
## # ℹ 182 more rows

5 Join Data

If you haven’t already, download the 3 bobcat data files:

Bobcat collection data for Purrr (bobcat_collection_data.csv)

Bobcat necropsy data for Purrr (bobcat_necropsy_only_data.csv)

Bobcat age data for Purrr (bobcat_age_data.csv)

  • Read in the data files using the tidyverse function

  • In the same code chunk, set the column names to lowercase for all 3 data sets AND rename the ‘Bobcat_ID#’ column to bobcat_id (NOTE: this requires a lot of code repetition, which is annoying and does not follow best coding practices; we will learn a much better way to do this when we cover Purrr)

  • Use the csv file names as the object names when you assign them to the environment

  • Make a list with the three data sets and check their internal structure (there are multiple ways to do this)

  • Join the bobcat_necropsy_only_data to the bobcat_collection_data AND then in the same code chunk join the bobcat_age_data as well. Make sure to retain all observations from the bobcat_collection_data. You will need to use the bobcat_id column as the key when joining

  • Print the summary of your data to check that it worked
    ```{recho=TRUE, class.source = ‘fold-hide’, message=FALSE}

# read in data files
# (each file gets the same two clean-up steps: lowercase the column
# names, then rename the id column so it no longer contains '#')

bobcat_collection_data <- read_csv('data/raw/bobcat_collection_data.csv') %>%

  # set names to lowercase
  set_names(names(.) %>% tolower()) %>%

  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

bobcat_necropsy_only_data <- read_csv('data/raw/bobcat_necropsy_only_data.csv') %>%

  # set names to lowercase
  set_names(names(.) %>% tolower()) %>%

  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

bobcat_age_data <- read_csv('data/raw/bobcat_age_data.csv') %>%

  # set names to lowercase
  set_names(names(.) %>% tolower()) %>%

  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

# or simpler code

# read in data files

bobcat_collection_data <- read_csv('data/raw/bobcat_collection_data.csv') %>%

  # set names to lowercase (rename_with() supersedes rename_all())
  rename_with(tolower) %>%

  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

bobcat_necropsy_only_data <- read_csv('data/raw/bobcat_necropsy_only_data.csv') %>%

  # set names to lowercase
  rename_with(tolower) %>%

  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

bobcat_age_data <- read_csv('data/raw/bobcat_age_data.csv') %>%

  # set names to lowercase
  rename_with(tolower) %>%

  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

# make a list and check internal structure

# option 1 - nested code

str(list(bobcat_collection_data, bobcat_necropsy_only_data, bobcat_age_data))

# option 2 - with the pipe (%>%)

list(bobcat_collection_data, bobcat_necropsy_only_data, bobcat_age_data) %>%

  str()

# join data
# left joins keep every row of bobcat_collection_data (the left table)

bobcat_data_joined <- bobcat_collection_data %>%

  # join necropsy data
  left_join(bobcat_necropsy_only_data, by = 'bobcat_id') %>%

  # join age data
  left_join(bobcat_age_data, by = 'bobcat_id')