Submit your R script as a .R (or .Rmd if using markdown) file to Brightspace
Please make sure your submission includes your name and the assignment number in the filename
You should always be following best coding practices (see Intro to R module 1), but especially for assignment submissions.
To receive full credit for each assignment
# Rebuild the cleaned turtles data from the tidy csv:
#   1. read the file
#   2. store sex as a factor, relabelling the level "fem" as "female"
#   3. drop every row that still contains an NA
turtles_no_na <- "data/processed/turtles_tidy.csv" %>%
  read_csv() %>%
  mutate(sex = recode(as.factor(sex), fem = "female")) %>%
  na.omit()
turtles_no_na
## # A tibble: 15 × 5
## tag sex c_length h_width weight
## <dbl> <fct> <dbl> <dbl> <dbl>
## 1 10 male 41 7.15 7.6
## 2 11 female 46.4 8.18 11
## 3 3 female 42.8 7.32 8.6
## 4 4 male 40 6.6 6.5
## 5 5 female 45 8.05 10.9
## 6 12 female 44 7.55 8.9
## 7 6 female 40 6.53 6.2
## 8 9 male 35 5.74 3.9
## 9 17 female 35.1 6.04 4.5
## 10 19 male 42.3 6.77 7.8
## 11 22 female 48.1 8.55 12.8
## 12 105 male 44 7.1 9
## 13 14 male 43 6.6 7.2
## 14 7 female 48 8.67 13.5
## 15 104 male 44 7.35 9
Using the turtles_no_na data, make a new variable called “size_class”
based on the “weight” variable using case_when()
whereby
weights less than 4 are juvenile
weights greater than 7 are adult
weights between 4 and 7 (inclusive) are subadult
(There are multiple ways to do this which is why there are multiple printouts, but they will yield the same answer)
# Classify each turtle by weight: over 7 is adult, under 4 is juvenile, and
# any row matching neither condition falls through to subadult.
turtles_no_na <- mutate(
  turtles_no_na,
  size_class = case_when(
    weight > 7 ~ "adult",
    weight < 4 ~ "juvenile",
    TRUE ~ "subadult"
  )
)
turtles_no_na[["size_class"]]
## [1] "adult" "adult" "adult" "subadult" "adult" "adult"
## [7] "subadult" "juvenile" "subadult" "adult" "adult" "adult"
## [13] "adult" "adult" "adult"
# alternatively, spell out the subadult range instead of using a catch-all
turtles_no_na <- turtles_no_na %>%
  mutate(
    size_class = case_when(
      weight < 4 ~ "juvenile",
      # between() is inclusive at both ends: 4 <= weight <= 7
      between(weight, 4, 7) ~ "subadult",
      weight > 7 ~ "adult"
    )
  )
turtles_no_na %>% pull(size_class)
## [1] "adult" "adult" "adult" "subadult" "adult" "adult"
## [7] "subadult" "juvenile" "subadult" "adult" "adult" "adult"
## [13] "adult" "adult" "adult"
In the turtles_tidy data (not the turtles_no_na data) replace ALL variable values (except the tag column) for tags 104 and 105 with NAs. To compare your answer, I have printed the last few rows only to show the change to tags 104 & 105
Hint: you will need to create a vector that identifies the tag
numbers you want to replace and use mutate()
with another
function/s to do this
# read the full tidy turtles data (no rows removed)
turtles_tidy <- read_csv(file = "data/processed/turtles_tidy.csv")
## Rows: 21 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sex
## dbl (4): tag, c_length, h_width, weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# tags whose measurements we do not trust
bad_tags <- c(104, 105)
# Overwrite each measurement column in turn: rows whose tag is in bad_tags
# get NA, all other rows keep their value. The tag column is not touched.
turtles_tidy <- mutate(
  turtles_tidy,
  sex = replace(x = sex, list = tag %in% bad_tags, values = NA),
  c_length = replace(x = c_length, list = tag %in% bad_tags, values = NA),
  h_width = replace(x = h_width, list = tag %in% bad_tags, values = NA),
  weight = replace(x = weight, list = tag %in% bad_tags, values = NA)
)
tail(turtles_tidy)
## # A tibble: 6 × 5
## tag sex c_length h_width weight
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 22 female 48.1 8.55 12.8
## 2 105 <NA> NA NA NA
## 3 14 male 43 6.6 7.2
## 4 7 female 48 8.67 13.5
## 5 1 <NA> 29.2 5.1 2.38
## 6 104 <NA> NA NA NA
# or... let across() apply the same replace() call to each of the four
# measurement columns, instead of writing it out four times
turtles_tidy <- turtles_tidy %>%
  mutate(
    across(
      .cols = c(sex, c_length, h_width, weight),
      .fns = ~ replace(.x, tag %in% bad_tags, NA)
    )
  )
tail(turtles_tidy)
## # A tibble: 6 × 5
## tag sex c_length h_width weight
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 22 female 48.1 8.55 12.8
## 2 105 <NA> NA NA NA
## 3 14 male 43 6.6 7.2
## 4 7 female 48 8.67 13.5
## 5 1 <NA> 29.2 5.1 2.38
## 6 104 <NA> NA NA NA
# and without listing the columns: select everything except tag
turtles_tidy <- mutate(
  turtles_tidy,
  across(!tag, ~ replace(.x, tag %in% bad_tags, NA))
)
# and finally, with the tag numbers written inline rather than in a vector
turtles_tidy %>%
  mutate(
    # apply to every column except tag
    across(
      .cols = -tag,
      # rows tagged 104 or 105 become NA; every other row keeps its value
      .fns = ~ if_else(tag %in% c(104, 105), NA, .x)
    )
  )
## # A tibble: 21 × 5
## tag sex c_length h_width weight
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 10 male 41 7.15 7.6
## 2 11 female 46.4 8.18 11
## 3 2 <NA> 24.3 4.42 1.65
## 4 15 <NA> 28.7 4.89 2.18
## 5 16 <NA> 32 5.37 3
## 6 3 female 42.8 7.32 8.6
## 7 4 male 40 6.6 6.5
## 8 5 female 45 8.05 10.9
## 9 12 female 44 7.55 8.9
## 10 13 <NA> 28 4.85 1.97
## # ℹ 11 more rows
Use the below code to read in the Soils data from the carData package
# Load the example data: copy the Soils data set from the carData package
# into a local object named soil
soil <- carData::Soils
Print the first few lines of data in “soil”
Pivot the data so that columns Ca - Na are contained in one column called nutrient (again, there are two possible solutions; really more than that, but these are the two I expect people to use)
# Show the first six rows to see what variables soil contains
head(soil, n = 6)
## Group Contour Depth Gp Block pH N Dens P Ca Mg K Na Conduc
## 1 1 Top 0-10 T0 1 5.40 0.188 0.92 215 16.35 7.65 0.72 1.14 1.09
## 2 1 Top 0-10 T0 2 5.65 0.165 1.04 208 12.25 5.15 0.71 0.94 1.35
## 3 1 Top 0-10 T0 3 5.14 0.260 0.95 300 13.02 5.68 0.68 0.60 1.41
## 4 1 Top 0-10 T0 4 5.14 0.169 1.10 248 11.92 7.88 1.09 1.01 1.64
## 5 2 Top 10-30 T1 1 5.14 0.164 1.12 174 14.17 8.12 0.70 2.17 1.85
## 6 2 Top 10-30 T1 2 5.10 0.094 1.22 129 8.55 6.92 0.81 2.67 3.18
# Reshape with tidyr: stack the four named nutrient columns so that the
# column name goes to `nutrient` and the measurement goes to `value`
soil_nutrient <- soil %>%
  pivot_longer(
    cols = all_of(c("Ca", "Mg", "K", "Na")),
    names_to = "nutrient",
    values_to = "value"
  )
soil_nutrient
## # A tibble: 192 × 12
## Group Contour Depth Gp Block pH N Dens P Conduc nutrient value
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <int> <dbl> <chr> <dbl>
## 1 1 Top 0-10 T0 1 5.4 0.188 0.92 215 1.09 Ca 16.4
## 2 1 Top 0-10 T0 1 5.4 0.188 0.92 215 1.09 Mg 7.65
## 3 1 Top 0-10 T0 1 5.4 0.188 0.92 215 1.09 K 0.72
## 4 1 Top 0-10 T0 1 5.4 0.188 0.92 215 1.09 Na 1.14
## 5 1 Top 0-10 T0 2 5.65 0.165 1.04 208 1.35 Ca 12.2
## 6 1 Top 0-10 T0 2 5.65 0.165 1.04 208 1.35 Mg 5.15
## 7 1 Top 0-10 T0 2 5.65 0.165 1.04 208 1.35 K 0.71
## 8 1 Top 0-10 T0 2 5.65 0.165 1.04 208 1.35 Na 0.94
## 9 1 Top 0-10 T0 3 5.14 0.26 0.95 300 1.41 Ca 13.0
## 10 1 Top 0-10 T0 3 5.14 0.26 0.95 300 1.41 Mg 5.68
## # ℹ 182 more rows
# alternatively, select the nutrient columns as a range (Ca through Na)
soil_nutrient <- soil %>%
  pivot_longer(cols = Ca:Na, names_to = "nutrient", values_to = "value")
soil_nutrient
## # A tibble: 192 × 12
## Group Contour Depth Gp Block pH N Dens P Conduc nutrient value
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <int> <dbl> <chr> <dbl>
## 1 1 Top 0-10 T0 1 5.4 0.188 0.92 215 1.09 Ca 16.4
## 2 1 Top 0-10 T0 1 5.4 0.188 0.92 215 1.09 Mg 7.65
## 3 1 Top 0-10 T0 1 5.4 0.188 0.92 215 1.09 K 0.72
## 4 1 Top 0-10 T0 1 5.4 0.188 0.92 215 1.09 Na 1.14
## 5 1 Top 0-10 T0 2 5.65 0.165 1.04 208 1.35 Ca 12.2
## 6 1 Top 0-10 T0 2 5.65 0.165 1.04 208 1.35 Mg 5.15
## 7 1 Top 0-10 T0 2 5.65 0.165 1.04 208 1.35 K 0.71
## 8 1 Top 0-10 T0 2 5.65 0.165 1.04 208 1.35 Na 0.94
## 9 1 Top 0-10 T0 3 5.14 0.26 0.95 300 1.41 Ca 13.0
## 10 1 Top 0-10 T0 3 5.14 0.26 0.95 300 1.41 Mg 5.68
## # ℹ 182 more rows
If you haven’t already, download the 3 bobcat data files:
Bobcat collection data for Purrr (bobcat_collection_data.csv)
Bobcat necropsy data for Purrr (bobcat_necropsy_only_data.csv)
Bobcat age data for Purrr (bobcat_age_data.csv)
Read in the data files using the tidyverse function read_csv()
In the same code chunk, set the column names to lowercase for all 3 data sets AND rename the ‘Bobcat_ID#’ column to bobcat_id (NOTE: this requires a lot of code repetition, which is annoying and does not follow best coding practices; we will learn a much better way to do this when we cover Purrr)
Use the csv file names as the object names when you assign them
to the environment. Make a list with the three data sets and check
their internal structure (there are multiple ways to do
this)
Join the bobcat_necropsy_only_data to the bobcat_collection_data AND then in the same code chunk join the bobcat_age_data as well. Make sure to retain all observations from the bobcat_collection_data. You will need to use the bobcat_id column as the key when joining
Print the summary of your data to check that it worked
```{r, echo=TRUE, class.source = 'fold-hide', message=FALSE}
# --- Read the three bobcat files -------------------------------------------
# Each object is named after its csv file. For every file: read it, convert
# all column names to lowercase, then rename 'bobcat_id#' to 'bobcat_id' so
# the key column can be used without quoting.

bobcat_collection_data <- read_csv('data/raw/bobcat_collection_data.csv') %>%
  # set names to lowercase
  set_names(names(.) %>% tolower()) %>%
  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

bobcat_necropsy_only_data <- read_csv('data/raw/bobcat_necropsy_only_data.csv') %>%
  # set names to lowercase
  set_names(names(.) %>% tolower()) %>%
  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

bobcat_age_data <- read_csv('data/raw/bobcat_age_data.csv') %>%
  # set names to lowercase
  set_names(names(.) %>% tolower()) %>%
  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

# --- Alternatively: lowercase the names with a dplyr helper ------------------
# rename_with() applies a function to the column names; it is the current
# replacement for the superseded rename_all().

bobcat_collection_data <- read_csv('data/raw/bobcat_collection_data.csv') %>%
  # set names to lowercase
  rename_with(tolower) %>%
  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

bobcat_necropsy_only_data <- read_csv('data/raw/bobcat_necropsy_only_data.csv') %>%
  # set names to lowercase
  rename_with(tolower) %>%
  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

bobcat_age_data <- read_csv('data/raw/bobcat_age_data.csv') %>%
  # set names to lowercase
  rename_with(tolower) %>%
  # change bobcats id# to better name
  rename(.,
         'bobcat_id' = 'bobcat_id#')

# --- Put the data sets in a list and check their structure -----------------
str(list(bobcat_collection_data, bobcat_necropsy_only_data, bobcat_age_data))

# or, the same check written as a pipe
list(bobcat_collection_data, bobcat_necropsy_only_data, bobcat_age_data) %>%
  str(.)

# --- Join everything onto the collection data ------------------------------
# left_join() keeps every row of the left-hand table, so all observations
# from bobcat_collection_data are retained.
bobcat_data_joined <- bobcat_collection_data %>%
  # join necropsy data
  left_join(bobcat_necropsy_only_data, by = 'bobcat_id') %>%
  # join age data
  left_join(bobcat_age_data, by = 'bobcat_id')

# check that the join worked
summary(bobcat_data_joined)
```