Submit your R script as a .R (or .Rmd if using markdown) file to Brightspace.
Please make sure your submission includes your name and the assignment number in the filename.
You should always follow best coding practices (see Intro to R module 1), but especially for assignment submissions.
To receive full credit for each assignment
Save the altered turtles data as a comma separated file to the data/processed folder in your working directory using the ‘readr’ package and name it ‘turtles_tidy’
# Lab-script code that tidies the raw turtle data.
# Import the tab-delimited .txt file with the tidyverse read_delim() function.
turtles.df <- read_delim("data/raw/turtle_data.txt", delim = "\t") %>%
  # lower-case every column name so later code is easier to type
  rename_with(tolower) %>%
  # shorten the long column names; rename() reads new_name = old_name
  rename(
    tag = tag_number,
    c_length = carapace_length,
    h_width = head_width
  ) %>%
  # store sex as a factor
  mutate(sex = as.factor(sex))
# answer 1
# Save the tidied turtle data as a comma-separated file with the tidyverse
# write_csv() function: pass the object first, then the quoted output path
# (folder and subfolders, file name, and the .csv extension).
# write_csv() does not create missing folders, so make sure data/processed
# exists first; dir.create() leaves an existing folder alone and its
# "already exists" warning is suppressed.
dir.create("data/processed", recursive = TRUE, showWarnings = FALSE)
write_csv(turtles.df, "data/processed/turtles_tidy.csv")
First, if you haven’t already, download the brown bear damage data (bear_2008_2016.csv) and save it in the data/raw folder.
# Import the bear damage data and store it in the environment as bear_data.
bear_data <- read_csv("data/raw/bear_2008_2016.csv") %>%
  # convert every column name to lowercase
  rename_with(.fn = tolower) %>%
  # rename the two distance columns so their names carry the unit (m_ prefix)
  rename(
    m_to_forest = dist_to_forest,
    m_to_town = dist_to_town
  )
## Rows: 3024 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Targetspp
## dbl (24): Damage, Year, Month, POINT_X, POINT_Y, Bear_abund, Landcover_code,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to confirm the column names changed.
head(bear_data, n = 6)
## # A tibble: 6 × 25
## damage year month targetspp point_x point_y bear_abund landcover_code
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 0 2008 0 <NA> 542583. 532675. 38 311
## 2 0 2008 0 <NA> 540963. 528113. 40 311
## 3 0 2008 0 <NA> 542435. 510385. 38 211
## 4 0 2008 0 <NA> 541244. 507297. 38 231
## 5 0 2008 0 <NA> 542791. 497755. 32 211
## 6 0 2008 0 <NA> 544639. 610454. 32 313
## # ℹ 17 more variables: altitude <dbl>, human_population <dbl>,
## # m_to_forest <dbl>, m_to_town <dbl>, livestock_killed <dbl>,
## # numberdamageperplot <dbl>, shannondivindex <dbl>, prop_arable <dbl>,
## # prop_orchards <dbl>, prop_pasture <dbl>, prop_ag_mosaic <dbl>,
## # prop_seminatural <dbl>, prop_deciduous <dbl>, prop_coniferous <dbl>,
## # prop_mixedforest <dbl>, prop_grassland <dbl>, prop_for_regen <dbl>
# OR
bear_data <- read_csv("data/raw/bear_2008_2016.csv") %>%
  # lowercase the column names with set_names(), as in the example from class
  # (that example was written for purrr functions and was not updated)
  set_names(tolower(names(.))) %>%
  # rename the two distance columns so their names carry the unit (m_ prefix)
  rename(
    m_to_forest = dist_to_forest,
    m_to_town = dist_to_town
  )
## Rows: 3024 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Targetspp
## dbl (24): Damage, Year, Month, POINT_X, POINT_Y, Bear_abund, Landcover_code,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the re-imported data.
bear_data %>%
  head()
## # A tibble: 6 × 25
## damage year month targetspp point_x point_y bear_abund landcover_code
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 0 2008 0 <NA> 542583. 532675. 38 311
## 2 0 2008 0 <NA> 540963. 528113. 40 311
## 3 0 2008 0 <NA> 542435. 510385. 38 211
## 4 0 2008 0 <NA> 541244. 507297. 38 231
## 5 0 2008 0 <NA> 542791. 497755. 32 211
## 6 0 2008 0 <NA> 544639. 610454. 32 313
## # ℹ 17 more variables: altitude <dbl>, human_population <dbl>,
## # m_to_forest <dbl>, m_to_town <dbl>, livestock_killed <dbl>,
## # numberdamageperplot <dbl>, shannondivindex <dbl>, prop_arable <dbl>,
## # prop_orchards <dbl>, prop_pasture <dbl>, prop_ag_mosaic <dbl>,
## # prop_seminatural <dbl>, prop_deciduous <dbl>, prop_coniferous <dbl>,
## # prop_mixedforest <dbl>, prop_grassland <dbl>, prop_for_regen <dbl>
# Inspect the structure to see which type each column was read in as.
str(object = bear_data)
## spc_tbl_ [3,024 × 25] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ damage : num [1:3024] 0 0 0 0 0 0 0 0 0 0 ...
## $ year : num [1:3024] 2008 2008 2008 2008 2008 ...
## $ month : num [1:3024] 0 0 0 0 0 0 0 0 0 0 ...
## $ targetspp : chr [1:3024] NA NA NA NA ...
## $ point_x : num [1:3024] 542583 540963 542435 541244 542791 ...
## $ point_y : num [1:3024] 532675 528113 510385 507297 497755 ...
## $ bear_abund : num [1:3024] 38 40 38 38 32 32 37 45 45 36 ...
## $ landcover_code : num [1:3024] 311 311 211 231 211 313 313 312 324 312 ...
## $ altitude : num [1:3024] 800 666 504 553 470 ...
## $ human_population : num [1:3024] 2 0 2 0 0 0 0 0 0 0 ...
## $ m_to_forest : num [1:3024] 0 0 1829 398 942 ...
## $ m_to_town : num [1:3024] 2398 2441 571 1598 1068 ...
## $ livestock_killed : num [1:3024] 0 0 0 0 0 0 0 0 0 0 ...
## $ numberdamageperplot: num [1:3024] 0 0 0 0 0 0 0 0 0 0 ...
## $ shannondivindex : num [1:3024] 0.963 1.119 1.056 1.506 1.067 ...
## $ prop_arable : num [1:3024] 0 0 65.6 27.3 53.2 ...
## $ prop_orchards : num [1:3024] 0 0 0 0 0 0 0 0 0 0 ...
## $ prop_pasture : num [1:3024] 0 25.3 11.13 20 4.27 ...
## $ prop_ag_mosaic : num [1:3024] 0 0 8.55 0 0 ...
## $ prop_seminatural : num [1:3024] 16.3 30.6 0 19.8 0 ...
## $ prop_deciduous : num [1:3024] 57.796 43.095 0 0.769 32.454 ...
## $ prop_coniferous : num [1:3024] 0 0 0 0 0 ...
## $ prop_mixedforest : num [1:3024] 0 0 0 0 0 ...
## $ prop_grassland : num [1:3024] 0 1 0 0 0 ...
## $ prop_for_regen : num [1:3024] 25.9 0 13.5 0 0 ...
## - attr(*, "spec")=
## .. cols(
## .. Damage = col_double(),
## .. Year = col_double(),
## .. Month = col_double(),
## .. Targetspp = col_character(),
## .. POINT_X = col_double(),
## .. POINT_Y = col_double(),
## .. Bear_abund = col_double(),
## .. Landcover_code = col_double(),
## .. Altitude = col_double(),
## .. Human_population = col_double(),
## .. Dist_to_forest = col_double(),
## .. Dist_to_town = col_double(),
## .. Livestock_killed = col_double(),
## .. Numberdamageperplot = col_double(),
## .. ShannonDivIndex = col_double(),
## .. prop_arable = col_double(),
## .. prop_orchards = col_double(),
## .. prop_pasture = col_double(),
## .. prop_ag_mosaic = col_double(),
## .. prop_seminatural = col_double(),
## .. prop_deciduous = col_double(),
## .. prop_coniferous = col_double(),
## .. prop_mixedforest = col_double(),
## .. prop_grassland = col_double(),
## .. prop_for_regen = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Convert targetspp to a factor using dplyr's mutate().
bear_data <- mutate(bear_data, targetspp = as.factor(targetspp))
# List the levels of the new factor variable.
levels(bear_data[["targetspp"]])
## [1] "alte" "bovine" "ovine"
# Keep only the sheep ("ovine") rows and the specified columns.
bear_sheep_data <- bear_data %>%
  filter(targetspp == "ovine") %>%
  # Range selection is the most parsimonious way to do it. You could also use:
  #   select(damage, year, month, bear_abund, landcover_code, altitude)
  # or
  #   select(1:3, 7:9)
  # The positional form is not preferred: it is easy for someone reading the
  # code to misinterpret, and the references are easy to leave unchanged when
  # the columns need updating.
  select(damage:month, bear_abund:altitude)
# Preview the first rows of the sheep-only subset.
bear_sheep_data %>%
  head()
## # A tibble: 6 × 6
## damage year month bear_abund landcover_code altitude
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 2008 9 40 324 570
## 2 1 2008 9 40 324 570
## 3 1 2008 8 36 321 1410
## 4 1 2008 8 22 321 1068
## 5 1 2008 9 40 112 516
## 6 1 2008 8 56 112 533
Using the summarise() function, calculate the mean, sd, and SE (the formula
for SE is sd / sqrt(n)) for altitude. This last part might be tricky
at first, but give it a try and remember you can always google things and
check the ‘help’ files; and if that fails, ‘phone a friend or
me’
bear_sheep_data %>%
  # use summarise() to calculate summary statistics from the data:
  # the mean, standard deviation (sd), and standard error (SE) of altitude
  summarise(
    mean_alt = mean(altitude),
    sd_alt = sd(altitude),
    # SE = sd / sqrt(n): sd_alt is the column computed on the previous line,
    # and length(altitude) supplies n
    se_alt = sd_alt / sqrt(length(altitude))
  )
## # A tibble: 1 × 3
## mean_alt sd_alt se_alt
## <dbl> <dbl> <dbl>
## 1 699. 183. 12.6
# you could also look up the length for altitude in the console and type the number here directly but this is more flexible and readable
# Another possible answer: use dplyr's n() for the sample size.
bear_sheep_data %>%
  summarise(
    mean_alt = mean(altitude),
    sd_alt = sd(altitude),
    se_alt = sd_alt / sqrt(n())
  )
## # A tibble: 1 × 3
## mean_alt sd_alt se_alt
## <dbl> <dbl> <dbl>
## 1 699. 183. 12.6