data.frame(col1 = 1:10)
## col1
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
sqrt(100)
## [1] 10
plot(1:10)data.frame
## function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE,
## fix.empty.names = TRUE, stringsAsFactors = FALSE)
## {
## data.row.names <- if (check.rows && is.null(row.names))
## function(current, new, i) {
## if (is.character(current))
## new <- as.character(new)
## if (is.character(new))
## current <- as.character(current)
## if (anyDuplicated(new))
## return(current)
## if (is.null(current))
## return(new)
## if (all(current == new) || all(current == ""))
## return(new)
## stop(gettextf("mismatch of row names in arguments of 'data.frame', item %d",
## i), domain = NA)
## }
## else function(current, new, i) {
## current %||% if (anyDuplicated(new)) {
## warning(gettextf("some row.names duplicated: %s --> row.names NOT used",
## paste(which(duplicated(new)), collapse = ",")),
## domain = NA)
## current
## }
## else new
## }
## object <- as.list(substitute(list(...)))[-1L]
## mirn <- missing(row.names)
## mrn <- is.null(row.names)
## x <- list(...)
## n <- length(x)
## if (n < 1L) {
## if (!mrn) {
## if (is.object(row.names) || !is.integer(row.names))
## row.names <- as.character(row.names)
## if (anyNA(row.names))
## stop("row names contain missing values")
## if (anyDuplicated(row.names))
## stop(gettextf("duplicate row.names: %s", paste(unique(row.names[duplicated(row.names)]),
## collapse = ", ")), domain = NA)
## }
## else row.names <- integer()
## return(structure(list(), names = character(), row.names = row.names,
## class = "data.frame"))
## }
## vnames <- names(x)
## if (length(vnames) != n)
## vnames <- character(n)
## no.vn <- !nzchar(vnames)
## vlist <- vnames <- as.list(vnames)
## nrows <- ncols <- integer(n)
## for (i in seq_len(n)) {
## xi <- if (is.character(x[[i]]) || is.list(x[[i]]))
## as.data.frame(x[[i]], optional = TRUE, stringsAsFactors = stringsAsFactors)
## else as.data.frame(x[[i]], optional = TRUE)
## nrows[i] <- .row_names_info(xi)
## ncols[i] <- length(xi)
## namesi <- names(xi)
## if (ncols[i] > 1L) {
## if (length(namesi) == 0L)
## namesi <- seq_len(ncols[i])
## vnames[[i]] <- if (no.vn[i])
## namesi
## else paste(vnames[[i]], namesi, sep = ".")
## }
## else if (length(namesi)) {
## vnames[[i]] <- namesi
## }
## else if (fix.empty.names && no.vn[[i]]) {
## tmpname <- deparse(object[[i]], nlines = 1L)[1L]
## if (startsWith(tmpname, "I(") && endsWith(tmpname,
## ")")) {
## ntmpn <- nchar(tmpname, "c")
## tmpname <- substr(tmpname, 3L, ntmpn - 1L)
## }
## vnames[[i]] <- tmpname
## }
## if (mirn && nrows[i] > 0L) {
## rowsi <- attr(xi, "row.names")
## if (any(nzchar(rowsi)))
## row.names <- data.row.names(row.names, rowsi,
## i)
## }
## nrows[i] <- abs(nrows[i])
## vlist[[i]] <- xi
## }
## nr <- max(nrows)
## for (i in seq_len(n)[nrows < nr]) {
## xi <- vlist[[i]]
## if (nrows[i] > 0L && (nr%%nrows[i] == 0L)) {
## xi <- unclass(xi)
## fixed <- TRUE
## for (j in seq_along(xi)) {
## xi1 <- xi[[j]]
## if (is.vector(xi1) || is.factor(xi1))
## xi[[j]] <- rep(xi1, length.out = nr)
## else if (is.character(xi1) && inherits(xi1, "AsIs"))
## xi[[j]] <- structure(rep(xi1, length.out = nr),
## class = class(xi1))
## else if (inherits(xi1, "Date") || inherits(xi1,
## "POSIXct"))
## xi[[j]] <- rep(xi1, length.out = nr)
## else {
## fixed <- FALSE
## break
## }
## }
## if (fixed) {
## vlist[[i]] <- xi
## next
## }
## }
## stop(gettextf("arguments imply differing number of rows: %s",
## paste(unique(nrows), collapse = ", ")), domain = NA)
## }
## value <- unlist(vlist, recursive = FALSE, use.names = FALSE)
## vnames <- as.character(unlist(vnames[ncols > 0L]))
## if (fix.empty.names && any(noname <- !nzchar(vnames)))
## vnames[noname] <- paste0("Var.", seq_along(vnames))[noname]
## if (check.names) {
## if (fix.empty.names)
## vnames <- make.names(vnames, unique = TRUE)
## else {
## nz <- nzchar(vnames)
## vnames[nz] <- make.names(vnames[nz], unique = TRUE)
## }
## }
## names(value) <- vnames
## if (!mrn) {
## if (length(row.names) == 1L && nr != 1L) {
## if (is.character(row.names))
## row.names <- match(row.names, vnames, 0L)
## if (length(row.names) != 1L || row.names < 1L ||
## row.names > length(vnames))
## stop("'row.names' should specify one of the variables")
## i <- row.names
## row.names <- value[[i]]
## value <- value[-i]
## }
## else if (!is.null(row.names) && length(row.names) !=
## nr)
## stop("row names supplied are of the wrong length")
## }
## else if (!is.null(row.names) && length(row.names) != nr) {
## warning("row names were found from a short variable and have been discarded")
## row.names <- NULL
## }
## class(value) <- "data.frame"
## if (is.null(row.names))
## attr(value, "row.names") <- .set_row_names(nr)
## else {
## if (is.object(row.names) || !is.integer(row.names))
## row.names <- as.character(row.names)
## if (anyNA(row.names))
## stop("row names contain missing values")
## if (anyDuplicated(row.names))
## stop(gettextf("duplicate row.names: %s", paste(unique(row.names[duplicated(row.names)]),
## collapse = ", ")), domain = NA)
## row.names(value) <- row.names
## }
## value
## }
## <bytecode: 0x64cb926013b8>
## <environment: namespace:base>How to write functions
Why to write functions
How to share functions
Each function is defined by its name, its arguments, and its body
Each function is defined by its name, its arguments, and its body
Each function can accept multiple arguments
Each function can accept multiple arguments
Each function can only return one thing
Solution: create a vector with both roots, and return the vector.
Functions can call on other functions
library(tidyverse)
make_scatterplot <- function(dataframe, x_col, y_col) {
# Note that this function makes use of {{}} for Tidy Evaluation
# See https://r4ds.hadley.nz/functions.html#indirection-and-tidy-evaluation for details
ggplot(dataframe, aes(x = {{x_col}}, y = {{y_col}})) +
geom_point()
}
library(palmerpenguins)
make_scatterplot(penguins, bill_length_mm, bill_depth_mm)Functions break if provided more arguments than they know what to do with
Functions break if provided more arguments than they know what to do with
... argument enables additional arguments to be passed to functionsmake_scatterplot <- function(dataframe, x_col, y_col) {
ggplot(dataframe, aes(x = {{x_col}}, y = {{y_col}})) +
geom_point()
}
make_scatterplot(penguins, bill_length_mm, bill_depth_mm, size = 4, color = "island")
## Error in make_scatterplot(penguins, bill_length_mm, bill_depth_mm, size = 4, : unused arguments (size = 4, color = "island")Functions break if provided more arguments than they know what to do with
... argument enables additional arguments to be passed to functionsFunctions allow you to minimize repetition
Functions allow a set of logic to be applied across contexts
Functions help achieve abstraction of details
run_model() so that your reader knows exactly what you are trying to achieveWriting a function has four big advantages over using copy-and-paste:
You can give a function an evocative name that makes your code easier to understand.
As requirements change, you only need to update code in one place, instead of many.
You eliminate the chance of making incidental mistakes when you copy and paste (i.e. updating a variable name in one place, but not in another).
It makes it easier to reuse work from project-to-project, increasing your productivity over time.
But if we anticipate wider usage, we need to practice “defensive” programming
But if we anticipate wider usage, we need to practice “defensive” programming
Building in “checks” into the function
If we anticipate wider usage, build in lots of “checks”
divide <- function(numerator = NULL, denominator = NULL) {
if(is.null(numerator) | is.null(denominator)) {
stop("Please define a numerator and a denominator")
}
if(denominator == 0) {
stop("Dividing by 0 is not allowed")
}
if(!is.numeric(numerator) | !is.numeric(denominator)) {
stop("Both numerator and denominator need to be numeric")
}
numerator/denominator
}
divide(3,'four')
## Error in divide(3, "four"): Both numerator and denominator need to be numeric
divide(3)
## Error in divide(3): Please define a numerator and a denominator
divide(3, 0)
## Error in divide(3, 0): Dividing by 0 is not allowedIf we anticipate wider usage, document everything
# Define a function called divide that divides the numerator by the denominator
# This function accepts two arguments; if unnamed, the first is treated as the
# numerator and the second is treated as the denominator
# Assuming the division is valid (e.g. no division by zero), this function returns a number
divide <- function(numerator = NULL, denominator = NULL) {
if(is.null(numerator) | is.null(denominator)) {
stop("Please define a numerator and a denominator")
}
if(length(numerator) != 1 | length(denominator) != 1) {
stop("The numerator and denominator should each be exactly one number")
}
if(denominator == 0) {
stop("Dividing by 0 is not allowed")
}
if(!is.numeric(numerator) | !is.numeric(denominator)) {
stop("Both numerator and denominator need to be numeric")
}
numerator/denominator
}
divide(3,'four')
## Error in divide(3, "four"): Both numerator and denominator need to be numeric
divide(3)
## Error in divide(3): Please define a numerator and a denominator
divide(3, 0)
## Error in divide(3, 0): Dividing by 0 is not allowed
divide(c(3,10),4)
## Error in divide(c(3, 10), 4): The numerator and denominator should each be exactly one numberThe easiest way to share functions is by bundling related functions into a package
This could be the topic of a whole “Crash course” in itself, so we won’t cover in depth here.
See https://r-pkgs.org/ for a comprehensive guide
Good packages reach thousands of people - easy way to have a big impact!