Writing functions in R

Things get done through functions

data.frame(col1 = 1:10)
##    col1
## 1     1
## 2     2
## 3     3
## 4     4
## 5     5
## 6     6
## 7     7
## 8     8
## 9     9
## 10   10

sqrt(100)
## [1] 10

plot(1:10)

Things get done through functions

data.frame
## function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, 
##     fix.empty.names = TRUE, stringsAsFactors = FALSE) 
## {
##     data.row.names <- if (check.rows && is.null(row.names)) 
##         function(current, new, i) {
##             if (is.character(current)) 
##                 new <- as.character(new)
##             if (is.character(new)) 
##                 current <- as.character(current)
##             if (anyDuplicated(new)) 
##                 return(current)
##             if (is.null(current)) 
##                 return(new)
##             if (all(current == new) || all(current == "")) 
##                 return(new)
##             stop(gettextf("mismatch of row names in arguments of 'data.frame', item %d", 
##                 i), domain = NA)
##         }
##     else function(current, new, i) {
##         current %||% if (anyDuplicated(new)) {
##             warning(gettextf("some row.names duplicated: %s --> row.names NOT used", 
##                 paste(which(duplicated(new)), collapse = ",")), 
##                 domain = NA)
##             current
##         }
##         else new
##     }
##     object <- as.list(substitute(list(...)))[-1L]
##     mirn <- missing(row.names)
##     mrn <- is.null(row.names)
##     x <- list(...)
##     n <- length(x)
##     if (n < 1L) {
##         if (!mrn) {
##             if (is.object(row.names) || !is.integer(row.names)) 
##                 row.names <- as.character(row.names)
##             if (anyNA(row.names)) 
##                 stop("row names contain missing values")
##             if (anyDuplicated(row.names)) 
##                 stop(gettextf("duplicate row.names: %s", paste(unique(row.names[duplicated(row.names)]), 
##                   collapse = ", ")), domain = NA)
##         }
##         else row.names <- integer()
##         return(structure(list(), names = character(), row.names = row.names, 
##             class = "data.frame"))
##     }
##     vnames <- names(x)
##     if (length(vnames) != n) 
##         vnames <- character(n)
##     no.vn <- !nzchar(vnames)
##     vlist <- vnames <- as.list(vnames)
##     nrows <- ncols <- integer(n)
##     for (i in seq_len(n)) {
##         xi <- if (is.character(x[[i]]) || is.list(x[[i]])) 
##             as.data.frame(x[[i]], optional = TRUE, stringsAsFactors = stringsAsFactors)
##         else as.data.frame(x[[i]], optional = TRUE)
##         nrows[i] <- .row_names_info(xi)
##         ncols[i] <- length(xi)
##         namesi <- names(xi)
##         if (ncols[i] > 1L) {
##             if (length(namesi) == 0L) 
##                 namesi <- seq_len(ncols[i])
##             vnames[[i]] <- if (no.vn[i]) 
##                 namesi
##             else paste(vnames[[i]], namesi, sep = ".")
##         }
##         else if (length(namesi)) {
##             vnames[[i]] <- namesi
##         }
##         else if (fix.empty.names && no.vn[[i]]) {
##             tmpname <- deparse(object[[i]], nlines = 1L)[1L]
##             if (startsWith(tmpname, "I(") && endsWith(tmpname, 
##                 ")")) {
##                 ntmpn <- nchar(tmpname, "c")
##                 tmpname <- substr(tmpname, 3L, ntmpn - 1L)
##             }
##             vnames[[i]] <- tmpname
##         }
##         if (mirn && nrows[i] > 0L) {
##             rowsi <- attr(xi, "row.names")
##             if (any(nzchar(rowsi))) 
##                 row.names <- data.row.names(row.names, rowsi, 
##                   i)
##         }
##         nrows[i] <- abs(nrows[i])
##         vlist[[i]] <- xi
##     }
##     nr <- max(nrows)
##     for (i in seq_len(n)[nrows < nr]) {
##         xi <- vlist[[i]]
##         if (nrows[i] > 0L && (nr%%nrows[i] == 0L)) {
##             xi <- unclass(xi)
##             fixed <- TRUE
##             for (j in seq_along(xi)) {
##                 xi1 <- xi[[j]]
##                 if (is.vector(xi1) || is.factor(xi1)) 
##                   xi[[j]] <- rep(xi1, length.out = nr)
##                 else if (is.character(xi1) && inherits(xi1, "AsIs")) 
##                   xi[[j]] <- structure(rep(xi1, length.out = nr), 
##                     class = class(xi1))
##                 else if (inherits(xi1, "Date") || inherits(xi1, 
##                   "POSIXct")) 
##                   xi[[j]] <- rep(xi1, length.out = nr)
##                 else {
##                   fixed <- FALSE
##                   break
##                 }
##             }
##             if (fixed) {
##                 vlist[[i]] <- xi
##                 next
##             }
##         }
##         stop(gettextf("arguments imply differing number of rows: %s", 
##             paste(unique(nrows), collapse = ", ")), domain = NA)
##     }
##     value <- unlist(vlist, recursive = FALSE, use.names = FALSE)
##     vnames <- as.character(unlist(vnames[ncols > 0L]))
##     if (fix.empty.names && any(noname <- !nzchar(vnames))) 
##         vnames[noname] <- paste0("Var.", seq_along(vnames))[noname]
##     if (check.names) {
##         if (fix.empty.names) 
##             vnames <- make.names(vnames, unique = TRUE)
##         else {
##             nz <- nzchar(vnames)
##             vnames[nz] <- make.names(vnames[nz], unique = TRUE)
##         }
##     }
##     names(value) <- vnames
##     if (!mrn) {
##         if (length(row.names) == 1L && nr != 1L) {
##             if (is.character(row.names)) 
##                 row.names <- match(row.names, vnames, 0L)
##             if (length(row.names) != 1L || row.names < 1L || 
##                 row.names > length(vnames)) 
##                 stop("'row.names' should specify one of the variables")
##             i <- row.names
##             row.names <- value[[i]]
##             value <- value[-i]
##         }
##         else if (!is.null(row.names) && length(row.names) != 
##             nr) 
##             stop("row names supplied are of the wrong length")
##     }
##     else if (!is.null(row.names) && length(row.names) != nr) {
##         warning("row names were found from a short variable and have been discarded")
##         row.names <- NULL
##     }
##     class(value) <- "data.frame"
##     if (is.null(row.names)) 
##         attr(value, "row.names") <- .set_row_names(nr)
##     else {
##         if (is.object(row.names) || !is.integer(row.names)) 
##             row.names <- as.character(row.names)
##         if (anyNA(row.names)) 
##             stop("row names contain missing values")
##         if (anyDuplicated(row.names)) 
##             stop(gettextf("duplicate row.names: %s", paste(unique(row.names[duplicated(row.names)]), 
##                 collapse = ", ")), domain = NA)
##         row.names(value) <- row.names
##     }
##     value
## }
## <bytecode: 0x64cb926013b8>
## <environment: namespace:base>

Things get done through functions

plot
## function (x, y, ...) 
## UseMethod("plot")
## <bytecode: 0x64cb914184b8>
## <environment: namespace:base>

Things get done through functions

sqrt
## function (x)  .Primitive("sqrt")

Packages: collections of functions

Today’s lesson

  1. How to write functions

  2. Why to write functions

  3. How to share functions

How to write functions

Anatomy of a function

Each function is defined by its name, its arguments, and its body

my_function <- function(parameters) {
  body
}

Anatomy of a function

Each function is defined by its name, its arguments, and its body

square <- function(number) {
  number*number
}

square(3)
## [1] 9

square(square(3))
## [1] 81

Anatomy of a function

Each function can accept multiple arguments

divide <- function(numerator, denominator) {
  numerator/denominator
}

divide(12, 4)
## [1] 3

divide(9, 3)
## [1] 3

Anatomy of a function

Each function can accept multiple arguments

  • Order of arguments is assumed, but can be specified
divide <- function(numerator, denominator) {
  numerator/denominator
}

divide(12, 3)
## [1] 4

divide(3, 12)
## [1] 0.25

divide(denominator = 3, numerator = 12)
## [1] 4

Anatomy of a function

Each function can only return one thing

  • e.g. write a function that returns both (positive and negative) roots of its input
square_root <- function(number) {
  root <- sqrt(number)
  negative_root = root*-1
  
  root
  negative_root
}

square_root(49)
## [1] -7

Anatomy of a function

Solution: create a vector with both roots, and return the vector.

square_root <- function(number) {
  root <- sqrt(number)
  negative_root = root*-1
  
  roots <- c(positive_root = root, negative_root = negative_root)
  return(roots)
}

square_root(49)
## positive_root negative_root 
##             7            -7

Anatomy of a function

Functions can call on other functions

library(tidyverse)

make_scatterplot <- function(dataframe, x_col, y_col) {
  
  # Note that this function makes use of {{}} for Tidy Evaluation
  # See https://r4ds.hadley.nz/functions.html#indirection-and-tidy-evaluation for details
  
  ggplot(dataframe, aes(x = {{x_col}}, y = {{y_col}})) + 
    geom_point()
  
}

library(palmerpenguins)
make_scatterplot(penguins, bill_length_mm, bill_depth_mm)

Anatomy of a function

Functions break if provided more arguments than they know what to do with

divide <- function(numerator, denominator) {
  numerator/denominator
}

divide(3, 4, 5)
## Error in divide(3, 4, 5): unused argument (5)

Anatomy of a function

Functions break if provided more arguments than they know what to do with

  • But the ... argument enables additional arguments to be passed to functions
make_scatterplot <- function(dataframe, x_col, y_col) {
  ggplot(dataframe, aes(x = {{x_col}}, y = {{y_col}})) + 
    geom_point()
}

make_scatterplot(penguins, bill_length_mm, bill_depth_mm, size = 4, color = "island")
## Error in make_scatterplot(penguins, bill_length_mm, bill_depth_mm, size = 4, : unused arguments (size = 4, color = "island")

Functions break if provided more arguments than they know what to do with

  • But the ... argument enables additional arguments to be passed to functions
make_scatterplot <- function(dataframe, x_col, y_col, ...) {
  ggplot(dataframe, aes(x = {{x_col}}, y = {{y_col}})) + 
    geom_point(...)
}

make_scatterplot(penguins, bill_length_mm, bill_depth_mm, ize = 4, color = "darkgreen")

Why to write functions

General principle

  • Functions allow you to minimize repetition

  • Functions allow a set of logic to be applied across contexts

  • Functions help achieve abstraction of details

Minimizing repetition

  • General guideline: if the same logic appears in >2 points, consider encapsulating it in a function
    • If you need to do it a fourth time, it’s trivial to apply
    • If something about the logic needs to change, only need to change it once

Applying logic across contexts

  • General guideline: if the logic you are developing can be applied to lots of different datasets, consider encapsulating it in a function
    • e.g. Shannon diversity can be calculated for any community
    • e.g. Standard error of the mean can be calculated for any vector

Achieving abstraction

  • General guideline: Abstractions can help make code more readable if they ‘hide away’ complex logic
    • e.g. code to run a model might be >100 lines long, but you can “hide” it in a function called run_model() so that your reader knows exactly what you are trying to achieve

Writing a function has four big advantages over using copy-and-paste:

  1. You can give a function an evocative name that makes your code easier to understand.

  2. As requirements change, you only need to update code in one place, instead of many.

  3. You eliminate the chance of making incidental mistakes when you copy and paste (i.e. updating a variable name in one place, but not in another).

  4. It makes it easier to reuse work from project-to-project, increasing your productivity over time.

https://r4ds.hadley.nz/functions.html

How to share functions

Functions can be applied across contexts

But if we anticipate wider usage, we need to practice “defensive” programming

divide <- function(numerator, denominator) {
  numerator/denominator
}

divide(3,'four')
## Error in numerator/denominator: non-numeric argument to binary operator

Functions can be applied across contexts

But if we anticipate wider usage, we need to practice “defensive” programming

Building in “checks” into the function

divide <- function(numerator, denominator) {
  if(!is.numeric(numerator) | !is.numeric(denominator)) {
    stop("Both numerator and denominator need to be numeric")
  }
  numerator/denominator
}

divide(3,'four')
## Error in divide(3, "four"): Both numerator and denominator need to be numeric

If we anticipate wider usage, build in lots of “checks”

divide <- function(numerator = NULL, denominator = NULL) {
  
  if(is.null(numerator) | is.null(denominator)) {
    stop("Please define a numerator and a denominator")
  }
  
  if(denominator == 0) {
    stop("Dividing by 0 is not allowed")
  }
  
  if(!is.numeric(numerator) | !is.numeric(denominator)) {
    stop("Both numerator and denominator need to be numeric")
  }
  
  
  numerator/denominator
}

divide(3,'four')
## Error in divide(3, "four"): Both numerator and denominator need to be numeric
divide(3)
## Error in divide(3): Please define a numerator and a denominator
divide(3, 0)
## Error in divide(3, 0): Dividing by 0 is not allowed

If we anticipate wider usage, document everything

# Define a function called divide that divides the numerator by the denominator
# This function accepts two arguments; if unnamed, the first is treated as the
# numerator and the second is treated as the denominator
# Assuming the division is valid (e.g. no division by zero), this function returns a number

divide <- function(numerator = NULL, denominator = NULL) {
  
  if(is.null(numerator) | is.null(denominator)) {
    stop("Please define a numerator and a denominator")
  }
  
  if(length(numerator) != 1 | length(denominator) != 1) {
    stop("The numerator and denominator should each be exactly one number")
  }
  
  if(denominator == 0) {
    stop("Dividing by 0 is not allowed")
  }
  
  if(!is.numeric(numerator) | !is.numeric(denominator)) {
    stop("Both numerator and denominator need to be numeric")
  }
  
  
  numerator/denominator
}

divide(3,'four')
## Error in divide(3, "four"): Both numerator and denominator need to be numeric
divide(3)
## Error in divide(3): Please define a numerator and a denominator
divide(3, 0)
## Error in divide(3, 0): Dividing by 0 is not allowed
divide(c(3,10),4)
## Error in divide(c(3, 10), 4): The numerator and denominator should each be exactly one number

Combining functions into a Package

  • The easiest way to share functions is by bundling related functions into a package

  • This could be the topic of a whole “Crash course” in itself, so we won’t cover in depth here.

  • See https://r-pkgs.org/ for a comprehensive guide

  • Good packages reach thousands of people - easy way to have a big impact!