Title: | Nima Hejazi's R Toolbox |
---|---|
Description: | Miscellaneous R functions developed as collateral damage over the course of work in statistical and scientific computing for research. These include, for example, utilities that supplement existing idiosyncrasies of the R language, extend existing plotting functionality and aesthetics, help prepare data objects for imputation, and extend access to command line tools and systems-level information. |
Authors: | Nima Hejazi [aut, cre, cph] |
Maintainer: | Nima Hejazi <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.6.2 |
Built: | 2024-11-03 03:04:47 UTC |
Source: | https://github.com/nhejazi/nima |
Take the maximum of the absolute values of an input vector.
absmax(x, na.rm = FALSE)
absmax(x, na.rm = FALSE)
x |
A numeric vector or array. |
na.rm |
A logical indicating whether missing values should be removed. |
The maximum of the absolute values of elements of the input vector.
x <- c(5, 3, -9, -100, 3.14159, 7.5) absmax(x)
x <- c(5, 3, -9, -100, 3.14159, 7.5) absmax(x)
Get the names of the attributes of an input object.
attrnames(obj)
attrnames(obj)
obj |
Any object. |
Vector of character strings with the names of the attributes.
x <- matrix(1:100, ncol = 5) colnames(x) <- LETTERS[1:5] attrnames(x)
x <- matrix(1:100, ncol = 5) colnames(x) <- LETTERS[1:5] attrnames(x)
Clear the screen with a call to `system` and `clear`.
clear()
clear()
This function is merely a call to system("clear")
system("clear")
system("clear")
Convert a number to a string, with commas inserted at every 3rd digit.
commas(numbers)
commas(numbers)
numbers |
Vector of non-negative numbers (will be rounded to integers) |
Character string with numbers written like "5,771,009".
commas(c(2300, 9000, 21456, 987654890, 1256787, 345765, 1432))
commas(c(2300, 9000, 21456, 987654890, 1256787, 345765, 1432))
Discretizes a non-factor input vector and returns the result as numeric.
discrete_by_quantile(x, ...)
discrete_by_quantile(x, ...)
x |
A vector containing arbitrary data. |
... |
Additional arguments passed to |
A numeric vector with the data re-coded based on the quantiles.
x <- rnorm(1000) discrete_by_quantile(x)
x <- rnorm(1000) discrete_by_quantile(x)
Exit R without saving workspace, using the ubiquitous UNIX syntax.
exit()
exit()
This function is merely a call to `q("no")`.
Convert a factor with numeric levels to a non-factor (numeric).
factor_to_num(x)
factor_to_num(x)
x |
A vector containing a factor with numeric levels. |
The input factor made into a numeric vector.
x <- factor(c(3, 4, 9, 4, 9), levels = c(3, 4, 9)) factor_to_num(x)
x <- factor(c(3, 4, 9, 4, 9), levels = c(3, 4, 9)) factor_to_num(x)
View the HTML version of a help file while running R from the terminal.
hweb(...)
hweb(...)
... |
Help topics. |
Calls function `help` using argument `htmlhelp=TRUE`.
hweb(read.table)
hweb(read.table)
Produce standard diagnostic plots for linear models using ggplot2.
lm_plot(x, ...)
lm_plot(x, ...)
x |
A linear model object produced by |
... |
Extra arguments, currently ignored. |
n <- 100 x1 <- rnorm(n) y1 <- rnorm(n) linmod <- lm(y1 ~ x1) plot(linmod)
n <- 100 x1 <- rnorm(n) y1 <- rnorm(n) linmod <- lm(y1 ~ x1) plot(linmod)
Add indicator columns to a data.frame showing the pattern of missingness.
miss_ind(data, prefix = "miss_")
miss_ind(data, prefix = "miss_")
data |
A numeric vector or array. |
prefix |
A string used to name the indicator variables. |
An augmented data.frame with indicators for missingness patterns.
data <- data.frame(cbind(rnorm(10), runif(10))) data[sample(nrow(data), 3), 1] <- NA data[sample(nrow(data), 4), 2] <- NA data <- miss_ind(data)
data <- data.frame(cbind(rnorm(10), runif(10))) data[sample(nrow(data), 3), 1] <- NA data[sample(nrow(data), 4), 2] <- NA data <- miss_ind(data)
Compute the mean squared error (risk under L2 loss).
mse(prediction, outcome)
mse(prediction, outcome)
prediction |
A |
outcome |
A |
x <- rnorm(100) y <- x^2 test_x <- rnorm(100) test_y <- test_x^2 mod <- glm(y ~ x) pred <- predict(mod, newx = as.data.frame(test_x)) error <- mse(prediction = pred, outcome = test_y)
x <- rnorm(100) y <- x^2 test_x <- rnorm(100) test_y <- test_x^2 mod <- glm(y ~ x) pred <- predict(mod, newx = as.data.frame(test_x)) error <- mse(prediction = pred, outcome = test_y)
Compute the empirical risk under cross-entropy loss for binary predictions.
nll(prediction, outcome)
nll(prediction, outcome)
prediction |
A |
outcome |
A |
n_obs <- 100 x <- rnorm(n_obs) y <- rbinom(n_obs, 1, plogis(x^2)) test_x <- rnorm(n_obs) test_y <- rbinom(n_obs, 1, plogis(test_x^2)) mod <- glm(y ~ x, family = "binomial") pred <- predict(mod, newx = as.data.frame(test_x), type = "response") error <- nll(prediction = unname(pred), outcome = test_y)
n_obs <- 100 x <- rnorm(n_obs) y <- rbinom(n_obs, 1, plogis(x^2)) test_x <- rnorm(n_obs) test_y <- rbinom(n_obs, 1, plogis(test_x^2)) mod <- glm(y ~ x, family = "binomial") pred <- predict(mod, newx = as.data.frame(test_x), type = "response") error <- nll(prediction = unname(pred), outcome = test_y)
Open a file using `system` and `open`.
openfile(file)
openfile(file)
file |
File name (as character string). |
Open files from R by using the default operating system program.
## Not run: openfile("myplot.pdf") ## End(Not run)
## Not run: openfile("myplot.pdf") ## End(Not run)
Produce standard quantile-quantile plots for modeling using ggplot2.
qq_plot( x, distribution = "norm", ..., line.estimate = NULL, conf = 0.95, labels = names(x) )
qq_plot( x, distribution = "norm", ..., line.estimate = NULL, conf = 0.95, labels = names(x) )
x |
A numeric vector of residuals from a generalized linear model. |
distribution |
The reference probability distribution for residuals. |
... |
Any additional parameters to be passed to distribution functions. |
line.estimate |
Should quantiles be estimated? If so, which quantiles? |
conf |
The confidence level to be used with confidence intervals. |
labels |
The names to be used when identifying points on the Q-Q plot. |
n <- 100 x1 <- rnorm(n) y1 <- rnorm(n) linmod <- lm(y1 ~ x1) x <- linmod$residuals qq_plot(x)
n <- 100 x1 <- rnorm(n) y1 <- rnorm(n) linmod <- lm(y1 ~ x1) x <- linmod$residuals qq_plot(x)
Nima's ggplot2 theme scale_color supplement: colors optimized via ColorBrewer
scale_color_nima(...)
scale_color_nima(...)
... |
Passed to |
Nima's ggplot2 theme scale_fill supplement: colors optimized via ColorBrewer
scale_fill_nima(...)
scale_fill_nima(...)
... |
Passed to |
Visualize Summaries of Simulation Results
sim_plot(x, ..., sample_sizes, stat = c("bias", "mc_var", "mse"))
sim_plot(x, ..., sample_sizes, stat = c("bias", "mc_var", "mse"))
x |
A |
... |
Extra arguments currently ignored. |
sample_sizes |
A |
stat |
A |
n_sim <- 100 n_obs <- c(100, 10000) mu <- 2 sim_results <- lapply(n_obs, function(sample_size) { estimator_sim <- lapply(seq_len(n_sim), function(iter) { y_obs <- rnorm(sample_size, mu) est_param <- mean(y_obs) est_var <- var(y_obs) estimate <- tibble::as_tibble(list( param_est = est_param, param_var = est_var )) return(estimate) }) estimates <- do.call(rbind, estimator_sim) return(estimates) }) sim_summary <- lapply(sim_results, summarize_sim, truth = mu) p_sim_summary <- sim_plot(sim_summary, sample_sizes = n_obs, stat = "mse") p_sim_summary
n_sim <- 100 n_obs <- c(100, 10000) mu <- 2 sim_results <- lapply(n_obs, function(sample_size) { estimator_sim <- lapply(seq_len(n_sim), function(iter) { y_obs <- rnorm(sample_size, mu) est_param <- mean(y_obs) est_var <- var(y_obs) estimate <- tibble::as_tibble(list( param_est = est_param, param_var = est_var )) return(estimate) }) estimates <- do.call(rbind, estimator_sim) return(estimates) }) sim_summary <- lapply(sim_results, summarize_sim, truth = mu) p_sim_summary <- sim_plot(sim_summary, sample_sizes = n_obs, stat = "mse") p_sim_summary
Summarize Simulation Results
summarize_sim(simulation_results, truth, ci_level = 0.95)
summarize_sim(simulation_results, truth, ci_level = 0.95)
simulation_results |
A |
truth |
A |
ci_level |
A |
n_sim <- 1000 n_obs <- c(100, 10000) mu <- 2 sim_results <- lapply(n_obs, function(sample_size) { estimator_sim <- lapply(seq_len(n_sim), function(iter) { y_obs <- rnorm(sample_size, mu) est_param <- mean(y_obs) est_var <- var(y_obs) / sample_size estimate <- tibble::as_tibble(list( param_est = est_param, param_var = est_var )) return(estimate) }) estimates <- do.call(rbind, estimator_sim) return(estimates) }) sim_summary <- lapply(sim_results, summarize_sim, truth = mu)
n_sim <- 1000 n_obs <- c(100, 10000) mu <- 2 sim_results <- lapply(n_obs, function(sample_size) { estimator_sim <- lapply(seq_len(n_sim), function(iter) { y_obs <- rnorm(sample_size, mu) est_param <- mean(y_obs) est_var <- var(y_obs) / sample_size estimate <- tibble::as_tibble(list( param_est = est_param, param_var = est_var )) return(estimate) }) estimates <- do.call(rbind, estimator_sim) return(estimates) }) sim_summary <- lapply(sim_results, summarize_sim, truth = mu)
A jet black theme with inverted colors
theme_jetblack(base_size = 12, base_family = "")
theme_jetblack(base_size = 12, base_family = "")
base_size |
Base font size |
base_family |
Base font family |
An object as returned by theme
library(ggplot2) p <- ggplot(mtcars, aes(y = mpg, x = disp, color = factor(cyl))) p <- p + geom_point() + theme_jetblack() p
library(ggplot2) p <- ggplot(mtcars, aes(y = mpg, x = disp, color = factor(cyl))) p <- p + geom_point() + theme_jetblack() p
Nima's ggplot2 theme: white background, colors optimized
theme_nima(base_size = 14, base_family = "Helvetica") nima_theme(base_size = 14, base_family = "Helvetica")
theme_nima(base_size = 14, base_family = "Helvetica") nima_theme(base_size = 14, base_family = "Helvetica")
base_size |
Base font size |
base_family |
Base font family |
An object as returned by theme
library(ggplot2) p <- ggplot(mtcars, aes(y = mpg, x = disp, color = factor(cyl))) p <- p + geom_point() + scale_fill_nima() + scale_color_nima() p <- p + theme_nima() p
library(ggplot2) p <- ggplot(mtcars, aes(y = mpg, x = disp, color = factor(cyl))) p <- p + geom_point() + scale_fill_nima() + scale_color_nima() p <- p + theme_nima() p
Get the number of unique values in an input vector.
uniqlen(vec, na.rm = TRUE)
uniqlen(vec, na.rm = TRUE)
vec |
A vector of any type. |
na.rm |
If |
Number of unique values.
x <- c(1, 3, 1, 1, NA, 2, 2, 3, NA, NA, 1, 3, 1) uniqlen(x) uniqlen(x, na.rm = FALSE)
x <- c(1, 3, 1, 1, NA, 2, 2, 3, NA, NA, 1, 3, 1) uniqlen(x) uniqlen(x, na.rm = FALSE)