Title: | Fits item response theory models to big data |
---|---|
Description: | Fits item response theory models to big data. |
Authors: | Charles Driver [aut, cre] |
Maintainer: | Charles Driver <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.1.8 |
Built: | 2025-02-06 05:27:45 UTC |
Source: | https://github.com/cdriveraus/bigIRT |
A DESCRIPTION OF THE PACKAGE
Maintainer: Charles Driver [email protected] (ORCID)
Stan Development Team (NA). RStan: the R interface to Stan. R package version 2.26.1. https://mc-stan.org
#Generate some data (here 2pl model)
require(data.table)
dat <- simIRT(Nsubs = 5000,Nitems = 100,Nscales = 1,
  logitCMean = -10,logitCSD = 0,AMean = 1,ASD = .3,
  BMean=0,BSD = .5,
  AbilityMean = 0,AbilitySD = 1)
#convert to wide for TAM
wdat <- data.frame(dcast(data.table(dat$dat),formula = 'id ~ Item',value.var='score')[,-1])
#fit using TAM
require(TAM)
tfit <-tam.mml.2pl(resp = wdat,est.variance = TRUE)
#fit using bigIRT
fit <- fitIRT(dat$dat,cores=2,pl=2)
#some summary stuff:
plot(dat$Ability,(fit$pars$Ability-dat$Ability)^2) #ability error given ability
sqrt(mean((fit$pars$Ability-dat$Ability)^2)) #rms error stat
#correlations of estimated vs true
cor(data.frame(True=dat$Ability,Est=fit$pars$Ability))
cor(data.frame(True=dat$A,Est=fit$pars$A))
cor(data.frame(True=dat$B,Est=fit$pars$B))
#Generate some data (here 2pl model)
require(data.table)
dat <- simIRT(Nsubs = 5000,Nitems = 100,Nscales = 1,
  logitCMean = -10,logitCSD = 0,AMean = 1,ASD = .3,
  BMean=0,BSD = .5,
  AbilityMean = 0,AbilitySD = 1)
#convert to wide for TAM
wdat <- data.frame(dcast(data.table(dat$dat),formula = 'id ~ Item',value.var='score')[,-1])
#fit using TAM
require(TAM)
tfit <-tam.mml.2pl(resp = wdat,est.variance = TRUE)
#fit using bigIRT
fit <- fitIRT(dat$dat,cores=2,pl=2)
#some summary stuff:
plot(dat$Ability,(fit$pars$Ability-dat$Ability)^2) #ability error given ability
sqrt(mean((fit$pars$Ability-dat$Ability)^2)) #rms error stat
#correlations of estimated vs true
cor(data.frame(True=dat$Ability,Est=fit$pars$Ability))
cor(data.frame(True=dat$A,Est=fit$pars$A))
cor(data.frame(True=dat$B,Est=fit$pars$B))
This function drops variables/items and subjects that have all perfect scores (either all 0's or all 1's) in a data table.
dropPerfectScores( dat, scoreref. = "score", itemref. = "Item", idref. = "id", tol. = 0.001 )
dropPerfectScores( dat, scoreref. = "score", itemref. = "Item", idref. = "id", tol. = 0.001 )
dat |
The input data table |
scoreref |
The column name of the score variable in `dat`. |
itemref |
The column name of the item variable in `dat`. |
idref |
The column name of the id variable in `dat`. |
tol |
Tolerance level for checking perfect scores – .01 would drop subjects with less than 1% correct or incorrect |
The input data table (`dat`) without variables/items and subjects with all perfect scores.
dat <- data.table(id=c(1,1,1,2,2,2,3,3,3), Item=c('I1','I2','I3','I1','I2','I3','I1','I2','I3'), score=c(1,0,1,0,0,0,0,1,1)) print(dropPerfectScores(dat))
dat <- data.table(id=c(1,1,1,2,2,2,3,3,3), Item=c('I1','I2','I3','I1','I2','I3','I1','I2','I3'), score=c(1,0,1,0,0,0,0,1,1)) print(dropPerfectScores(dat))
This function fits a binary Item Response Theory (IRT) model using various parameters and options.
fitIRT( dat, score = "score", id = "id", item = "Item", scale = "Scale", pl = 1, personDat = NA, personPreds = character(), itemDat = NA, AitemPreds = character(), BitemPreds = character(), CitemPreds = character(), DitemPreds = character(), itemSpecificBetas = FALSE, betaScale = 10, invspAMeandat = 0.542, invspASD = 2, BMeandat = 0, BSD = 10, logitCMeandat = -4, logitCSD = 2, logitDMeandat = 4, logitDSD = 2, AbilityMeandat = array(0, dim = c(length(unique(dat[[scale]])))), AbilitySD = array(10, dim = c(length(unique(dat[[scale]])))), AbilityCorr = diag(1, c(length(unique(dat[[scale]])))), AMeanSD = 1, BMeanSD = BSD, logitCMeanSD = logitCSD, logitDMeanSD = logitDSD, AbilityMeanSD = array(1, dim = c(length(unique(dat[[scale]])))), iter = 2000, cores = 6, carefulfit = FALSE, ebayes = TRUE, ebayesmultiplier = 2, ebayesFromFixed = FALSE, ebayesiter = 1, estMeans = c("ability", "B", "C", "D"), priors = TRUE, integrateEachAbility = FALSE, integrateEachAbilityFixedSE = FALSE, mml = FALSE, NintegratePoints = 5, normalise = FALSE, normaliseScale = 1, normaliseMean = 0, dropPerfectScores = TRUE, trainingRows = 1:nrow(dat), init = NA, tol = 1e-08 * 10^(log(nrow(dat), 10)), ... )
fitIRT( dat, score = "score", id = "id", item = "Item", scale = "Scale", pl = 1, personDat = NA, personPreds = character(), itemDat = NA, AitemPreds = character(), BitemPreds = character(), CitemPreds = character(), DitemPreds = character(), itemSpecificBetas = FALSE, betaScale = 10, invspAMeandat = 0.542, invspASD = 2, BMeandat = 0, BSD = 10, logitCMeandat = -4, logitCSD = 2, logitDMeandat = 4, logitDSD = 2, AbilityMeandat = array(0, dim = c(length(unique(dat[[scale]])))), AbilitySD = array(10, dim = c(length(unique(dat[[scale]])))), AbilityCorr = diag(1, c(length(unique(dat[[scale]])))), AMeanSD = 1, BMeanSD = BSD, logitCMeanSD = logitCSD, logitDMeanSD = logitDSD, AbilityMeanSD = array(1, dim = c(length(unique(dat[[scale]])))), iter = 2000, cores = 6, carefulfit = FALSE, ebayes = TRUE, ebayesmultiplier = 2, ebayesFromFixed = FALSE, ebayesiter = 1, estMeans = c("ability", "B", "C", "D"), priors = TRUE, integrateEachAbility = FALSE, integrateEachAbilityFixedSE = FALSE, mml = FALSE, NintegratePoints = 5, normalise = FALSE, normaliseScale = 1, normaliseMean = 0, dropPerfectScores = TRUE, trainingRows = 1:nrow(dat), init = NA, tol = 1e-08 * 10^(log(nrow(dat), 10)), ... )
dat |
A data frame containing the data to be analyzed. |
score |
Character. The name of the column in `dat` that holds the score variable. Default is "score". |
id |
Character. The name of the column in `dat` that holds the id variable. Default is "id". |
item |
Character. The name of the column in `dat` that holds the item variable. Default is "Item". |
scale |
Character. The name of the column in `dat` that holds the scale variable. Default is "Scale". |
pl |
Integer. The number of parameters for the logistic model (1PL, 2PL, 3PL, or 4PL). Default is 1. |
personDat |
Data frame. Any fixed ability data for persons. Default is NA. |
personPreds |
Character vector. Names of predictors for person parameters found in data. Default is an empty character vector. |
itemDat |
Data frame. Any fixed item data. Default is NA. |
AitemPreds |
Character vector. Names of predictors for item discrimination parameters. Default is an empty character vector. |
BitemPreds |
Character vector. Names of predictors for item difficulty parameters. Default is an empty character vector. |
CitemPreds |
Character vector. Names of predictors for item guessing parameters. Default is an empty character vector. |
DitemPreds |
Character vector. Names of predictors for item upper asymptote (slipping) parameters. Default is an empty character vector. |
itemSpecificBetas |
Logical. Whether to allow item-specific betas for covariate effects, or simply estimate one effect per covariate. Default is FALSE. |
betaScale |
Numeric. Scale of the prior for beta parameters. Default is 10. |
invspAMeandat |
Numeric. Mean for the prior distribution of the raw discrimination parameters, which subsequently have a 'softplus' log(1+exp(x)) applied. Default is 0.542, giving a mean for A pars of ~ 1. |
invspASD |
Numeric. Standard deviation for the prior distribution of the raw discrimination parameters. Default is 2. |
BMeandat |
Numeric. Mean for the prior distribution of the item difficulty parameters. Default is 0. |
BSD |
Numeric. Standard deviation for the prior distribution of the item difficulty parameters. Default is 10. |
logitCMeandat |
Numeric. Mean for the prior distribution of the item guessing parameters (on logit scale). Default is -4. |
logitCSD |
Numeric. Standard deviation for the prior distribution of the item guessing parameters (on logit scale). Default is 2. |
logitDMeandat |
Numeric. Mean for the prior distribution of the item upper asymptote parameters (on logit scale). Default is 4. |
logitDSD |
Numeric. Standard deviation for the prior distribution of the item upper asymptote parameters (on logit scale). Default is 2. |
AbilityMeandat |
Numeric array. Mean for the prior distribution of the ability parameters. Default is 0 for each scale. |
AbilitySD |
Numeric array. Standard deviation for the prior distribution of the ability parameters. Default is 10 for each scale. |
AbilityCorr |
Matrix. Correlation matrix for the ability parameters. Default is an identity matrix. |
AMeanSD |
Numeric. Standard deviation for the prior distribution of the discrimination parameters. Default is 1. |
BMeanSD |
Numeric. Standard deviation for the prior distribution of the difficulty parameters. Default is `BSD`. |
logitCMeanSD |
Numeric. Standard deviation for the prior distribution of the guessing parameters (on logit scale). Default is `logitCSD`. |
logitDMeanSD |
Numeric. Standard deviation for the prior distribution of the upper asymptote parameters (on logit scale). Default is `logitDSD`. |
AbilityMeanSD |
Numeric array. Standard deviation for the prior distribution of the ability parameters. Default is 1 for each scale. |
iter |
Integer. Maximum number of iterations for the fitting algorithm. Default is 2000. |
cores |
Integer. Number of cores to use for parallel computation. Default is 6. |
carefulfit |
Logical. Whether to use a slower, careful fitting procedure. Default is FALSE. Experimental. |
ebayes |
Logical. Whether to use empirical Bayes estimation. Default is TRUE. With ebayes, the priors are adapted based on a first pass estimate. |
ebayesmultiplier |
Numeric. Multiplier for the widths of the empirical Bayes priors. Default is 2, as this appears to work better in practice. |
ebayesFromFixed |
Logical. Whether to initialize empirical Bayes from any specified fixed values. Default is FALSE. |
ebayesiter |
Integer. Number of iterations for empirical Bayes estimation. Default is 1. |
estMeans |
Character vector. Which means to estimate from 'ability', 'A', 'B', 'C', 'D'. Default is c('ability', 'B', 'C', 'D'), with discrimination means fixed. |
priors |
Logical. Whether to use prior distributions. Default is TRUE. |
integrateEachAbility |
Logical. Whether to integrate across each ability. Default is FALSE. |
integrateEachAbilityFixedSE |
Logical. Whether to integrate each ability with fixed standard error. Default is FALSE. |
mml |
Logical. Experimental and not working well. Whether to use marginal maximum likelihood estimation. Default is FALSE. |
NintegratePoints |
Integer. Number of integration points for numerical integration. Default is 5. |
normalise |
Logical. Whether to normalize the output estimates. Default is FALSE. |
normaliseScale |
Numeric. Scale for normalization. Default is 1. |
normaliseMean |
Numeric. Mean for normalization. Default is 0. |
dropPerfectScores |
Logical. Whether to drop perfect scores from each subject and item before estimation. Default is TRUE. |
trainingRows |
Integer vector. Rows of data to use for estimation of parameters. Default is all rows in `dat`. |
init |
Initial values for the fitting algorithm. Default is NA. |
tol |
Numeric. Tolerance for convergence. Default attempts to sensibly adjust for amount of data. |
... |
Additional arguments passed to the fitting function. |
A list containing the fitted IRT model parameters and additional information about the fit.
#Generate some data (here 2pl model)
require(data.table)
dat <- simIRT(Nsubs = 50,Nitems = 100,Nscales = 1,
  logitCMean = -10,logitCSD = 0,AMean = 1,ASD = .3,
  BMean=0,BSD = .5,
  AbilityMean = 0,AbilitySD = 1)
#fit using bigIRT
fit <- fitIRT(dat$dat,cores=2,score = 'score',id = 'id',
  scale = 'Scale',item = 'Item', pl=2)
print(fit$personPars)
print(fit$itemPars)
#Generate some data (here 2pl model)
require(data.table)
dat <- simIRT(Nsubs = 50,Nitems = 100,Nscales = 1,
  logitCMean = -10,logitCSD = 0,AMean = 1,ASD = .3,
  BMean=0,BSD = .5,
  AbilityMean = 0,AbilitySD = 1)
#fit using bigIRT
fit <- fitIRT(dat$dat,cores=2,score = 'score',id = 'id',
  scale = 'Scale',item = 'Item', pl=2)
print(fit$personPars)
print(fit$itemPars)
Normalise item response theory (IRT) parameters.
normaliseIRT( B, Ability, A, normbase = "Ability", normaliseScale = 1, normaliseMean = ifelse(normbase == "A", 1, 0), robust = TRUE )
normaliseIRT( B, Ability, A, normbase = "Ability", normaliseScale = 1, normaliseMean = ifelse(normbase == "A", 1, 0), robust = TRUE )
B |
Vector of item difficulty parameters. |
Ability |
Vector of persons' ability parameters. |
A |
Vector of item discrimination parameters. |
normbase |
The base from which the normalisation should be calculated. Can be 'Ability', 'B', or 'A'. Default is 'Ability'. |
normaliseScale |
The scale to normalise to. |
normaliseMean |
The mean to normalise to. The default is 0 when normbase is 'Ability' or 'B', and 1 when normbase is 'A'. |
robust |
if TRUE, outliers (greater than 1.5x the interquartile range from the interquartile region of 25-75%) are dropped before computing the mean and sd for normalisation. |
A list containing the normalised A, B, and Ability parameters.
B <- rnorm(100,2,1) Ability <- rnorm(500,3,.5) A <- rnorm(100,1.4,.05) normaliseIRT(B, Ability, A, normbase='B')
B <- rnorm(100,2,1) Ability <- rnorm(500,3,.5) A <- rnorm(100,1.4,.05) normaliseIRT(B, Ability, A, normbase='B')
Simulate IRT data
simIRT( Nsubs = 100, Nitems = 200, Nscales = 1, NitemsAnswered = Nitems, ASD = 0, AMean = 1, BSD = 1, BMean = 0, logitCSD = 1, logitCMean = -2, AbilitySD = 1, AbilityMean = 0, itemPreds = NA, AitemPredEffects = NA, BitemPredEffects = NA, logitCitemPredEffects = NA, personPreds = NA, AbilityPredEffects = NA, normalise = FALSE )
simIRT( Nsubs = 100, Nitems = 200, Nscales = 1, NitemsAnswered = Nitems, ASD = 0, AMean = 1, BSD = 1, BMean = 0, logitCSD = 1, logitCMean = -2, AbilitySD = 1, AbilityMean = 0, itemPreds = NA, AitemPredEffects = NA, BitemPredEffects = NA, logitCitemPredEffects = NA, personPreds = NA, AbilityPredEffects = NA, normalise = FALSE )
AbilityMean |
Mean of the ability parameters used when simulating the data. Default is 0. |
Computes the WLE and SE for each subject and scale in a bigIRT model.
wleIRT(fit)
wleIRT(fit)
fit |
A bigIRT model fit object. |
A list containing two matrices. The first matrix contains the WLEs for each subject and scale. The second matrix contains the SEs for each subject and scale.
# Fit a bigIRT model
#fit <- bigIRT(data, itempars)
# Compute WLE and SE
#wleIRT(fit)
# Fit a bigIRT model
#fit <- bigIRT(data, itempars)
# Compute WLE and SE
#wleIRT(fit)