Microbial inactivation models

library(predmicror)

This vignette shows a complete workflow for fitting microbial inactivation models with predmicror.

Inactivation models in this package expect microbial counts on the base-10 logarithmic scale, usually in a column named logN. The workflow is:

  1. inspect the available models;
  2. fit one or more candidate models;
  3. calculate model diagnostics;
  4. extract fitted values and residuals;
  5. generate predictions over a time grid.

Available inactivation models

predmicror_models("inactivation")
#>           type      model response_scale             parameters
#> 1 inactivation   WeibullM         log10N       Y0, sigma, alpha
#> 2 inactivation  WeibullMM         log10N Y0, Yres, sigma, alpha
#> 3 inactivation  WeibullPH         log10N           Y0, k, alpha
#> 4 inactivation GeeraerdST         log10N     Y0, Yres, kmax, Sl
#> 5 inactivation   HuangRGS         log10N               Y0, k, M

Example data

The mafart2005Li11 dataset contains inactivation data with time and log10 microbial counts.

data(mafart2005Li11)
head(mafart2005Li11)
#>   Time  logN
#> 1    1 10.06
#> 2    2  9.98
#> 3    4 10.00
#> 4    6  9.82
#> 5    8  9.26
#> 6   12  8.49
summary(mafart2005Li11)
#>       Time           logN       
#>  Min.   : 1.0   Min.   : 6.770  
#>  1st Qu.: 4.5   1st Qu.: 7.197  
#>  Median :10.0   Median : 8.875  
#>  Mean   :12.1   Mean   : 8.605  
#>  3rd Qu.:19.0   3rd Qu.: 9.940  
#>  Max.   :28.0   Max.   :10.060
plot(
  logN ~ Time,
  data = mafart2005Li11,
  xlab = "Time",
  ylab = expression(log[10](N)),
  pch = 19
)

Observed inactivation data used to illustrate primary inactivation models.

Fitting candidate models

Nonlinear fitting can be sensitive to starting values. The helper below keeps this vignette robust across platforms by returning NULL if a candidate model cannot be fitted with the selected starting values.

safe_fit <- function(expr) {
  withCallingHandlers(
    tryCatch(
      expr,
      error = function(e) {
        message("Model fit failed: ", conditionMessage(e))
        NULL
      }
    ),
    warning = function(w) {
      message("Model fit warning: ", conditionMessage(w))
      invokeRestart("muffleWarning")
    }
  )
}

A simple starting point is to fit the Weibull-Mafart model.

weibull <- safe_fit(
  fit_inactivation(
    mafart2005Li11,
    model = "WeibullM",
    time = "Time",
    response = "logN",
    start = list(Y0 = max(mafart2005Li11$logN), sigma = 3, alpha = 1)
  )
)

weibull
#> predmicror fit
#>   type: inactivation
#>   model: WeibullM
#>   response: logN (log10N)
#>   formula: logN ~ WeibullM(Time, Y0, sigma, alpha)
#> 
#> Nonlinear regression model
#>   model: logN ~ WeibullM(Time, Y0, sigma, alpha)
#>    data: data
#>      Y0   sigma   alpha 
#> 10.6394 13.1942  0.7727 
#>  residual sum-of-squares: 1.116
#> 
#> Algorithm: multifit/levenberg-marquardt, (scaling: more, solver: qr)
#> 
#> Number of iterations to convergence: 16 
#> Achieved convergence tolerance: 1.11e-15

Additional candidate models can be fitted and compared when they converge.

weibull_ph <- safe_fit(
  fit_inactivation(
    mafart2005Li11,
    model = "WeibullPH",
    time = "Time",
    response = "logN",
    start = list(Y0 = max(mafart2005Li11$logN), k = 0.3, alpha = 1)
  )
)

huang_rgs <- safe_fit(
  fit_inactivation(
    mafart2005Li11,
    model = "HuangRGS",
    time = "Time",
    response = "logN",
    start = list(Y0 = max(mafart2005Li11$logN), k = 0.3, M = 1)
  )
)
#> Model fit warning: singular gradient matrix at parameter estimates

fits <- Filter(Negate(is.null), list(
  WeibullM = weibull,
  WeibullPH = weibull_ph,
  HuangRGS = huang_rgs
))

names(fits)
#> [1] "WeibullM"  "WeibullPH" "HuangRGS"

Diagnostics and model comparison

Use fit_metrics() for one fitted model and compare_models() for a ranked summary of several fitted models.

if (length(fits) > 0L) {
  fit_metrics(fits[[1]])
}
#>      model         type response response_scale  n p      SSE      RMSE
#> 1 WeibullM inactivation     logN         log10N 10 3 1.116111 0.3340824
#>         MAE          bias       RSE       R2   adj_R2    logLik      AIC
#> 1 0.2926188 -1.706368e-12 0.3993049 0.934726 0.902089 -3.225709 14.45142
#>        BIC converged
#> 1 15.66176      TRUE
if (length(fits) > 1L) {
  compare_models(fits, sort_by = "AIC")
}
#>         fit     model         type response response_scale  n p       SSE
#> 1  WeibullM  WeibullM inactivation     logN         log10N 10 3  1.116111
#> 2 WeibullPH WeibullPH inactivation     logN         log10N 10 3  1.116111
#> 3  HuangRGS  HuangRGS inactivation     logN         log10N 10 3 17.098850
#>        RMSE       MAE          bias       RSE            R2    adj_R2
#> 1 0.3340824 0.2926188 -1.706368e-12 0.3993049  9.347260e-01  0.902089
#> 2 0.3340824 0.2926188  1.367795e-14 0.3993049  9.347260e-01  0.902089
#> 3 1.3076257 1.2190000  2.388623e-11 1.5629117 -1.093525e-10 -0.500000
#>       logLik      AIC      BIC converged
#> 1  -3.225709 14.45142 15.66176      TRUE
#> 2  -3.225709 14.45142 15.66176      TRUE
#> 3 -16.871516 41.74303 42.95337      TRUE

Fitted values and residuals

predmicror_augment() adds fitted values and residuals to the original data. This is useful for diagnostic plots and for exporting results.

if (length(fits) > 0L) {
  aug <- predmicror_augment(fits[[1]])
  head(aug)
}
#>   Time  logN   .fitted       .resid   .model        .type
#> 1    1 10.06 10.325715 -0.265714615 WeibullM inactivation
#> 2    2  9.98 10.103482 -0.123482157 WeibullM inactivation
#> 3    4 10.00  9.723802  0.276198363 WeibullM inactivation
#> 4    6  9.82  9.386916  0.433083708 WeibullM inactivation
#> 5    8  9.26  9.075124  0.184876317 WeibullM inactivation
#> 6   12  8.49  8.499561 -0.009560584 WeibullM inactivation
if (length(fits) > 0L) {
  aug <- predmicror_augment(fits[[1]])
  if (all(c(".fitted", ".resid") %in% names(aug))) {
    plot(
      aug[[".fitted"]], aug[[".resid"]],
      xlab = "Fitted values",
      ylab = "Residuals",
      pch = 19
    )
    abline(h = 0, lty = 2)
  }
}

Observed and fitted values for the Weibull-Mafart inactivation model.

Prediction over a time grid

The same augment helper can be used with newdata to obtain predictions over a regular time grid.

if (length(fits) > 0L) {
  best_fit <- fits[[1]]

  if (length(fits) > 1L) {
    comparison <- compare_models(fits, sort_by = "AIC")
    best_fit <- fits[[comparison$model[1]]]
  }

  new_time <- data.frame(
    Time = seq(min(mafart2005Li11$Time), max(mafart2005Li11$Time), length.out = 100)
  )

  pred <- predmicror_augment(best_fit, newdata = new_time)

  plot(
    logN ~ Time,
    data = mafart2005Li11,
    xlab = "Time",
    ylab = expression(log[10](N)),
    pch = 19
  )

  if (".fitted" %in% names(pred)) {
    lines(pred$Time, pred[[".fitted"]], lwd = 2)
  }
}

Observed and fitted values for an alternative inactivation model.

Practical notes

For inactivation models, verify the response scale before fitting. The wrappers expect base-10 log counts.

When fitting more complex models with residual tails, use biologically plausible starting values and inspect parameter uncertainty; residual population parameters can be weakly identified if the data do not show a clear tail.