Parity Regression Model Estimation

The savvyPR function is used to fit a parity regression model. Here we demonstrate its usage with synthetic, highly correlated data.

library(MASS)
library(glmnet)

# Build the p x p correlation matrix for X whose (i, j) entry is rho^|i - j|.
#
# Args:
#   rho: Numeric scalar, the base of the power decay. A negative value makes
#        the sign of the correlation alternate with the lag |i - j|.
#   p:   Number of variables; sets the dimension of the matrix.
#
# Returns:
#   A symmetric p x p numeric matrix with ones on the diagonal.
create_corr_matrix <- function(rho, p) {
  idx <- seq_len(p)
  # Compute |i - j| for every pair at once. This replaces the nested
  # `for (i in 2:p)` loop, which counts down (2, 1) when p == 1 and then
  # indexes outside the 1 x 1 matrix.
  lag <- abs(outer(idx, idx, "-"))
  # rho^0 is 1, so the diagonal is 1 without any special handling.
  rho^lag
}

# Generate a length-p coefficient vector with alternating signs whose
# magnitude steps up by one every two entries: 1, -1, 2, -2, 3, -3, ...
generate_beta <- function(p) {
  position <- seq_len(p)
  signs <- rep_len(c(1, -1), p)
  # Entries 1-2 get magnitude 1, entries 3-4 get magnitude 2, and so on.
  magnitudes <- ceiling(position / 2)
  signs * magnitudes
}

# Simulate a linear model with correlated predictors and heteroscedastic noise.
# The order of the random draws below (x, then sigma_vec, then y) determines
# the simulated data for this seed, so keep the statements in this order.
set.seed(123)
n <- 1500  # number of observations
p <- 15  # number of predictors (columns of x)
rho <- -0.5  # base of the rho^|i - j| correlation between predictors

corr_matrix <- create_corr_matrix(rho, p)
# Draw n rows from a zero-mean multivariate normal with covariance corr_matrix.
x <- mvrnorm(n = n, mu = rep(0, p), Sigma = corr_matrix)
# p + 1 true coefficients: the first one multiplies the column of ones below,
# i.e. it is the intercept.
beta <- generate_beta(p + 1)
# Per-observation noise standard deviations, drawn as |N(15, 1)|.
sigma_vec <- abs(rnorm(n = n, mean = 15, sd = sqrt(1)))
# Response: linear predictor [1, x] %*% beta plus noise with sd sigma_vec.
y <- rnorm(n, mean = as.vector(cbind(1,x)%*%beta), sd = sigma_vec)
  
# 1. Run OLS estimation with intercept
result_ols <- lm(y ~ x)
coef_ols <- coef(result_ols)

# 2. Run Ridge Regression (RR) estimation
result_RR <- glmnet(x, y, alpha = 0, lambda = 1)
# coef() on a glmnet fit returns a one-column sparse matrix. Take that column
# so coef_RR is a plain named numeric vector, matching coef_ols and the
# cross-validated ridge coefficients extracted with `[, 1]` further down.
coef_RR <- coef(result_RR)[, 1]

# 3. Run PR estimation (Budget Method)
result_pr_budget <- savvyPR(x, y, method = "budget", val = 0.05, intercept = TRUE)
print(result_pr_budget)

## 
## Call:  savvyPR(x = x, y = y, method = "budget", val = 0.05, intercept = TRUE) 
## 
##  Method Number of Non-Zero Coefficients Intercept Included Lambda Value
##  budget                              16                Yes           NA
## 
## Coefficients:
##  Coefficient Estimate
##  (Intercept)   0.9537
##           X1  -3.6182
##           X2   3.7402
##           X3  -3.4817
##           X4   4.1282
##           X5  -4.1790
##           X6   5.1278
##           X7  -4.9607
##           X8   6.0784
##           X9  -5.9857
##          X10   6.3625
##          X11  -5.6978
##          X12   7.7738
##          X13  -7.2884
##          X14   9.0898
##          X15  -8.7795

coef_pr_budget <- coef(result_pr_budget)

# 4. Run PR estimation (Target Method)
result_pr_target <- savvyPR(x, y, method = "target", val = 1, intercept = TRUE)
print(result_pr_target)

## 
## Call:  savvyPR(x = x, y = y, method = "target", val = 1, intercept = TRUE) 
## 
##  Method Number of Non-Zero Coefficients Intercept Included Lambda Value
##  target                              16                Yes           NA
## 
## Coefficients:
##  Coefficient Estimate
##  (Intercept)   1.0055
##           X1  -4.1065
##           X2   4.1299
##           X3  -3.8592
##           X4   4.4271
##           X5  -4.5044
##           X6   5.4132
##           X7  -5.2940
##           X8   6.2690
##           X9  -6.2374
##          X10   6.5824
##          X11  -5.9742
##          X12   7.9672
##          X13  -7.5700
##          X14   9.2653
##          X15  -9.0948

coef_pr_target <- coef(result_pr_target)

# Calculate the L2 (Euclidean) distance between each estimated coefficient
# vector and the true beta used to simulate y; smaller means closer to the truth.
ols_L2 <- sqrt(sum((beta - coef_ols)^2))
print(paste("OLS L2:", ols_L2))

## [1] "OLS L2: 2.24654287653342"

RR_L2 <- sqrt(sum((beta - coef_RR)^2))
print(paste("Ridge L2:", RR_L2))

## [1] "Ridge L2: 2.08824054155074"

pr_budget_L2 <- sqrt(sum((beta - coef_pr_budget)^2))
print(paste("PR Budget L2:", pr_budget_L2))

## [1] "PR Budget L2: 4.66615731466751"

pr_target_L2 <- sqrt(sum((beta - coef_pr_target)^2))
print(paste("PR Target L2:", pr_target_L2))

## [1] "PR Target L2: 5.75815086276712"

You can use the summary function to get detailed statistics for your models:

summary(result_pr_budget)

## Summary of Parity Model
## ===================================================================
## 
## Parameterization Method: budget 
## Intercept: Included
## 
## Call:
## savvyPR(x = x, y = y, method = "budget", val = 0.05, intercept = TRUE)
## 
## Residuals:
##          0%         25%         50%         75%        100% 
## -56.4006880 -11.1821825  -0.1397933  11.0630792  49.0702638 
## 
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)   2.5 %   97.5 %  Signif.
## (Intercept) 0.9537   0.4311     2.2122   0.0271     0.1087  1.7986  *      
## X1          -3.6182  0.5067     -7.141   1.4456e-12 -4.6113 -2.6251 ***    
## X2          3.7402   0.5496     6.8051   1.4603e-11 2.663   4.8175  ***    
## X3          -3.4817  0.5568     -6.2533  5.2442e-10 -4.573  -2.3904 ***    
## X4          4.1282   0.5586     7.3907   2.4295e-13 3.0334  5.223   ***    
## X5          -4.179   0.5571     -7.5015  1.0818e-13 -5.2709 -3.0871 ***    
## X6          5.1278   0.5496     9.3306   3.7012e-20 4.0507  6.2049  ***    
## X7          -4.9607  0.5629     -8.8133  3.3293e-18 -6.0639 -3.8575 ***    
## X8          6.0784   0.5583     10.8869  1.3089e-26 4.9841  7.1726  ***    
## X9          -5.9857  0.558      -10.7277 6.5395e-26 -7.0793 -4.8921 ***    
## X10         6.3625   0.5605     11.3513  1.0727e-28 5.2639  7.4611  ***    
## X11         -5.6978  0.5551     -10.2648 6.2712e-24 -6.7857 -4.6098 ***    
## X12         7.7738   0.5687     13.6702  3.7103e-40 6.6592  8.8883  ***    
## X13         -7.2884  0.5516     -13.2127 9.1855e-38 -8.3696 -6.2072 ***    
## X14         9.0898   0.5417     16.781   5.2817e-58 8.0281  10.1514 ***    
## X15         -8.7795  0.4922     -17.8359 1.2703e-64 -9.7443 -7.8147 ***    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.649 on 1484 degrees of freedom
## Multiple R-squared: 0.7824 , Adjusted R-squared: 0.7802 
## F-statistic: 355.6877 on 15 and 1484 DF,  p-value: 0.0000e+00 
## AIC: 8452.962 , BIC: 8537.973 , Deviance: 411348

summary(result_pr_target)

## Summary of Parity Model
## ===================================================================
## 
## Parameterization Method: target 
## Intercept: Included
## 
## Call:
## savvyPR(x = x, y = y, method = "target", val = 1, intercept = TRUE)
## 
## Residuals:
##          0%         25%         50%         75%        100% 
## -58.8840447 -11.3697571  -0.1744899  11.4321575  52.4471791 
## 
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)   2.5 %    97.5 %  Signif.
## (Intercept) 1.0055   0.4526     2.2218   0.0264     0.1185   1.8925  *      
## X1          -4.1065  0.5319     -7.7203  2.1199e-14 -5.1491  -3.064  ***    
## X2          4.1299   0.577      7.1575   1.2874e-12 2.999    5.2607  ***    
## X3          -3.8592  0.5845     -6.6024  5.6176e-11 -5.0048  -2.7136 ***    
## X4          4.4271   0.5864     7.5497   7.5799e-14 3.2778   5.5764  ***    
## X5          -4.5044  0.5848     -7.7019  2.4347e-14 -5.6507  -3.3581 ***    
## X6          5.4132   0.5769     9.3826   2.3264e-20 4.2824   6.544   ***    
## X7          -5.294   0.5909     -8.9591  9.5785e-19 -6.4522  -4.1358 ***    
## X8          6.269    0.5861     10.6956  9.0206e-26 5.1202   7.4178  ***    
## X9          -6.2374  0.5858     -10.6483 1.4480e-25 -7.3855  -5.0893 ***    
## X10         6.5824   0.5884     11.1864  6.0176e-28 5.4291   7.7357  ***    
## X11         -5.9742  0.5827     -10.2521 7.0904e-24 -7.1163  -4.8321 ***    
## X12         7.9672   0.597      13.3455  1.8818e-38 6.7971   9.1373  ***    
## X13         -7.57    0.5791     -13.072  4.8569e-37 -8.705   -6.435  ***    
## X14         9.2653   0.5687     16.2934  4.9055e-55 8.1507   10.3798 ***    
## X15         -9.0948  0.5168     -17.5996 4.0662e-63 -10.1076 -8.0819 ***    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.4783 on 1484 degrees of freedom
## Multiple R-squared: 0.7602 , Adjusted R-squared: 0.7577 
## F-statistic: 313.5674 on 15 and 1484 DF,  p-value: 0.0000e+00 
## AIC: 8598.801 , BIC: 8683.813 , Deviance: 453350.6

The package also provides built-in plotting functions to visualize the coefficients and the risk parity optimization distributions.

# Plot the estimated coefficients
plot(result_pr_budget, plot_type = "estimated_coefficients", label = TRUE)

Four-panel visualization: The top two plots show estimated regression coefficients for budget and target methods. The bottom two plots display the risk parity distribution, including optimization weights and relative risk contributions across predictors.

plot(result_pr_target, plot_type = "estimated_coefficients", label = FALSE)

# Plot the risk contributions and weights/target variables
plot(result_pr_budget, plot_type = "risk_contributions", label = TRUE)

plot(result_pr_target, plot_type = "risk_contributions", label = FALSE)

Cross-Validation for Parity Regression Models

The cv.savvyPR function performs cross-validation to select optimal parameters. It handles both the “budget” sequence and the “target” sequence automatically.

# Cross-validation with Ridge
# cv.glmnet() takes the number of folds as `nfolds`. It has no `folds`
# parameter: that name falls into `...` and the default of 10 folds is used,
# which would not match the 5-fold PR fits below.
result_rr_cv <- cv.glmnet(x, y, alpha = 0, nfolds = 5)
fit_rr1 <- glmnet(x, y, alpha = 0, lambda = result_rr_cv$lambda.min)
coef_rr_cv <- coef(fit_rr1)[,1]

# Cross-validation with model type PR1 (Budget Method)
result_pr_cv1 <- cv.savvyPR(x, y, method = "budget", folds = 5, model_type = "PR1", measure_type = "mse")
coef_pr_cv1 <- coef(result_pr_cv1)

# Cross-validation with model type PR2 (Target Method)
result_pr_cv2 <- cv.savvyPR(x, y, method = "target", folds = 5, model_type = "PR2", measure_type = "mse")
coef_pr_cv2 <- coef(result_pr_cv2)

# Cross-validation with model type PR3 (Budget Method)
result_pr_cv3 <- cv.savvyPR(x, y, method = "budget", folds = 5, model_type = "PR3", measure_type = "mse")
coef_pr_cv3 <- coef(result_pr_cv3)

# Calculate the L2 distance 
print(paste("Ridge CV L2:", sqrt(sum((beta - coef_rr_cv)^2))))

## [1] "Ridge CV L2: 2.00624794773698"

print(paste("PR1 CV (Budget) L2:", sqrt(sum((beta - coef_pr_cv1)^2))))

## [1] "PR1 CV (Budget) L2: 2.14318103060067"

print(paste("PR2 CV (Target) L2:", sqrt(sum((beta - coef_pr_cv2)^2))))

## [1] "PR2 CV (Target) L2: 2.14032902099082"

print(paste("PR3 CV (Budget) L2:", sqrt(sum((beta - coef_pr_cv3)^2))))

## [1] "PR3 CV (Budget) L2: 1.93346984062272"

We can summarize the cross-validation results to see the optimal tuning values chosen by the algorithm.

summary(result_pr_cv1)

## Summary of Cross-Validated Parity Model
## ===================================================================
## 
## Parameterization Method: budget 
## Intercept: Included
## 
## Call:
## cv.savvyPR(x = x, y = y, method = "budget", folds = 5, model_type = "PR1", 
##     measure_type = "mse")
## 
## Residuals:
##          0%         25%         50%         75%        100% 
## -48.8472816 -10.3022551  -0.1662322  10.5688898  53.1109339 
## 
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)   2.5 %   97.5 %  Signif.
## (Intercept) 0.7742   0.3937     1.9664   0.0494     0.0025  1.5459  *      
## X1          -1.2303  0.4628     -2.6585  0.0079     -2.1373 -0.3233 **     
## X2          2.54     0.502      5.0599   4.7171e-07 1.5561  3.5239  ***    
## X3          -1.6504  0.5085     -3.2456  0.0012     -2.6471 -0.6538 **     
## X4          3.4415   0.5102     6.7459   2.1724e-11 2.4416  4.4414  ***    
## X5          -2.8655  0.5088     -5.6317  2.1308e-08 -3.8627 -1.8682 ***    
## X6          4.4593   0.5019     8.8842   1.8198e-18 3.4756  5.4431  ***    
## X7          -3.5982  0.5141     -6.9992  3.8856e-12 -4.6058 -2.5906 ***    
## X8          5.795    0.5099     11.3643  9.3572e-29 4.7955  6.7944  ***    
## X9          -5.0856  0.5096     -9.9794  9.5962e-23 -6.0844 -4.0868 ***    
## X10         5.832    0.5119     11.3921  6.9752e-29 4.8286  6.8353  ***    
## X11         -4.6964  0.507      -9.2636  6.7118e-20 -5.69   -3.7028 ***    
## X12         7.3638   0.5194     14.178   6.9111e-43 6.3458  8.3817  ***    
## X13         -6.3285  0.5038     -12.5611 1.8277e-34 -7.3159 -5.341  ***    
## X14         8.7043   0.4947     17.5942  4.4039e-63 7.7346  9.6739  ***    
## X15         -7.7908  0.4496     -17.3292 2.0714e-61 -8.6719 -6.9096 ***    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.206 on 1484 degrees of freedom
## Multiple R-squared: 0.8185 , Adjusted R-squared: 0.8166 
## F-statistic: 446.0644 on 15 and 1484 DF,  p-value: 0.0000e+00 
## AIC: 8180.986 , BIC: 8265.997 , Deviance: 343134.4 
## 
## Cross-Validation Summary:
## Mean Cross-Validation Error ( mse: Mean-Squared Error ): 234.6694 
## Optimal Val: 0.00137 
## Fixed Lambda: 0

summary(result_pr_cv2)

## Summary of Cross-Validated Parity Model
## ===================================================================
## 
## Parameterization Method: target 
## Intercept: Included
## 
## Call:
## cv.savvyPR(x = x, y = y, method = "target", folds = 5, model_type = "PR2", 
##     measure_type = "mse")
## 
## Residuals:
##          0%         25%         50%         75%        100% 
## -49.3237027 -10.3323437  -0.2204857  10.6880448  54.3591593 
## 
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)   2.5 %   97.5 %  Signif.
## (Intercept) 0.7652   0.3937     1.9437   0.0521     -0.0064 1.5368  .      
## X1          -0.9632  0.4627     -2.0817  0.0375     -1.8701 -0.0563 *      
## X2          2.5719   0.5019     5.124    3.3846e-07 1.5881  3.5556  ***    
## X3          -1.5083  0.5085     -2.9664  0.0031     -2.5049 -0.5117 **     
## X4          3.4476   0.5101     6.7586   1.9955e-11 2.4478  4.4474  ***    
## X5          -2.8125  0.5088     -5.5282  3.8176e-08 -3.8096 -1.8153 ***    
## X6          4.3976   0.5019     8.7622   5.1292e-18 3.4139  5.3813  ***    
## X7          -3.5431  0.514      -6.8928  8.0657e-12 -4.5506 -2.5356 ***    
## X8          5.7435   0.5099     11.2646  2.6647e-28 4.7442  6.7428  ***    
## X9          -5.0403  0.5096     -9.8916  2.1918e-22 -6.039  -4.0416 ***    
## X10         5.7582   0.5119     11.2493  3.1263e-28 4.755   6.7615  ***    
## X11         -4.6896  0.5069     -9.2511  7.4980e-20 -5.6831 -3.696  ***    
## X12         7.2691   0.5193     13.9973  6.6007e-42 6.2513  8.287   ***    
## X13         -6.2969  0.5038     -12.4998 3.6778e-34 -7.2842 -5.3095 ***    
## X14         8.5979   0.4947     17.3811  9.7716e-62 7.6284  9.5675  ***    
## X15         -7.6855  0.4495     -17.0969 5.8674e-60 -8.5666 -6.8045 ***    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.2044 on 1484 degrees of freedom
## Multiple R-squared: 0.8185 , Adjusted R-squared: 0.8167 
## F-statistic: 446.1797 on 15 and 1484 DF,  p-value: 0.0000e+00 
## AIC: 8180.668 , BIC: 8265.68 , Deviance: 343061.8 
## 
## Cross-Validation Summary:
## Mean Cross-Validation Error ( mse: Mean-Squared Error ): 234.9538 
## Optimal Val: 0 
## Fixed Lambda: 0.6251

summary(result_pr_cv3)

## Summary of Cross-Validated Parity Model
## ===================================================================
## 
## Parameterization Method: budget 
## Intercept: Included
## 
## Call:
## cv.savvyPR(x = x, y = y, method = "budget", folds = 5, model_type = "PR3", 
##     measure_type = "mse")
## 
## Residuals:
##          0%         25%         50%         75%        100% 
## -49.2544186 -10.2679987  -0.3142341  10.8672310  53.4768986 
## 
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)   2.5 %   97.5 %  Signif.
## (Intercept) 0.7692   0.3939     1.9527   0.0510     -0.0028 1.5412  .      
## X1          -1.3201  0.4629     -2.8515  0.0044     -2.2275 -0.4127 **     
## X2          2.4447   0.5022     4.8681   1.2472e-06 1.4604  3.4289  ***    
## X3          -1.7604  0.5087     -3.4604  0.0006     -2.7575 -0.7633 ***    
## X4          3.3493   0.5104     6.5626   7.2886e-11 2.349   4.3496  ***    
## X5          -2.9135  0.509      -5.7238  1.2582e-08 -3.9111 -1.9159 ***    
## X6          4.336    0.5021     8.635    1.4890e-17 3.3518  5.3202  ***    
## X7          -3.6289  0.5143     -7.0562  2.6177e-12 -4.6369 -2.6209 ***    
## X8          5.6752   0.5101     11.125   1.1384e-27 4.6754  6.6751  ***    
## X9          -5.082   0.5098     -9.9684  1.0645e-22 -6.0813 -4.0828 ***    
## X10         5.7127   0.5121     11.1547  8.3632e-28 4.709   6.7165  ***    
## X11         -4.765   0.5072     -9.395   2.0806e-20 -5.759  -3.7709 ***    
## X12         7.1714   0.5196     13.8021  7.3703e-41 6.153   8.1898  ***    
## X13         -6.3301  0.504      -12.5593 1.8672e-34 -7.3179 -5.3422 ***    
## X14         8.5222   0.4949     17.2192  1.0125e-60 7.5522  9.4922  ***    
## X15         -7.6452  0.4498     -16.9985 2.3990e-59 -8.5267 -6.7637 ***    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.2122 on 1484 degrees of freedom
## Multiple R-squared: 0.8183 , Adjusted R-squared: 0.8165 
## F-statistic: 445.6237 on 15 and 1484 DF,  p-value: 0.0000e+00 
## AIC: 8182.199 , BIC: 8267.211 , Deviance: 343412.1 
## 
## Cross-Validation Summary:
## Mean Cross-Validation Error ( mse: Mean-Squared Error ): 234.3607 
## Fixed Val: 0.002051 
## Optimal Lambda: 0.03728

We can also visualize the cross-validated models:

# Plot coefficients and risk contributions for PR1
plot(result_pr_cv1, plot_type = "estimated_coefficients", label = TRUE)

Coefficient and risk contribution plots for cross-validated PR1 and PR2 models, illustrating the impact of optimal tuning on model parameters.

plot(result_pr_cv1, plot_type = "risk_contributions",label = TRUE)

Coefficient and risk contribution plots for cross-validated PR1 and PR2 models, illustrating the impact of optimal tuning on model parameters.

# Plot coefficients and risk contributions for PR2
plot(result_pr_cv2, plot_type = "estimated_coefficients", label = FALSE)

Coefficient and risk contribution plots for cross-validated PR1 and PR2 models, illustrating the impact of optimal tuning on model parameters.

# Cannot plot risk-contribution for PR2 since the tuning parameter val=0 is fixed.
#plot(result_pr_cv2, plot_type = "risk_contributions", label = FALSE)

We can visualize the cross-validation error curves to see exactly where the optimal minimum was found.

# Plot the cross-validation errors for each model
plot(result_rr_cv)

Cross-validation MSE curves for Ridge and PR models. Each plot shows mean squared error against the log of the tuning parameter, with vertical dashed lines marking the optimal values.

plot(result_pr_cv1, plot_type = "cv_errors", label = TRUE)

Cross-validation MSE curves for Ridge and PR models. Each plot shows mean squared error against the log of the tuning parameter, with vertical dashed lines marking the optimal values.

plot(result_pr_cv2, plot_type = "cv_errors")

Cross-validation MSE curves for Ridge and PR models. Each plot shows mean squared error against the log of the tuning parameter, with vertical dashed lines marking the optimal values.

plot(result_pr_cv3, plot_type = "cv_errors", label = FALSE)

Cross-validation MSE curves for Ridge and PR models. Each plot shows mean squared error against the log of the tuning parameter, with vertical dashed lines marking the optimal values.

Finally, we can plot the coefficient paths to see how the coefficients shrink or change as the tuning parameter varies. Note that we pass xvar = "val" to plot against the unified tuning parameter.

# Plot the coefficient paths for cross-validation models
plot(result_pr_cv1, plot_type = "cv_coefficients", xvar = "val", max_vars_per_plot = 10, label = TRUE)