## Warning: package 'ggplot2' was built under R version 4.4.3
This vignette demonstrates how to use the savvyPR
package for parity regression model estimation and cross-validation. The
package handles multicollinearity by applying risk parity constraints,
supporting both Budget-based and Target-based parameterizations.
# Install the development version from GitHub
# devtools::install_github("Ziwei-ChenChen/savvyPR")
library(savvyPR)
library(MASS)
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.4.3
## Loading required package: Matrix
## Loaded glmnet 4.1-10
The savvyPR function is used to fit a parity regression
model. Here we demonstrate its usage with synthetic, highly correlated
data.
library(MASS)
library(glmnet)
# Build a p x p correlation matrix with power-decay (AR(1)-type) structure.
#
# Entry (i, j) equals rho^|i - j|, so the diagonal is 1 and the correlation
# decays geometrically with the distance between indices. With a negative
# rho the sign alternates between odd and even distances.
#
# Arguments:
#   rho  Numeric scalar, the correlation between neighbouring variables.
#   p    Number of variables (dimension of the matrix).
#
# Returns:
#   A symmetric numeric p x p matrix.
create_corr_matrix <- function(rho, p) {
  # seq_len() instead of 2:p: the latter counts down (2, 1) when p is 1 and
  # makes a loop index past the end of a 1 x 1 matrix.
  idx <- seq_len(p)
  # outer() yields the matrix of index differences i - j in a single call;
  # the zero distances on the diagonal give rho^0 = 1.
  rho^abs(outer(idx, idx, "-"))
}
# Generate a length-p coefficient vector with alternating signs and
# magnitudes that increase by one every two entries:
# 1, -1, 2, -2, 3, -3, ...
#
# Arguments:
#   p  Number of coefficients to generate.
#
# Returns:
#   A numeric vector of length p.
generate_beta <- function(p) {
  # +1 at odd positions, -1 at even positions
  signs <- rep_len(c(1, -1), p)
  # positions 1, 2 -> 1; positions 3, 4 -> 2; and so on
  magnitudes <- ceiling(seq_len(p) / 2)
  signs * magnitudes
}
# Simulation settings. The seed is fixed first so that every random draw
# below (design matrix, noise scales, response) is reproducible.
set.seed(123)
# n: number of observations (rows of x)
n <- 1500
# p: number of predictors (columns of x)
p <- 15
# rho: base correlation; entry (i, j) of the covariance is rho^|i - j|, so a
# negative value makes the sign alternate with the distance between indices
rho <- -0.5
corr_matrix <- create_corr_matrix(rho, p)
# Draw n rows from a zero-mean multivariate normal with covariance corr_matrix
x <- mvrnorm(n = n, mu = rep(0, p), Sigma = corr_matrix)
# True coefficients: p + 1 values, because the first one multiplies the
# column of ones prepended by cbind(1, x) below (i.e. it is the intercept)
beta <- generate_beta(p + 1)
# Observation-specific noise standard deviations, drawn around 15 (sd 1)
sigma_vec <- abs(rnorm(n = n, mean = 15, sd = sqrt(1)))
# Response: linear predictor plus Gaussian noise with per-observation sd
y <- rnorm(n, mean = as.vector(cbind(1,x)%*%beta), sd = sigma_vec)
# 1. Run OLS estimation with intercept
# (the formula y ~ x adds the intercept term automatically)
result_ols <- lm(y ~ x)
coef_ols <- coef(result_ols)
# 2. Run Ridge Regression (RR) estimation
# alpha = 0 selects the pure ridge penalty in glmnet; lambda = 1 is a fixed,
# untuned penalty level. Note that coef() on a glmnet fit returns a
# one-column sparse matrix rather than a plain numeric vector.
result_RR <- glmnet(x, y, alpha = 0, lambda = 1)
coef_RR <- coef(result_RR)
# 3. Run PR estimation (Budget Method)
# NOTE(review): the meaning and valid range of val = 0.05 are defined by
# savvyPR and are not visible in this file -- see ?savvyPR.
result_pr_budget <- savvyPR(x, y, method = "budget", val = 0.05, intercept = TRUE)
print(result_pr_budget)
##
## Call: savvyPR(x = x, y = y, method = "budget", val = 0.05, intercept = TRUE)
##
## Method Number of Non-Zero Coefficients Intercept Included Lambda Value
## budget 16 Yes NA
##
## Coefficients:
## Coefficient Estimate
## (Intercept) 0.9537
## X1 -3.6182
## X2 3.7402
## X3 -3.4817
## X4 4.1282
## X5 -4.1790
## X6 5.1278
## X7 -4.9607
## X8 6.0784
## X9 -5.9857
## X10 6.3625
## X11 -5.6978
## X12 7.7738
## X13 -7.2884
## X14 9.0898
## X15 -8.7795
# Extract the fitted coefficients of the budget-method model
coef_pr_budget <- coef(result_pr_budget)
# 4. Run PR estimation (Target Method)
# Same data and intercept setting as the budget fit; only method and val differ.
result_pr_target <- savvyPR(x, y, method = "target", val = 1, intercept = TRUE)
print(result_pr_target)
##
## Call: savvyPR(x = x, y = y, method = "target", val = 1, intercept = TRUE)
##
## Method Number of Non-Zero Coefficients Intercept Included Lambda Value
## target 16 Yes NA
##
## Coefficients:
## Coefficient Estimate
## (Intercept) 1.0055
## X1 -4.1065
## X2 4.1299
## X3 -3.8592
## X4 4.4271
## X5 -4.5044
## X6 5.4132
## X7 -5.2940
## X8 6.2690
## X9 -6.2374
## X10 6.5824
## X11 -5.9742
## X12 7.9672
## X13 -7.5700
## X14 9.2653
## X15 -9.0948
# Extract the fitted coefficients of the target-method model
coef_pr_target <- coef(result_pr_target)
# Calculate the L2 distance to true beta
# (Euclidean norm of the difference between the true coefficient vector and
# the OLS estimate)
ols_L2 <- sqrt(sum((beta - coef_ols)^2))
print(paste("OLS L2:", ols_L2))
## [1] "OLS L2: 2.24654287653342"
## [1] "Ridge L2: 2.08824054155074"
## [1] "PR Budget L2: 4.66615731466751"
## [1] "PR Target L2: 5.75815086276712"
You can use the summary function to get detailed
statistics for your models:
## Summary of Parity Model
## ===================================================================
##
## Parameterization Method: budget
## Intercept: Included
##
## Call:
## savvyPR(x = x, y = y, method = "budget", val = 0.05, intercept = TRUE)
##
## Residuals:
## 0% 25% 50% 75% 100%
## -56.4006880 -11.1821825 -0.1397933 11.0630792 49.0702638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) 2.5 % 97.5 % Signif.
## (Intercept) 0.9537 0.4311 2.2122 0.0271 0.1087 1.7986 *
## X1 -3.6182 0.5067 -7.141 1.4456e-12 -4.6113 -2.6251 ***
## X2 3.7402 0.5496 6.8051 1.4603e-11 2.663 4.8175 ***
## X3 -3.4817 0.5568 -6.2533 5.2442e-10 -4.573 -2.3904 ***
## X4 4.1282 0.5586 7.3907 2.4295e-13 3.0334 5.223 ***
## X5 -4.179 0.5571 -7.5015 1.0818e-13 -5.2709 -3.0871 ***
## X6 5.1278 0.5496 9.3306 3.7012e-20 4.0507 6.2049 ***
## X7 -4.9607 0.5629 -8.8133 3.3293e-18 -6.0639 -3.8575 ***
## X8 6.0784 0.5583 10.8869 1.3089e-26 4.9841 7.1726 ***
## X9 -5.9857 0.558 -10.7277 6.5395e-26 -7.0793 -4.8921 ***
## X10 6.3625 0.5605 11.3513 1.0727e-28 5.2639 7.4611 ***
## X11 -5.6978 0.5551 -10.2648 6.2712e-24 -6.7857 -4.6098 ***
## X12 7.7738 0.5687 13.6702 3.7103e-40 6.6592 8.8883 ***
## X13 -7.2884 0.5516 -13.2127 9.1855e-38 -8.3696 -6.2072 ***
## X14 9.0898 0.5417 16.781 5.2817e-58 8.0281 10.1514 ***
## X15 -8.7795 0.4922 -17.8359 1.2703e-64 -9.7443 -7.8147 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.649 on 1484 degrees of freedom
## Multiple R-squared: 0.7824 , Adjusted R-squared: 0.7802
## F-statistic: 355.6877 on 15 and 1484 DF, p-value: 0.0000e+00
## AIC: 8452.962 , BIC: 8537.973 , Deviance: 411348
## Summary of Parity Model
## ===================================================================
##
## Parameterization Method: target
## Intercept: Included
##
## Call:
## savvyPR(x = x, y = y, method = "target", val = 1, intercept = TRUE)
##
## Residuals:
## 0% 25% 50% 75% 100%
## -58.8840447 -11.3697571 -0.1744899 11.4321575 52.4471791
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) 2.5 % 97.5 % Signif.
## (Intercept) 1.0055 0.4526 2.2218 0.0264 0.1185 1.8925 *
## X1 -4.1065 0.5319 -7.7203 2.1199e-14 -5.1491 -3.064 ***
## X2 4.1299 0.577 7.1575 1.2874e-12 2.999 5.2607 ***
## X3 -3.8592 0.5845 -6.6024 5.6176e-11 -5.0048 -2.7136 ***
## X4 4.4271 0.5864 7.5497 7.5799e-14 3.2778 5.5764 ***
## X5 -4.5044 0.5848 -7.7019 2.4347e-14 -5.6507 -3.3581 ***
## X6 5.4132 0.5769 9.3826 2.3264e-20 4.2824 6.544 ***
## X7 -5.294 0.5909 -8.9591 9.5785e-19 -6.4522 -4.1358 ***
## X8 6.269 0.5861 10.6956 9.0206e-26 5.1202 7.4178 ***
## X9 -6.2374 0.5858 -10.6483 1.4480e-25 -7.3855 -5.0893 ***
## X10 6.5824 0.5884 11.1864 6.0176e-28 5.4291 7.7357 ***
## X11 -5.9742 0.5827 -10.2521 7.0904e-24 -7.1163 -4.8321 ***
## X12 7.9672 0.597 13.3455 1.8818e-38 6.7971 9.1373 ***
## X13 -7.57 0.5791 -13.072 4.8569e-37 -8.705 -6.435 ***
## X14 9.2653 0.5687 16.2934 4.9055e-55 8.1507 10.3798 ***
## X15 -9.0948 0.5168 -17.5996 4.0662e-63 -10.1076 -8.0819 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.4783 on 1484 degrees of freedom
## Multiple R-squared: 0.7602 , Adjusted R-squared: 0.7577
## F-statistic: 313.5674 on 15 and 1484 DF, p-value: 0.0000e+00
## AIC: 8598.801 , BIC: 8683.813 , Deviance: 453350.6
The package also provides built-in plotting functions to visualize the coefficients and the risk parity optimization distributions.
# Plot the estimated coefficients
plot(result_pr_budget, plot_type = "estimated_coefficients", label = TRUE)
# Plot the risk contributions and weights/target variables
plot(result_pr_budget, plot_type = "risk_contributions", label = TRUE)
The cv.savvyPR function performs cross-validation to
select optimal parameters. It handles both the “budget” sequence and the
“target” sequence automatically.
# Cross-validation with Ridge
# cv.glmnet names its fold-count argument `nfolds`. An argument called `folds`
# is not recognised and is silently absorbed by `...`, which leaves the
# default of 10 folds in place instead of the intended 5.
result_rr_cv <- cv.glmnet(x, y, alpha = 0, nfolds = 5)
# Refit on the full data at the penalty with the smallest CV error
fit_rr1 <- glmnet(x, y, alpha = 0, lambda = result_rr_cv$lambda.min)
# [, 1] turns the one-column sparse coefficient matrix into a named vector
coef_rr_cv <- coef(fit_rr1)[, 1]
# Cross-validation with model type PR1 (Budget Method)
result_pr_cv1 <- cv.savvyPR(
  x, y,
  method = "budget", folds = 5, model_type = "PR1", measure_type = "mse"
)
coef_pr_cv1 <- coef(result_pr_cv1)
# Cross-validation with model type PR2 (Target Method)
result_pr_cv2 <- cv.savvyPR(
  x, y,
  method = "target", folds = 5, model_type = "PR2", measure_type = "mse"
)
coef_pr_cv2 <- coef(result_pr_cv2)
# Cross-validation with model type PR3 (Budget Method)
result_pr_cv3 <- cv.savvyPR(
  x, y,
  method = "budget", folds = 5, model_type = "PR3", measure_type = "mse"
)
coef_pr_cv3 <- coef(result_pr_cv3)
# Calculate the L2 distance
print(paste("Ridge CV L2:", sqrt(sum((beta - coef_rr_cv)^2))))
## [1] "Ridge CV L2: 2.00624794773698"
## [1] "PR1 CV (Budget) L2: 2.14318103060067"
## [1] "PR2 CV (Target) L2: 2.14032902099082"
## [1] "PR3 CV (Budget) L2: 1.93346984062272"
We can summarize the cross-validation results to see the optimal tuning values chosen by the algorithm.
## Summary of Cross-Validated Parity Model
## ===================================================================
##
## Parameterization Method: budget
## Intercept: Included
##
## Call:
## cv.savvyPR(x = x, y = y, method = "budget", folds = 5, model_type = "PR1",
## measure_type = "mse")
##
## Residuals:
## 0% 25% 50% 75% 100%
## -48.8472816 -10.3022551 -0.1662322 10.5688898 53.1109339
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) 2.5 % 97.5 % Signif.
## (Intercept) 0.7742 0.3937 1.9664 0.0494 0.0025 1.5459 *
## X1 -1.2303 0.4628 -2.6585 0.0079 -2.1373 -0.3233 **
## X2 2.54 0.502 5.0599 4.7171e-07 1.5561 3.5239 ***
## X3 -1.6504 0.5085 -3.2456 0.0012 -2.6471 -0.6538 **
## X4 3.4415 0.5102 6.7459 2.1724e-11 2.4416 4.4414 ***
## X5 -2.8655 0.5088 -5.6317 2.1308e-08 -3.8627 -1.8682 ***
## X6 4.4593 0.5019 8.8842 1.8198e-18 3.4756 5.4431 ***
## X7 -3.5982 0.5141 -6.9992 3.8856e-12 -4.6058 -2.5906 ***
## X8 5.795 0.5099 11.3643 9.3572e-29 4.7955 6.7944 ***
## X9 -5.0856 0.5096 -9.9794 9.5962e-23 -6.0844 -4.0868 ***
## X10 5.832 0.5119 11.3921 6.9752e-29 4.8286 6.8353 ***
## X11 -4.6964 0.507 -9.2636 6.7118e-20 -5.69 -3.7028 ***
## X12 7.3638 0.5194 14.178 6.9111e-43 6.3458 8.3817 ***
## X13 -6.3285 0.5038 -12.5611 1.8277e-34 -7.3159 -5.341 ***
## X14 8.7043 0.4947 17.5942 4.4039e-63 7.7346 9.6739 ***
## X15 -7.7908 0.4496 -17.3292 2.0714e-61 -8.6719 -6.9096 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.206 on 1484 degrees of freedom
## Multiple R-squared: 0.8185 , Adjusted R-squared: 0.8166
## F-statistic: 446.0644 on 15 and 1484 DF, p-value: 0.0000e+00
## AIC: 8180.986 , BIC: 8265.997 , Deviance: 343134.4
##
## Cross-Validation Summary:
## Mean Cross-Validation Error ( mse: Mean-Squared Error ): 234.6694
## Optimal Val: 0.00137
## Fixed Lambda: 0
## Summary of Cross-Validated Parity Model
## ===================================================================
##
## Parameterization Method: target
## Intercept: Included
##
## Call:
## cv.savvyPR(x = x, y = y, method = "target", folds = 5, model_type = "PR2",
## measure_type = "mse")
##
## Residuals:
## 0% 25% 50% 75% 100%
## -49.3237027 -10.3323437 -0.2204857 10.6880448 54.3591593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) 2.5 % 97.5 % Signif.
## (Intercept) 0.7652 0.3937 1.9437 0.0521 -0.0064 1.5368 .
## X1 -0.9632 0.4627 -2.0817 0.0375 -1.8701 -0.0563 *
## X2 2.5719 0.5019 5.124 3.3846e-07 1.5881 3.5556 ***
## X3 -1.5083 0.5085 -2.9664 0.0031 -2.5049 -0.5117 **
## X4 3.4476 0.5101 6.7586 1.9955e-11 2.4478 4.4474 ***
## X5 -2.8125 0.5088 -5.5282 3.8176e-08 -3.8096 -1.8153 ***
## X6 4.3976 0.5019 8.7622 5.1292e-18 3.4139 5.3813 ***
## X7 -3.5431 0.514 -6.8928 8.0657e-12 -4.5506 -2.5356 ***
## X8 5.7435 0.5099 11.2646 2.6647e-28 4.7442 6.7428 ***
## X9 -5.0403 0.5096 -9.8916 2.1918e-22 -6.039 -4.0416 ***
## X10 5.7582 0.5119 11.2493 3.1263e-28 4.755 6.7615 ***
## X11 -4.6896 0.5069 -9.2511 7.4980e-20 -5.6831 -3.696 ***
## X12 7.2691 0.5193 13.9973 6.6007e-42 6.2513 8.287 ***
## X13 -6.2969 0.5038 -12.4998 3.6778e-34 -7.2842 -5.3095 ***
## X14 8.5979 0.4947 17.3811 9.7716e-62 7.6284 9.5675 ***
## X15 -7.6855 0.4495 -17.0969 5.8674e-60 -8.5666 -6.8045 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.2044 on 1484 degrees of freedom
## Multiple R-squared: 0.8185 , Adjusted R-squared: 0.8167
## F-statistic: 446.1797 on 15 and 1484 DF, p-value: 0.0000e+00
## AIC: 8180.668 , BIC: 8265.68 , Deviance: 343061.8
##
## Cross-Validation Summary:
## Mean Cross-Validation Error ( mse: Mean-Squared Error ): 234.9538
## Optimal Val: 0
## Fixed Lambda: 0.6251
## Summary of Cross-Validated Parity Model
## ===================================================================
##
## Parameterization Method: budget
## Intercept: Included
##
## Call:
## cv.savvyPR(x = x, y = y, method = "budget", folds = 5, model_type = "PR3",
## measure_type = "mse")
##
## Residuals:
## 0% 25% 50% 75% 100%
## -49.2544186 -10.2679987 -0.3142341 10.8672310 53.4768986
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) 2.5 % 97.5 % Signif.
## (Intercept) 0.7692 0.3939 1.9527 0.0510 -0.0028 1.5412 .
## X1 -1.3201 0.4629 -2.8515 0.0044 -2.2275 -0.4127 **
## X2 2.4447 0.5022 4.8681 1.2472e-06 1.4604 3.4289 ***
## X3 -1.7604 0.5087 -3.4604 0.0006 -2.7575 -0.7633 ***
## X4 3.3493 0.5104 6.5626 7.2886e-11 2.349 4.3496 ***
## X5 -2.9135 0.509 -5.7238 1.2582e-08 -3.9111 -1.9159 ***
## X6 4.336 0.5021 8.635 1.4890e-17 3.3518 5.3202 ***
## X7 -3.6289 0.5143 -7.0562 2.6177e-12 -4.6369 -2.6209 ***
## X8 5.6752 0.5101 11.125 1.1384e-27 4.6754 6.6751 ***
## X9 -5.082 0.5098 -9.9684 1.0645e-22 -6.0813 -4.0828 ***
## X10 5.7127 0.5121 11.1547 8.3632e-28 4.709 6.7165 ***
## X11 -4.765 0.5072 -9.395 2.0806e-20 -5.759 -3.7709 ***
## X12 7.1714 0.5196 13.8021 7.3703e-41 6.153 8.1898 ***
## X13 -6.3301 0.504 -12.5593 1.8672e-34 -7.3179 -5.3422 ***
## X14 8.5222 0.4949 17.2192 1.0125e-60 7.5522 9.4922 ***
## X15 -7.6452 0.4498 -16.9985 2.3990e-59 -8.5267 -6.7637 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.2122 on 1484 degrees of freedom
## Multiple R-squared: 0.8183 , Adjusted R-squared: 0.8165
## F-statistic: 445.6237 on 15 and 1484 DF, p-value: 0.0000e+00
## AIC: 8182.199 , BIC: 8267.211 , Deviance: 343412.1
##
## Cross-Validation Summary:
## Mean Cross-Validation Error ( mse: Mean-Squared Error ): 234.3607
## Fixed Val: 0.002051
## Optimal Lambda: 0.03728
We can also visualize the cross-validated models:
# Plot coefficients and risk contributions for PR1
plot(result_pr_cv1, plot_type = "estimated_coefficients", label = TRUE)
# Plot coefficients and risk contributions for PR2
plot(result_pr_cv2, plot_type = "estimated_coefficients", label = FALSE)
# Cannot plot risk contributions for PR2 since the tuning parameter val=0 is fixed.
#plot(result_pr_cv2, plot_type = "risk_contributions", label = FALSE)
We can visualize the cross-validation error curves to see exactly where the optimal minimum was found.
Finally, we can plot the coefficient paths to see how the coefficients shrink or change as the tuning parameter varies. Notice that we use `xvar = "val"` to plot against the unified tuning parameter.
# Plot the coefficient paths for cross-validation models
plot(result_pr_cv1, plot_type = "cv_coefficients", xvar = "val", max_vars_per_plot = 10, label = TRUE)
# Show what happens when max_vars_per_plot exceeds the limit (will trigger a warning and reset to 10)
plot(result_pr_cv2, plot_type = "cv_coefficients", xvar = "norm", max_vars_per_plot = 12, label = FALSE)
## Warning in plotCVCoef(result_list = x, label = label, xvar = xvar,
## max_vars_per_plot = max_vars_per_plot, : max_vars_per_plot cannot exceed 10.
## Setting max_vars_per_plot to 10.
# PR3 uses dual-optimization, so we can plot against lambda as well
plot(result_pr_cv3, plot_type = "cv_coefficients", xvar = "norm", max_vars_per_plot = 10, label = TRUE)
plot(result_pr_cv3, plot_type = "cv_coefficients", xvar = "lambda", max_vars_per_plot = 10, label = TRUE)
plot(result_pr_cv3, plot_type = "cv_coefficients", xvar = "dev", max_vars_per_plot = 10, label = TRUE)
This vignette has provided an overview of the main functionalities of
the savvyPR package, covering both the Budget-based and
Target-based risk parity constraints. For more details, refer to the
function documentation.