ksformat Usage Examples

The ksformat package provides SAS PROC FORMAT-like functionality for R. This vignette walks through the most common use cases.

Example 1: Basic Discrete Formatting

Create a format for gender codes (auto-stored in the library as "sex"):

fnew(
  "M" = "Male",
  "F" = "Female",
  .missing = "Unknown",
  .other = "Other Gender",
  name = "sex"
)
#> KS Format:sex
#> Type: character 
#> Mappings:
#>   M => Male
#>   F => Female
#>   .missing => Unknown
#>   .other => Other Gender

gender_codes <- c("M", "F", "M", NA, "X", "F")
formatted_genders <- fput(gender_codes, "sex")

data.frame(
  code = gender_codes,
  label = formatted_genders
)
#>   code        label
#> 1    M         Male
#> 2    F       Female
#> 3    M         Male
#> 4 <NA>      Unknown
#> 5    X Other Gender
#> 6    F       Female

fprint("sex")
#> KS Format:sex
#> Type: character 
#> Mappings:
#>   M => Male
#>   F => Female
#>   .missing => Unknown
#>   .other => Other Gender

Example 2: Numeric Range Formatting

Define formats in SAS-like text (auto-registered):

fparse(text = '
VALUE age (numeric)
  [0, 18)     = "Child"
  [18, 65)    = "Adult"
  [65, HIGH]  = "Senior"
  .missing    = "Age Unknown"
;
')
#> $age
#> KS Format:age
#> Type: numeric 
#> Mappings:
#>   [0, 18) => Child
#>   [18, 65) => Adult
#>   [65, HIGH] => Senior
#>   .missing => Age Unknown

ages <- c(5, 15.3, 17.9, 18, 45, 64.99, 65, 85, NA)
age_groups <- fputn(ages, "age")

data.frame(
  age = ages,
  group = age_groups
)
#>     age       group
#> 1  5.00       Child
#> 2 15.30       Child
#> 3 17.90       Child
#> 4 18.00       Adult
#> 5 45.00       Adult
#> 6 64.99       Adult
#> 7 65.00      Senior
#> 8 85.00      Senior
#> 9    NA Age Unknown

Example 3: Decimal Ranges (BMI Categories)

fparse(text = '
VALUE bmi (numeric)
  [0, 18.5)    = "Underweight"
  [18.5, 25)   = "Normal"
  [25, 30)     = "Overweight"
  [30, HIGH]   = "Obese"
  .missing     = "No data"
;
')
#> $bmi
#> KS Format:bmi
#> Type: numeric 
#> Mappings:
#>   [0, 18.5) => Underweight
#>   [18.5, 25) => Normal
#>   [25, 30) => Overweight
#>   [30, HIGH] => Obese
#>   .missing => No data

bmi_values <- c(16.2, 18.5, 22.7, 25, 29.9, 35.1, NA)
bmi_labels <- fputn(bmi_values, "bmi")

data.frame(
  bmi = bmi_values,
  category = bmi_labels
)
#>    bmi    category
#> 1 16.2 Underweight
#> 2 18.5      Normal
#> 3 22.7      Normal
#> 4 25.0  Overweight
#> 5 29.9  Overweight
#> 6 35.1       Obese
#> 7   NA     No data

Example 4: Exclusive/Inclusive Bounds

fparse(text = '
VALUE score (numeric)
  (0, 50]    = "Low"
  (50, 100]  = "High"
  .other     = "Out of range"
;
')
#> $score
#> KS Format:score
#> Type: numeric 
#> Mappings:
#>   (0, 50] => Low
#>   (50, 100] => High
#>   .other => Out of range

scores <- c(0, 1, 50, 51, 100, 101)
score_labels <- fputn(scores, "score")

data.frame(
  score = scores,
  label = score_labels
)
#>   score        label
#> 1     0 Out of range
#> 2     1          Low
#> 3    50          Low
#> 4    51         High
#> 5   100         High
#> 6   101 Out of range

Example 5: Reverse Formatting with Invalue

Invalues convert labels back to values. The default target_type is "numeric"; a label with no mapping (such as "Unknown" below) is returned as NA:

finput(
  "Male" = 1,
  "Female" = 2,
  name = "sex_inv"
)
#> KS Invalue: sex_inv 
#> Target Type: numeric 
#> Mappings:
#>   Male => 1
#>   Female => 2

labels <- c("Male", "Female", "Male", "Unknown", "Female")
codes <- finputn(labels, "sex_inv")

data.frame(
  label = labels,
  code = codes
)
#>     label code
#> 1    Male    1
#> 2  Female    2
#> 3    Male    1
#> 4 Unknown   NA
#> 5  Female    2

Example 6: Bidirectional Formatting

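fnew_bid() creates both a format and an invalue at once. The format is registered under the given name ("status") and the matching invalue under that name with an "_inv" suffix ("status_inv"):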

status_bi <- fnew_bid(
  "A" = "Active",
  "I" = "Inactive",
  "P" = "Pending",
  name = "status"
)

# Forward: code -> label
status_codes <- c("A", "I", "P", "A")
status_labels <- fputc(status_codes, "status")
data.frame(code = status_codes, label = status_labels)
#>   code    label
#> 1    A   Active
#> 2    I Inactive
#> 3    P  Pending
#> 4    A   Active

# Reverse: label -> code
test_labels <- c("Active", "Pending", "Inactive")
test_codes <- finputc(test_labels, "status_inv")
data.frame(label = test_labels, code = test_codes)
#>      label code
#> 1   Active    A
#> 2  Pending    P
#> 3 Inactive    I

Example 7: Parse Multiple Formats from Text

fparse(text = '
// Study format definitions

VALUE race (character)
  "W" = "White"
  "B" = "Black"
  "A" = "Asian"
  .missing = "Unknown"
;

INVALUE race_inv
  "White" = 1
  "Black" = 2
  "Asian" = 3
;
')
#> $race
#> KS Format:race
#> Type: character 
#> Mappings:
#>   W => White
#>   B => Black
#>   A => Asian
#>   .missing => Unknown
#> 
#> $race_inv
#> KS Invalue: race_inv 
#> Target Type: numeric 
#> Mappings:
#>   White => 1
#>   Black => 2
#>   Asian => 3

fprint()
#> Registered formats:
#>   age - VALUE (numeric), 3 mapping(s)
#>   bmi - VALUE (numeric), 4 mapping(s)
#>   race - VALUE (character), 3 mapping(s)
#>   race_inv - INVALUE (numeric), 3 mapping(s)
#>   score - VALUE (numeric), 2 mapping(s)
#>   sex - VALUE (character), 2 mapping(s)
#>   sex_inv - INVALUE (numeric), 2 mapping(s)
#>   status - VALUE (character), 3 mapping(s)
#>   status_inv - INVALUE (character), 3 mapping(s)

Example 8: Export Formats Back to Text

bmi_fmt <- format_get("bmi")
cat(fexport(bmi = bmi_fmt))
#> VALUE bmi (numeric)
#>   [0, 18.5) = "Underweight"
#>   [18.5, 25) = "Normal"
#>   [25, 30) = "Overweight"
#>   [30, HIGH] = "Obese"
#>   .missing = "No data"
#> ;

Example 9: SAS-like PUT/INPUT Functions

# fputn — apply numeric format by name
fputn(c(5, 30, 70), "age")
#> [1] "Child"  "Adult"  "Senior"

# fputc — apply character format by name
fputc(c("M", "F"), "sex")
#> [1] "Male"   "Female"

# finputn — apply numeric invalue by name
finputn(c("White", "Black"), "race_inv")
#> [1] 1 2

Example 10: Data Frame Formatting

df <- data.frame(
  id = 1:6,
  sex = c("M", "F", "M", "F", NA, "X"),
  age = c(15, 25, 45, 70, 35, NA),
  stringsAsFactors = FALSE
)

sex_f <- format_get("sex")
age_f <- format_get("age")

df_formatted <- fput_df(
  df,
  sex = sex_f,
  age = age_f,
  suffix = "_label"
)

df_formatted
#>   id  sex age    sex_label   age_label
#> 1  1    M  15         Male       Child
#> 2  2    F  25       Female       Adult
#> 3  3    M  45         Male       Adult
#> 4  4    F  70       Female      Senior
#> 5  5 <NA>  35      Unknown       Adult
#> 6  6    X  NA Other Gender Age Unknown

Example 11: Missing Value Handling

# With .missing label
fput(c("M", "F", NA), "sex")
#> [1] "Male"    "Female"  "Unknown"

# With keep_na = TRUE
fput(c("M", "F", NA), sex_f, keep_na = TRUE)
#> [1] "Male"   "Female" NA

# is_missing() checks
is_missing(NA)
#> [1] TRUE
is_missing(NaN)
#> [1] TRUE
is_missing("")   # TRUE — empty strings are treated as missing
#> [1] TRUE

Example 12: Date/Time Formats (SAS-style)

SAS Date Formats

SAS date format names are auto-resolved — no pre-creation needed:

today <- Sys.Date()

data.frame(
  format = c("DATE9.", "MMDDYY10.", "DDMMYY10.", "YYMMDD10.",
             "MONYY7.", "WORDDATE.", "YEAR4.", "QTR."),
  result = c(
    fputn(today, "DATE9."),
    fputn(today, "MMDDYY10."),
    fputn(today, "DDMMYY10."),
    fputn(today, "YYMMDD10."),
    fputn(today, "MONYY7."),
    fputn(today, "WORDDATE."),
    fputn(today, "YEAR4."),
    fputn(today, "QTR.")
  )
)
#>      format         result
#> 1    DATE9.      21MAR2026
#> 2 MMDDYY10.     03/21/2026
#> 3 DDMMYY10.     21/03/2026
#> 4 YYMMDD10.     2026-03-21
#> 5   MONYY7.        MAR2026
#> 6 WORDDATE. March 21, 2026
#> 7    YEAR4.           2026
#> 8      QTR.              1

# Multiple dates
dates <- as.Date(c("2020-01-15", "2020-06-30", "2020-12-25"))
fputn(dates, "DATE9.")
#> [1] "15JAN2020" "30JUN2020" "25DEC2020"

R Numeric Dates (Days Since 1970-01-01)

r_days <- as.numeric(as.Date("2025-01-01"))
r_days
#> [1] 20089
fputn(r_days, "DATE9.")
#> [1] "01JAN2025"
fputn(r_days, "MMDDYY10.")
#> [1] "01/01/2025"

Time Formats

Time is represented as seconds since midnight:

seconds <- c(0, 3600, 45000, 86399)

data.frame(
  seconds = seconds,
  TIME8 = fputn(seconds, "TIME8."),
  TIME5 = fputn(seconds, "TIME5."),
  HHMM = fputn(seconds, "HHMM.")
)
#>   seconds    TIME8 TIME5  HHMM
#> 1       0  0:00:00  0:00 00:00
#> 2    3600  1:00:00  1:00 01:00
#> 3   45000 12:30:00 12:30 12:30
#> 4   86399 23:59:59 23:59 23:59

Datetime Formats

now <- Sys.time()

data.frame(
  format = c("DATETIME20.", "DATETIME13.", "DTDATE.", "DTYYMMDD."),
  result = c(
    fputn(now, "DATETIME20."),
    fputn(now, "DATETIME13."),
    fputn(now, "DTDATE."),
    fputn(now, "DTYYMMDD.")
  )
)
#>        format             result
#> 1 DATETIME20. 21MAR2026:14:00:09
#> 2 DATETIME13.      21MAR26:14:00
#> 3     DTDATE.          21MAR2026
#> 4   DTYYMMDD.         2026-03-21

# From numeric R-epoch seconds
r_secs <- as.numeric(as.POSIXct("2025-06-15 14:30:00", tz = "UTC"))
fputn(r_secs, "DATETIME20.")
#> [1] "15JUN2025:14:30:00"

Custom Date Formats with fnew_date()

# SAS-named format
fnew_date("DATE9.", name = "bday_fmt")
#> KS Format:bday_fmt
#> Type: date 
#> Pattern: %d%b%Y (DATE9.)
birthdays <- as.Date(c("1990-03-25", "1985-11-03", "2000-07-14"))
fput(birthdays, "bday_fmt")
#> [1] "25MAR1990" "03NOV1985" "14JUL2000"

# Custom strftime pattern (e.g. DD.MM.YYYY)
fnew_date("%d.%m.%Y", name = "ru_date", type = "date")
#> KS Format:ru_date
#> Type: date 
#> Pattern: %d.%m.%Y
fput(birthdays, "ru_date")
#> [1] "25.03.1990" "03.11.1985" "14.07.2000"

# SAS-named format with a missing-value label
fnew_date("MMDDYY10.", name = "us_date", .missing = "NO DATE")
#> KS Format:us_date
#> Type: date 
#> Pattern: %m/%d/%Y (MMDDYY10.) 
#>   .missing => NO DATE
mixed <- c(as.Date("2025-01-01"), NA, as.Date("2025-12-31"))
fput(mixed, "us_date")
#> [1] "01/01/2025" "NO DATE"    "12/31/2025"

fprint("bday_fmt")
#> KS Format:bday_fmt
#> Type: date 
#> Pattern: %d%b%Y (DATE9.)

Date Formats in Data Frames

patients <- data.frame(
  id = 1:4,
  visit_date = as.Date(c("2025-01-10", "2025-02-15", "2025-03-20", NA)),
  stringsAsFactors = FALSE
)

visit_fmt <- fnew_date("DATE9.", name = "visit_fmt", .missing = "NOT RECORDED")
fput_df(patients, visit_date = visit_fmt)
#>   id visit_date visit_date_fmt
#> 1  1 2025-01-10      10JAN2025
#> 2  2 2025-02-15      15FEB2025
#> 3  3 2025-03-20      20MAR2025
#> 4  4       <NA>   NOT RECORDED

Parse Date Formats from Text

fparse(text = '
VALUE enrldt (date)
  pattern = "DATE9."
  .missing = "Not Enrolled"
;

VALUE visit_time (time)
  pattern = "TIME8."
;

VALUE stamp (datetime)
  pattern = "DATETIME20."
;
')
#> $enrldt
#> KS Format:enrldt
#> Type: date 
#> Pattern: %d%b%Y (DATE9.) 
#>   .missing => Not Enrolled
#> 
#> $visit_time
#> KS Format:visit_time
#> Type: time 
#> Pattern: %_H:%M:%S (TIME8.) 
#> 
#> $stamp
#> KS Format:stamp
#> Type: datetime 
#> Pattern: %d%b%Y:%H:%M:%S (DATETIME20.)

fput(as.Date("2025-03-01"), "enrldt")
#> [1] "01MAR2025"
fput(36000, "visit_time")
#> [1] "10:00:00"
fput(as.POSIXct("2025-03-01 10:00:00", tz = "UTC"), "stamp")
#> [1] "01MAR2025:10:00:00"

# Export back to text
enrl_obj <- format_get("enrldt")
cat(fexport(enrldt = enrl_obj))
#> VALUE enrldt (date)
#>   pattern = "DATE9."
#>   .missing = "Not Enrolled"
#> ;

fclear()
#> All formats cleared from library.

Example 13: Multilabel Formats

Overlapping Age Categories

With multilabel formats, a single value can match multiple labels:

fnew(
  "0,5,TRUE,TRUE"    = "Infant",
  "6,11,TRUE,TRUE"   = "Child",
  "12,17,TRUE,TRUE"  = "Adolescent",
  "0,17,TRUE,TRUE"   = "Pediatric",
  "18,64,TRUE,TRUE"  = "Adult",
  "65,Inf,TRUE,TRUE" = "Elderly",
  "18,Inf,TRUE,TRUE" = "Non-Pediatric",
  name = "age_categories",
  type = "numeric",
  multilabel = TRUE
)
#> KS Format:age_categories (multilabel)
#> Type: numeric 
#> Mappings:
#>   [0, 5] => Infant
#>   [6, 11] => Child
#>   [12, 17] => Adolescent
#>   [0, 17] => Pediatric
#>   [18, 64] => Adult
#>   [65, HIGH] => Elderly
#>   [18, HIGH] => Non-Pediatric

ages <- c(3, 14, 25, 70)

# fput returns first match only
fput(ages, "age_categories")
#> [1] "Infant"     "Adolescent" "Adult"      "Elderly"

# fput_all returns ALL matching labels
all_labels <- fput_all(ages, "age_categories")
for (i in seq_along(ages)) {
  cat("Age", ages[i], "->", paste(all_labels[[i]], collapse = ", "), "\n")
}
#> Age 3 -> Infant, Pediatric 
#> Age 14 -> Adolescent, Pediatric 
#> Age 25 -> Adult, Non-Pediatric 
#> Age 70 -> Elderly, Non-Pediatric

Multilabel with Missing Values

fnew(
  "0,100,TRUE,TRUE"   = "Valid Score",
  "0,49,TRUE,TRUE"    = "Below Average",
  "50,100,TRUE,TRUE"  = "Above Average",
  "90,100,TRUE,TRUE"  = "Excellent",
  .missing = "No Score",
  .other = "Out of Range",
  name = "score_ml",
  type = "numeric",
  multilabel = TRUE
)
#> KS Format:score_ml (multilabel)
#> Type: numeric 
#> Mappings:
#>   [0, 100] => Valid Score
#>   [0, 49] => Below Average
#>   [50, 100] => Above Average
#>   [90, 100] => Excellent
#>   .missing => No Score
#>   .other => Out of Range

scores <- c(95, 45, NA, 150)
ml_result <- fput_all(scores, "score_ml")

for (i in seq_along(scores)) {
  cat("Score", ifelse(is.na(scores[i]), "NA", scores[i]),
      "->", paste(ml_result[[i]], collapse = ", "), "\n")
}
#> Score 95 -> Valid Score, Above Average, Excellent 
#> Score 45 -> Valid Score, Below Average 
#> Score NA -> No Score 
#> Score 150 -> Out of Range

Parse Multilabel from Text

fparse(text = '
VALUE risk (numeric, multilabel)
  [0, 3]   = "Low Risk"
  [0, 7]   = "Monitored"
  (3, 7]   = "Medium Risk"
  (7, 10]  = "High Risk"
;
')
#> $risk
#> KS Format:risk (multilabel)
#> Type: numeric 
#> Mappings:
#>   [0, 3] => Low Risk
#>   [0, 7] => Monitored
#>   (3, 7] => Medium Risk
#>   (7, 10] => High Risk

risk_scores <- c(2, 5, 9)
risk_labels <- fput_all(risk_scores, "risk")
for (i in seq_along(risk_scores)) {
  cat("Score", risk_scores[i], "->",
      paste(risk_labels[[i]], collapse = " | "), "\n")
}
#> Score 2 -> Low Risk | Monitored 
#> Score 5 -> Monitored | Medium Risk 
#> Score 9 -> High Risk

Multilabel Export

risk_obj <- format_get("risk")
cat(fexport(risk = risk_obj))
#> VALUE risk (numeric, multilabel)
#>   [0, 3] = "Low Risk"
#>   [0, 7] = "Monitored"
#>   (3, 7] = "Medium Risk"
#>   (7, 10] = "High Risk"
#> ;

fprint("risk")
#> KS Format:risk (multilabel)
#> Type: numeric 
#> Mappings:
#>   [0, 3] => Low Risk
#>   [0, 7] => Monitored
#>   (3, 7] => Medium Risk
#>   (7, 10] => High Risk

Practical Example: Adverse Event Severity Grading

fnew(
  "1,1,TRUE,TRUE" = "Mild",
  "2,2,TRUE,TRUE" = "Moderate",
  "3,3,TRUE,TRUE" = "Severe",
  "4,4,TRUE,TRUE" = "Life-threatening",
  "5,5,TRUE,TRUE" = "Fatal",
  "3,5,TRUE,TRUE" = "Serious",
  "1,2,TRUE,TRUE" = "Non-serious",
  name = "ae_grade",
  type = "numeric",
  multilabel = TRUE
)
#> KS Format:ae_grade (multilabel)
#> Type: numeric 
#> Mappings:
#>   [1, 1] => Mild
#>   [2, 2] => Moderate
#>   [3, 3] => Severe
#>   [4, 4] => Life-threatening
#>   [5, 5] => Fatal
#>   [3, 5] => Serious
#>   [1, 2] => Non-serious

grades <- c(1, 2, 3, 4, 5)
ae_labels <- fput_all(grades, "ae_grade")
for (i in seq_along(grades)) {
  cat("Grade", grades[i], ":",
      paste(ae_labels[[i]], collapse = " + "), "\n")
}
#> Grade 1 : Mild + Non-serious 
#> Grade 2 : Moderate + Non-serious 
#> Grade 3 : Severe + Serious 
#> Grade 4 : Life-threatening + Serious 
#> Grade 5 : Fatal + Serious

fclear()
#> All formats cleared from library.

Example 14: Case-Insensitive Matching

sex_nc <- fnew(
  "M" = "Male",
  "F" = "Female",
  .missing = "Unknown",
  name = "sex_nc",
  type = "character",
  ignore_case = TRUE
)

input <- c("m", "F", "M", "f", NA)
fput(input, sex_nc)
#> [1] "Male"    "Female"  "Male"    "Female"  "Unknown"

# Note the (nocase) flag in the printed header
fprint("sex_nc")
#> KS Format:sex_nc (nocase)
#> Type: character 
#> Mappings:
#>   M => Male
#>   F => Female
#>   .missing => Unknown

# Also works with fputc
fputc("m", "sex_nc")
#> [1] "Male"

fclear()
#> All formats cleared from library.

Example 15: Expression Labels in Formats

Expression labels contain .x1, .x2, etc., which reference extra arguments passed to fput(). This lets you compute labels dynamically.

Simple sprintf Expression

stat_fmt <- fnew(
  "n"   = "sprintf('%s', .x1)",
  "pct" = "sprintf('%.1f%%', .x1 * 100)",
  name = "stat",
  type = "character"
)

types  <- c("n",  "pct",  "n",   "pct")
values <- c(42,   0.053,  100,   0.255)

fput(types, stat_fmt, values)
#> [1] "42"    "5.3%"  "100"   "25.5%"

Two Extra Arguments (.x1, .x2)

ratio_fmt <- fnew(
  "ratio" = "sprintf('%s/%s', .x1, .x2)",
  name = "ratio",
  type = "character"
)

fput("ratio", ratio_fmt, 3, 10)
#> [1] "3/10"
fput(c("ratio", "ratio"), ratio_fmt, c(3, 7), c(10, 20))
#> [1] "3/10" "7/20"

ifelse Expression

sign_fmt <- fnew(
  "val" = "ifelse(.x1 > 0, paste0('+', .x1), as.character(.x1))",
  name = "sign",
  type = "character"
)

nums <- c(5, 0, -3)
fput(rep("val", 3), sign_fmt, nums)
#> [1] "+5" "0"  "-3"

Mixed Static and Expression Labels

mixed_fmt <- fnew(
  "header" = "HEADER",
  "n"      = "sprintf('N=%s', .x1)",
  "pct"    = "sprintf('%.1f%%', .x1 * 100)",
  name = "mixed",
  type = "character"
)

keys <- c("header", "n", "pct", "header", "n")
vals <- c(0,        42,  0.15,  0,        100)
fput(keys, mixed_fmt, vals)
#> [1] "HEADER" "N=42"   "15.0%"  "HEADER" "N=100"

Expression in .other Fallback

known_fmt <- fnew(
  "ok" = "OK",
  .other = "sprintf('Error(%s)', .x1)",
  name = "err_fmt",
  type = "character"
)

codes   <- c("ok", "E01", "ok", "E99")
details <- c("",   "timeout", "", "overflow")
fput(codes, known_fmt, details)
#> [1] "OK"              "Error(timeout)"  "OK"              "Error(overflow)"

Scalar Recycling

label_fmt <- fnew(
  "val" = "sprintf('%s (N=%s)', .x1, .x2)",
  name = "recycle",
  type = "character"
)

fput(c("val", "val"), label_fmt, c(42, 55), 100)
#> [1] "42 (N=100)" "55 (N=100)"

fclear()
#> All formats cleared from library.

Example 16: Vectorized Format Names (SAS PUTC-style)

Each element can use a different format, determined by a vector of format names:

# Dispatch format: maps type code to format name
fnew("1" = "groupx", "2" = "groupy", "3" = "groupz",
     name = "typefmt", type = "numeric")
#> KS Format:typefmt
#> Type: numeric 
#> Mappings:
#>   1 => groupx
#>   2 => groupy
#>   3 => groupz

# Per-group character formats
fnew("positive" = "agree",  "negative" = "disagree", "neutral" = "notsure",
     name = "groupx", type = "character")
#> KS Format:groupx
#> Type: character 
#> Mappings:
#>   positive => agree
#>   negative => disagree
#>   neutral => notsure
fnew("positive" = "accept", "negative" = "reject",   "neutral" = "possible",
     name = "groupy", type = "character")
#> KS Format:groupy
#> Type: character 
#> Mappings:
#>   positive => accept
#>   negative => reject
#>   neutral => possible
fnew("positive" = "pass",   "negative" = "fail",     "neutral" = "retest",
     name = "groupz", type = "character")
#> KS Format:groupz
#> Type: character 
#> Mappings:
#>   positive => pass
#>   negative => fail
#>   neutral => retest

type     <- c(1, 1, 1, 2, 2, 2, 3, 3, 3)
response <- c("positive", "negative", "neutral",
              "positive", "negative", "neutral",
              "positive", "negative", "neutral")

# Step 1: map type -> format name
respfmt <- fput(type, "typefmt")

# Step 2: apply per-element format
word <- fputc(response, respfmt)

data.frame(type = type, response = response, respfmt = respfmt, word = word)
#>   type response respfmt     word
#> 1    1 positive  groupx    agree
#> 2    1 negative  groupx disagree
#> 3    1  neutral  groupx  notsure
#> 4    2 positive  groupy   accept
#> 5    2 negative  groupy   reject
#> 6    2  neutral  groupy possible
#> 7    3 positive  groupz     pass
#> 8    3 negative  groupz     fail
#> 9    3  neutral  groupz   retest

fclear()
#> All formats cleared from library.

Example 17: Working with Dates and Formats — PUTN

A SAS-style workflow where format names are looked up dynamically per observation:

# Format that maps key codes to date format names
fnew("1" = "date9.", "2" = "mmddyy10.",
     name = "writfmt", type = "numeric")
#> KS Format:writfmt
#> Type: numeric 
#> Mappings:
#>   1 => date9.
#>   2 => mmddyy10.

fnew_date("date9.")
#> KS Format:DATE9.
#> Type: date 
#> Pattern: %d%b%Y (DATE9.)
fnew_date("mmddyy10.")
#> KS Format:MMDDYY10.
#> Type: date 
#> Pattern: %m/%d/%Y (MMDDYY10.)

# Input data (R date numbers = days since 1970-01-01)
number <- c(12103, 10899)
key    <- c(1, 2)

# Look up format name per observation
datefmt <- fputn(key, "writfmt")

# Apply per-element date format
date <- fputn(number, datefmt)

data.frame(number = number, key = key, datefmt = datefmt, date = date)
#>   number key   datefmt       date
#> 1  12103   1    date9.  20FEB2003
#> 2  10899   2 mmddyy10. 11/04/1999

fclear()
#> All formats cleared from library.

Example 18: Import SAS Formats from CNTLOUT CSV

The fimport() function reads a CSV file exported from a SAS format catalogue (PROC FORMAT ... CNTLOUT=):

csv_path <- system.file("extdata", "test_cntlout.csv", package = "ksformat")
imported <- fimport(csv_path)
#> Warning: Skipping PICTURE format: "PICFMT"
#> ℹ TYPE="P" is not supported by ksformat.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.A' (HLO='S') has no R equivalent.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.B' (HLO='S') has no R equivalent.
#> ✔ Imported 4 formats and 1 invalue from
#>   '/tmp/Rtmp9JzXuG/Rinstb07cb1087ad9b/ksformat/extdata/test_cntlout.csv'.
names(imported)
#> [1] "AGEGRP"   "BMICAT"   "GENDER"   "RACEIN"   "SMISSING"

fprint()
#> Registered formats:
#>   AGEGRP - VALUE (numeric), 3 mapping(s)
#>   BMICAT - VALUE (numeric), 4 mapping(s)
#>   GENDER - VALUE (character), 2 mapping(s)
#>   RACEIN - INVALUE (numeric), 3 mapping(s)
#>   SMISSING - VALUE (numeric), 1 mapping(s)

Use Imported Formats

# Character format (GENDER)
gender_codes <- c("M", "F", NA, "X")
data.frame(
  code = gender_codes,
  label = fputc(gender_codes, "GENDER")
)
#>   code   label
#> 1    M    Male
#> 2    F  Female
#> 3 <NA> Unknown
#> 4    X       X

# Numeric format (AGEGRP)
ages <- c(5, 17, 18, 45, 65, 100, NA, -1)
data.frame(
  age = ages,
  group = fputn(ages, "AGEGRP")
)
#>   age       group
#> 1   5       Child
#> 2  17       Child
#> 3  18       Adult
#> 4  45       Adult
#> 5  65      Senior
#> 6 100      Senior
#> 7  NA Missing Age
#> 8  -1       Other

# Numeric format (BMICAT)
bmi_values <- c(15.0, 18.5, 22.3, 25.0, 28.7, 30.0, 35.5)
data.frame(
  bmi = bmi_values,
  category = fputn(bmi_values, "BMICAT")
)
#>    bmi    category
#> 1 15.0 Underweight
#> 2 18.5      Normal
#> 3 22.3      Normal
#> 4 25.0  Overweight
#> 5 28.7  Overweight
#> 6 30.0       Obese
#> 7 35.5       Obese

# Invalue (RACEIN)
race_labels <- c("White", "Black", "Asian", "Other")
data.frame(
  label = race_labels,
  code = finputn(race_labels, "RACEIN")
)
#>   label code
#> 1 White    1
#> 2 Black    2
#> 3 Asian    3
#> 4 Other   NA

Apply to Data Frame

df <- data.frame(
  id = 1:5,
  sex = c("M", "F", "M", NA, "F"),
  age = c(10, 30, 70, NA, 50),
  stringsAsFactors = FALSE
)

gender_fmt <- imported[["GENDER"]]
age_fmt    <- imported[["AGEGRP"]]

fput_df(df, sex = gender_fmt, age = age_fmt, suffix = "_label")
#>   id  sex age sex_label   age_label
#> 1  1    M  10      Male       Child
#> 2  2    F  30    Female       Adult
#> 3  3    M  70      Male      Senior
#> 4  4 <NA>  NA   Unknown Missing Age
#> 5  5    F  50    Female       Adult

Export Imported Format

cat(fexport(AGEGRP = age_fmt))
#> VALUE AGEGRP (numeric)
#>   [0, 17] = "Child"
#>   [18, 64] = "Adult"
#>   [65, HIGH] = "Senior"
#>   .missing = "Missing Age"
#>   .other = "Other"
#> ;
cat(fexport(GENDER = gender_fmt))
#> VALUE GENDER (character)
#>   "M" = "Male"
#>   "F" = "Female"
#>   .missing = "Unknown"
#> ;

Selective Import (No Auto-register)

fclear()
#> All formats cleared from library.

manual <- fimport(csv_path, register = FALSE)
#> Warning: Skipping PICTURE format: "PICFMT"
#> ℹ TYPE="P" is not supported by ksformat.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.A' (HLO='S') has no R equivalent.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.B' (HLO='S') has no R equivalent.
#> ✔ Imported 4 formats and 1 invalue from
#>   '/tmp/Rtmp9JzXuG/Rinstb07cb1087ad9b/ksformat/extdata/test_cntlout.csv'.

# Library should be empty
fprint()
#> Format library is empty

# Use directly from returned list
fput(c("M", "F"), manual[["GENDER"]])
#> [1] "Male"   "Female"

fclear()
#> All formats cleared from library.