EFA, CFA, CB-SEM, and PLS-SEM syntax generation

In v0.3, surveyframe generates syntax for psych (EFA), lavaan (CFA and CB-SEM), and seminr (PLS-SEM) workflows. It does not fit models: model fitting remains in the researcher’s analysis script. The generated syntax should be reviewed, copied into that script, and fitted with the relevant modelling package using choices appropriate to the study.

demo <- sframe_demo_data()
instr <- demo$instrument

EFA planning syntax

efa_syntax() creates a small R code block for estimating an exploratory factor solution with the optional psych package. The generated code refers to the response data as a data frame named data.

cat(efa_syntax(
  items = c("dm_1", "dm_2", "dm_3", "sq_1", "sq_2", "sq_3"),
  nfactors = 2,
  extraction = "minres",
  rotation = "oblimin"
))
#> # EFA syntax generated by surveyframe
#> rlang::check_installed("psych", reason = "to estimate an EFA solution")
#> efa_items <- data[, c("dm_1", "dm_2", "dm_3", "sq_1", "sq_2", "sq_3")]
#> psych::fa(efa_items, nfactors = 2, fm = "minres", rotate = "oblimin")

CFA syntax from scale definitions

The simplest CFA workflow uses scales already defined in the instrument.

cat(cfa_lavaan_syntax(instr, ordered = TRUE))
#> # lavaan CFA syntax generated by surveyframe
#> # Model: Tourism Services Experience Demo CFA
#> # Recommended fitting option: std.lv = TRUE
#> # Ordered-item option: pass ordered = c(...) to lavaan::cfa()
#> # Fit with lavaan only when lavaan is installed.
#> 
#> # Digital marketing effectiveness (reflective)
#> digital_marketing =~ dm_1 + dm_2 + dm_3
#> 
#> # Service quality (reflective)
#> service_quality =~ sq_1 + sq_2 + sq_3
#> 
#> # Sustainability perception (reflective)
#> sustainability =~ sus_1 + sus_2
#> 
#> # Tourist satisfaction (reflective)
#> satisfaction =~ sat_1 + sat_2
#> 
#> # Behavioural intention (reflective)
#> behavioural_intention =~ bi_1 + bi_2

cfa_syntax() remains available as a backward-compatible wrapper.

cat(cfa_syntax(instr))
#> # lavaan CFA syntax generated by surveyframe
#> # Model: Tourism Services Experience Demo CFA
#> # Recommended fitting option: std.lv = TRUE
#> # Fit with lavaan only when lavaan is installed.
#> 
#> # Digital marketing effectiveness (reflective)
#> digital_marketing =~ dm_1 + dm_2 + dm_3
#> 
#> # Service quality (reflective)
#> service_quality =~ sq_1 + sq_2 + sq_3
#> 
#> # Sustainability perception (reflective)
#> sustainability =~ sus_1 + sus_2
#> 
#> # Tourist satisfaction (reflective)
#> satisfaction =~ sat_1 + sat_2
#> 
#> # Behavioural intention (reflective)
#> behavioural_intention =~ bi_1 + bi_2

Construct objects

Construct objects record the construct ID, label, measurement mode, and indicator items.

dm <- sf_construct(
  id = "DM",
  label = "Digital marketing",
  items = c("dm_1", "dm_2", "dm_3"),
  mode = "reflective"
)

sq <- sf_construct(
  id = "SQ",
  label = "Service quality",
  items = c("sq_1", "sq_2", "sq_3"),
  mode = "reflective"
)

sat <- sf_construct(
  id = "SAT",
  label = "Satisfaction",
  items = c("sat_1", "sat_2"),
  mode = "reflective"
)

Construct modes include reflective, composite, formative, and single_item. In v0.3, lavaan syntax generation is intended for reflective measurement models. PLS-SEM (seminr) syntax can use composite constructs.

CB-SEM lavaan syntax

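sf_model() stores the measurement model (constructs), the structural paths, any indirect effects, and fitting options. sem_lavaan_syntax() then generates lavaan syntax without requiring lavaan to be installed. In the example below, the path labels (a, b, c_prime) appear as lavaan parameter labels, and the generated indirect and total effects are defined in terms of them.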

sem_model <- sf_model(
  id = "tourism_sem",
  label = "Tourism structural model",
  type = "cb_sem",
  constructs = list(dm, sq, sat),
  paths = list(
    sf_path("DM", "SQ", label = "a"),
    sf_path("SQ", "SAT", label = "b"),
    sf_path("DM", "SAT", label = "c_prime")
  ),
  indirect = list(
    sf_indirect("DM", "SQ", "SAT", label = "indirect_DM_SQ_SAT")
  ),
  options = list(estimator = "MLR", missing = "fiml", standardised = TRUE)
)

validate_model(sem_model, instr)

cat(sem_lavaan_syntax(sem_model, instr))
#> # lavaan CB-SEM syntax generated by surveyframe
#> # Model: Tourism structural model
#> # Recommended summary option: standardized = TRUE
#> # Estimator: MLR
#> # Missing data method: fiml
#> 
#> DM =~ dm_1 + dm_2 + dm_3
#> SQ =~ sq_1 + sq_2 + sq_3
#> SAT =~ sat_1 + sat_2
#> 
#> # Structural paths
#> SAT ~ b*SQ + c_prime*DM
#> SQ ~ a*DM
#> 
#> # Indirect and total effects
#> indirect_DM_SQ_SAT := a*b
#> total_DM_SAT := c_prime + indirect_DM_SQ_SAT

PLS-SEM seminr syntax

For PLS-SEM planning, use composite constructs and seminr_syntax().

pls_model <- sf_model(
  id = "tourism_pls",
  label = "Tourism PLS model",
  type = "pls_sem",
  constructs = list(
    sf_construct("DM", "Digital marketing", c("dm_1", "dm_2", "dm_3"), mode = "composite"),
    sf_construct("SQ", "Service quality", c("sq_1", "sq_2", "sq_3"), mode = "composite"),
    sf_construct("SAT", "Satisfaction", c("sat_1", "sat_2"), mode = "composite")
  ),
  paths = list(
    sf_path("DM", "SQ"),
    sf_path("SQ", "SAT"),
    sf_path("DM", "SAT")
  ),
  options = list(bootstrap = 5000)
)

cat(seminr_syntax(pls_model))
#> # seminr PLS-SEM syntax generated by surveyframe
#> rlang::check_installed("seminr", reason = "to fit PLS-SEM models")
#> measurement_model <- constructs(
#>   composite("DM", multi_items("dm_", 1:3)),
#>   composite("SQ", multi_items("sq_", 1:3)),
#>   composite("SAT", multi_items("sat_", 1:2))
#> )
#> 
#> structural_model <- relationships(
#>   paths(from = "DM", to = c("SQ", "SAT")),
#>   paths(from = "SQ", to = c("SAT"))
#> )
#> 
#> pls_model <- estimate_pls(
#>   data = data,
#>   measurement_model = measurement_model,
#>   structural_model = structural_model
#> )
#> 
#> boot_model <- bootstrap_model(
#>   seminr_model = pls_model,
#>   nboot = 5000,
#>   cores = 1,
#>   seed = 123
#> )
#> 
#> reliability(pls_model)
#> ave(pls_model)
#> htmt(pls_model)

Model JSON

Model specifications can be stored in .sframe files and exported as JSON with model_json().

model_json(sem_model)
#> {
#>   "id": "tourism_sem",
#>   "label": "Tourism structural model",
#>   "type": "cb_sem",
#>   "engine": "lavaan",
#>   "measurement": {
#>     "constructs": [
#>       {
#>         "id": "DM",
#>         "label": "Digital marketing",
#>         "mode": "reflective",
#>         "items": ["dm_1", "dm_2", "dm_3"],
#>         "weights": null
#>       },
#>       {
#>         "id": "SQ",
#>         "label": "Service quality",
#>         "mode": "reflective",
#>         "items": ["sq_1", "sq_2", "sq_3"],
#>         "weights": null
#>       },
#>       {
#>         "id": "SAT",
#>         "label": "Satisfaction",
#>         "mode": "reflective",
#>         "items": ["sat_1", "sat_2"],
#>         "weights": null
#>       }
#>     ]
#>   },
#>   "structural": {
#>     "paths": [
#>       {
#>         "from": "DM",
#>         "to": "SQ",
#>         "label": "a"
#>       },
#>       {
#>         "from": "SQ",
#>         "to": "SAT",
#>         "label": "b"
#>       },
#>       {
#>         "from": "DM",
#>         "to": "SAT",
#>         "label": "c_prime"
#>       }
#>     ],
#>     "covariances": [],
#>     "indirect": [
#>       {
#>         "from": "DM",
#>         "through": "SQ",
#>         "to": "SAT",
#>         "label": "indirect_DM_SQ_SAT"
#>       }
#>     ]
#>   },
#>   "options": {
#>     "estimator": "MLR",
#>     "missing": "fiml",
#>     "standardised": true
#>   }
#> }

Where syntax goes next

The generated lavaan syntax should be passed as the model argument to lavaan::cfa() or lavaan::sem() after selecting estimators, ordered-item handling, missing-data treatment, and reporting options. The generated seminr syntax should be copied into an analysis script that runs where seminr is installed, once the researcher has chosen bootstrapping settings and construct modes.