/* Central JMP Life Sciences Macros used for the following Predictive Modeling Analytical Processes: Discriminant Analysis Distance Scoring GLM Select K Nearest Neighbors Logistic Regression Partial Least Squares Partition Trees Radial Basis Machine Survival Predictive Modeling */ %macro PredictiveAnalysis(_outdata,_outdataname,_outdatavars,_outdatavarsname,_jslfile,_jslfilename); %if %symexist(exiterror) %then %do; %if &exiterror %then %goto exit; %end; %let exiterror = 0; * create libname for input data set; %PathName(&InData); %let trainpath=&TmpPath; %let InData = %trim(&TmpName); libname InLib "&TmpPath"; %if &exiterror %then %do; %put ERROR: Input dataset specification is invalid.; %goto exit; %end; * check for incompatible options; %if %symexist(Mode) %then %do; %if &Mode = Interactive and %index(&VarSelect,Genetic) %then %do; %put ERROR: Genetic Algorithm is not available in &ProcessName Interactive mode.; %let exiterror = 1; %end; %if %symexist(CVing) %then %do; %if &Mode = Interactive %then %do; %put ERROR: Cross validation is not available in &ProcessName Interactive mode.; %let exiterror = 1; %end; %end; %end; %if &ProcessName = DiscriminantAnalysis %then %do; %if &Metric = Regularized and &Mode = Automated %then %do; %put ERROR: Regularized metric is available only in &ProcessName Interactive mode.; %let exiterror = 1; %end; %end; %if (&ProcessName = PartitionTrees) and (&Priors = Equal) %then %do; %if (&Mode = Interactive) %then %do; %put ERROR: Equal priors are not available in Interactive Mode for Partition Trees.; %let exiterror = 1; %goto exit; %end; %if (&Server ^= Local) %then %do; %put ERROR: Equal priors are not available in client-server mode for Partition Trees.; %let exiterror = 1; %goto exit; %end; %end; %if &exiterror %then %goto exit; /* proc catalog cat=work.sasmacr; contents; run; */ %global ClassVars OutName; %if %symexist(prefix) and %length(&prefix) %then %let OutName = &prefix; %else %let OutName = &InData; %if ^%symexist(KMeans) 
%then %let KMeans = No; %if ^%symexist(StatTest) %then %let StatTest = No; %if ^%symexist(TraitType) %then %let TraitType = ; %if ^%symexist(NumDepAsClass) %then %let NumDepAsClass = Yes; %if ^%symexist(ClassVars) %then %let ClassVars = ; %if ^%symexist(ContinuousVars) %then %let ContinuousVars = ; %if ^%symexist(GenerateHTML) %then %let GenerateHTML = No; %if ^%symexist(TreeMethod) %then %let TreeMethod = ; %if ^%symexist(FR_ImpVars) %then %let FR_ImpVars = No; %if ^%symexist(FR_Inds) %then %let FR_Inds = No; %if ^%symexist(FR_NVars) %then %let FR_NVars = ; %if ^%symexist(FR_Impfit) %then %let FR_Impfit = No; %if ^%symexist(MaxDepth) %then %let MaxDepth = ; %if ^%symexist(LeafSize) %then %let LeafSize = ; %if ^%symexist(FR_Seed) %then %let FR_Seed = ; %if ^%symexist(MaxTrees) %then %let MaxTrees = ; %if ^%symexist(FR_NInputs) %then %let FR_NInputs = ; %if ^%symexist(LambdaMode) %then %let LambdaMode = ; %if ^%symexist(CustomPriors) %then %let CustomPriors = ; %if ^%symexist(CustomCosts) %then %let CustomCosts = ; %if ^%symexist(cv_custompriors) %then %let cv_custompriors = ; %if ^%symexist(cv_customcosts) %then %let cv_customcosts = ; %if ^%symexist(SuccessCutoff) %then %let SuccessCutoff = 0.5; %if ^%symexist(YReflection) %then %let YReflection = No; * override custom priors and costs if they are specified elsewhere (e.g. 
CVMC); %if %length(&cv_custompriors) %then %let CustomPriors = &cv_custompriors; %if %length(&cv_customcosts) %then %let CustomCosts = &cv_customcosts; ods html close; * options for cross-validation; %if %symexist(CVing) %then %do; %let indat = &CVTrainInData; %if ^%length(&indat) %then %let indat = InLib.&InData; %let intestdat = &CVTestInData; %let outdat = CV_&&mdlname&WorkFlowrp.._preds; %let outdatvars = CV_&&mdlname&WorkFlowrp.._vars; %if &ProcessName = SurvivalPredictiveModeling %then %do; %let outdataspm1 = CV_&&mdlname&WorkFlowrp.._spm1; %let outdataspm2 = CV_&&mdlname&WorkFlowrp.._spm2; %let outdataspm3 = CV_&&mdlname&WorkFlowrp.._spm3; %end; options nonotes; %end; %else %do; %let indat = InLib.&InData; %let intestdat = d; %let outdat = &&&_OutData; %let outdatvars = &&&_OutDataVars; %include "&MacroPath./CrossValidationModelComparisonMacros.sas" / nosource; %include "&MacroPath./PartitionTreesMacros.sas" / nosource; %include "&MacroPath./BinaryResponseEffectSelectionMacros.sas" / nosource; %end; %put indat=&indat; %put intestdat=&intestdat; %put outdat=&outdat; %put outdatvars=&outdatvars; * Handling censor variable for survival data; %if &ProcessName = SurvivalPredictiveModeling %then %do; %let trait_type = survival; * parameter for CVMC; %let TraitType = Continuous; %if ^%length(&TestInData) & ^%symexist(CVing) %then %let intestdat = ; * process censoring variable; %if %length(&CensorVar) %then %do; data _null_; set InLib.&InData; call symputx("cvtype",vtype(&CensorVar)); stop; run; %put cvtype = &cvtype; %if ^%length(&CensorValues) %then %do; %if &cvtype=C %then %let CensorValues = "1"; %else %let CensorValues = 1; %end; %else %if &cvtype=C %then %do; %let char1 = %bquote(%substr(&CensorValues,1,1)); %put char1 = &char1; %if %bquote(&char1) ^= %str(%") %then %do; %put ERROR: Values for a character Censor Variable must be enclosed in double quotes.; %let exiterror = 1; %goto exit; %end; %end; * check to make sure at least one censoring event is found; 
%let foundone = 0; data _null_; set InLib.&InData; if &CensorVar in(%unquote(&CensorValues)) then do; call symputx('foundone',1); stop; end; run; %if ^&foundone %then %do; %put ERROR: The censoring variable &CensorVar has no values matching &CensorValues..; %let exiterror = 1; %goto exit; %end; * check for character type, create numeric if necessary; %let CensorVar_old = &CensorVar; %let CensorValues_old = &CensorValues; %if &cvtype=C %then %do; data d; set &indat; _train_ = 1; if &CensorVar in(%unquote(&CensorValues)) then _Censor1 = 1; else _Censor1 = 0; %if ^%length(&IDVar) %then %do; _RowNum_ = _n_; %end; run; %put NOTE: Numeric variable _Censor1 has been created to replace character variable &CensorVar..; %let indat = d; %let CensorVar = _Censor1; %let CensorValues = 1; %end; %else %do; data d; set &indat; _train_ = 1; %if ^%length(&IDVar) %then %do; _RowNum_ = _n_; %end; run; %end; %end; %else %do; data d; set &indat; _train_ = 1; %if ^%length(&IDVar) %then %do; _RowNum_ = _n_; %end; run; %end; %if ^%length(&IDVar) %then %do; %let IDVar = _RowNum_; %end; data _C_; set InLib.&InData(keep=&DepVar &WeightVar); if (&DepVar ne .) 
%if %length(&WeightVar) %then & (&WeightVar ne .);; run; * check if the reference time is within the range of time to event variable; %if %length(&RefTime) %then %do; data _null_; set _C_ end=_e_; retain _min_ _max_; * check range of time variable; if _n_ = 1 then do; _min_ = &DepVar; _Max_ = _min_; end; else do; if &DepVar < _min_ then _min_ = &DepVar; else if &DepVar > _max_ then _max_ = &DepVar; end; if _e_ then do; if (&RefTime < _min_) | (&RefTime > _max_) then do; put "ERROR: The specified Reference Time for Comparing Survival Curves, &RefTime, is not within" " the range of Time to Event Variable, &DepVar.."; put "ERROR: Please specify a reference time within " _min_ "and " _max_ +(-1)"."; call symput('exiterror',1); end; end; run; %end; %if &exiterror %then %goto exit; %nObsVars(_C_); %let nAllObs_sTrain = &nObs; %if &nOBS < 4 %then %do; %let exiterror = 1; %if &nObs = 1 %then %do; %let _v = is; %let _n = observation; %end; %else %do; %let _v = are; %let _n = observations; %end; %put ERROR: There &_v only &nObs valid &_n in the input data set. 
Please check the input data set and provide more valid data.; %goto exit; %end; %end; %else %do; data d; set &indat; _train_ = 1; run; %end; %let indat = d; %put indat = &indat; %if %index(%quote(&ST_Method),Shrunken) %then %do; %include "&MacroPath./MixedModelMacros.sas" / nosource; %end; libname OutLib "&OutPath"; %if %symexist(NewPredictorVars) %then %do; %let PredictorVars = &NewPredictorVars; %end; %if %symexist(NewClassPredVars) %then %do; %let ClassPredVars = &NewClassPredVars; %end; %if %symexist(NewListContPredVars) %then %do; %let ListContPredVars = &NewListContPredVars; %end; %if %symexist(NewListClassPredVars) %then %do; %let ListClassPredVars = &NewListClassPredVars; %end; %if ^%symexist(LockContPredVars) %then %let LockContPredVars = ; %if ^%symexist(LockClassPredVars) %then %let LockClassPredVars = ; %if ^%symexist(ListLockContPredVars) %then %let ListLockContPredVars = ; %if ^%symexist(ListLockClassPredVars) %then %let ListLockClassPredVars = ; %if (%length(&LockContPredVars) or %length(&LockClassPredVars) or %length(&ListLockContPredVars) or %length(&ListLockClassPredVars)) and %index(&VarSelect,Genetic) %then %do; %put ERROR: Genetic Algorithm is not available with lock-in predictor variables.; %let exiterror = 1; %goto exit; %end; %if %symexist(DropPredVars) %then %do; %if %length(%trim(&DropPredVars)) %then %do; %let i = 1; %do %while(%length(%scan(&DropPredVars,&i))); %let dpv = %scan(&DropPredVars,&i); %if %index(&PredictorVars,&dpv) %then %let PredictorVars = %RemoveWord(&PredictorVars,&dpv); %if %index(&ClassPredVars,&dpv) %then %let ClassPredVars = %RemoveWord(&ClassPredVars,&dpv); %if &exiterror %then %goto exit; %let i = %eval(&i+1); %end; data &indat; set &indat; drop &DropPredVars; run; %end; %end; /* %if ^%length(%trim(&PredictorVars &ListContPredVars &ClassPredVars &ListClassPredVars)) %then %do; %put ERROR: Predictor variables must be specified.; %let exiterror = 1; %goto exit; %end; */ data ind1; set &indat; if _n_ > 1 then stop; 
keep &PredictorVars &ListContPredVars &ClassPredVars &ListClassPredVars &LockContPredVars &LockClassPredVars &ListLockContPredVars &ListLockClassPredVars ; run; * check dependent variable; %put DepVar = &DepVar; %let _tid_ = %sysfunc(open(ind1)); %let _dnum_ = %sysfunc(varnum(&_tid_,&DepVar)); %let _tid_ = %sysfunc(close(&_tid_)); %if &_dnum_ %then %do; %put Dropping &DepVar from predictor variables; data ind1; set ind1; drop &DepVar; run; %if %index(&PredictorVars,&DepVar) %then %let PredictorVars = %RemoveWord(&PredictorVars,&DepVar); %if %index(&ClassPredVars,&DepVar) %then %let ClassPredVars = %RemoveWord(&ClassPredVars,&DepVar); %if %index(&LockContPredVars,&DepVar) %then %let LockContPredVars = %RemoveWord(&LockContPredVars,&DepVar); %if %index(&LockClassPredVars,&DepVar) %then %let LockClassPredVars = %RemoveWord(&LockClassPredVars,&DepVar); /* %let exiterror = 1; %put ERROR: The dependent variable &DepVar must not be included as a predictor variable.; %goto exit; */ %end; %global _dtype_; %let _tid_ = %sysfunc(open(&indat)); %let _dnum_ = %sysfunc(varnum(&_tid_,&DepVar)); %let _dtype_ = %sysfunc(vartype(&_tid_,&_dnum_)); %if %symexist(ColorVar) %then %do; %if %length(&ColorVar) %then %do; %let _cnum_ = %sysfunc(varnum(&_tid_,&ColorVar)); %let _ctype_ = %sysfunc(vartype(&_tid_,&_cnum_)); %end; %end; %let _tid_ = %sysfunc(close(&_tid_)); %put _dtype_ = &_dtype_; %if (&TraitType = Continuous) and (&_dtype_ = C) %then %do; %put ERROR: Continuous variable type is incompatible with character dependent variable &DepVar..; %put You may want to create a numeric version of &DepVar in the input data set.; %let exiterror = 1; %goto exit; %end; * assign default trait type; %if ^%length(&TraitType) %then %do; %if &_dtype_ = C %then %let TraitType = Binary; %else %do; %CheckBinary(&indat,&DepVar); %let nv = &nvalue; %put nvalue = &nv; %if &nv <= 1 %then %do; %if &nv = 0 %then %put ERROR: All values of &DepVar are missing. 
Model cannot be fit.; %else %put ERROR: Only one nonmissing value of &DepVar detected. Model cannot be fit.; %let exiterror = 1; %goto exit; %end; * numeric variables with two levels are considered binary by default; %if &nv = 2 %then %let TraitType = Binary; %else %if (&ProcessName = DiscriminantAnalysis) or (&ProcessName = KNearestNeighbors) or (&ProcessName = LogisticRegression) %then %let TraitType = Nominal; %else %let TraitType = Continuous; %end; %end; %put TraitType = &TraitType; %if (&TraitType=Continuous) or (&TraitType=Count) %then %do; %let NumDepAsClass = No; %end; %else %do; %let NumDepAsClass = Yes; %let ClassVars = &DepVar; %end; /* Options for Proc Glimmix, wait until SAS 9.2 %if (&ProcessName = RadialBasisMachine) %then %do; %if &TraitType=Binary %then %do; %if %length(&EventTrait)>0 %then %let DepOption = %str((event="&EventTrait")); %else %let DepOption = %str((event=LAST)); %end; %else %if &TraitType=Nominal %then %do; %if %length(&RefTrait)>0 %then %let DepOption = %str((ref="&RefTrait")); %else %let DepOption = %str((ref=FIRST)); %end; %else %let DepOption = ; %put DepOption = &DepOption; %let ModOption = ; %if &TraitType=Binary %then %let ModOption=%str(/ link=logit dist=binary); %else %if &TraitType=Nominal %then %let ModOption=%str(/ link=glogit dist=mult); %else %if &TraitType=Ordinal %then %let ModOption=%str(/ link=clogit dist=mult); %else %if &TraitType=Count %then %let ModOption=%str(/ link=log dist=p); %if ^%length(&ConvOption) %then %let ConvOption = %str(maxiter=50 absgconv=0.001); %put ModOption = &ModOption; %put ConvOption = &ConvOption; %if (&TraitType=Binary) or (&TraitType=Nominal) or (&TraitType=Ordinal) %then %let GlimmixClassVars = &DepVar; %else %let GlimmixClassVars = ; %end; */ * valid data set; %let ValidPath = ; %if ^%symexist(ValidInData) or %symexist(CVing) %then %do; %let ValidPath = ; %let ValidInData = ; %end; %if %length(&ValidInData) %then %do; %PathName(&ValidInData); %let ValidPath=&TmpPath; %let ValidInData 
= %trim(&TmpName); %end; * test data set; %let TestPath = ; %if ^%symexist(TestInData) or %symexist(CVing) %then %do; %let TestPath = ; %let TestInData = ; %end; %if %length(&TestInData) %then %do; %PathName(&TestInData); libname TInLib "&TmpPath"; %let TestPath=&TmpPath; %let TestInData = %trim(&TmpName); %let TestSpec = Yes; data d_test; set TInLib.&TestInData end=_e_; _Obs = _n_; if _e_ then call symput('nAllObs_sTest',_n_); run; %let intestdat = d_test; * check if the first 5 predictors in Input Dataset exist in the Test Dataset; %let _iid_ = %sysfunc(open(ind1)); %let _tVars_ = %sysfunc(attrn(&_iid_,NVARS)); %if &_tVars_ > 5 %then %let _tVars_ = 5; %checkvarexist( %do i = 1 %to &_tVars_; %sysfunc(VARNAME(&_iid_,&i)) %end; %let _iid_ = %sysfunc(close(&_iid_)); , TInLib.&TestInData, The predictor %if %symexist(Covariates) %then (or covariate);, _ShowError=1, _DsLabel= Test Data Set ); %if &exiterror %then %goto exit; * check if test has the dependent variable; %let _tid_ = %sysfunc(open(d_test)); %let _dnum_ = %sysfunc(varnum(&_tid_,&DepVar)); * check if the existance of &CensorVar for test data; %if %symexist(CensorVar) %then %do; %if %length(&CensorVar) %then %do; %let _cnum_ = %sysfunc(varnum(&_tid_,&CensorVar)); %if ^&_cnum_ %then %do; %let _tid_ = %sysfunc(close(&_tid_)); %put ERROR: Censor Variable, &CensorVar, is not in the Test Data Set. 
Please check the specified Test Data Set.; %let exiterror = 1; %goto exit; %end; %end; %end; %let _tid_ = %sysfunc(close(&_tid_)); %put _dnum_ = &_dnum_; %put DepVar = &DepVar; * if so, create a truth variable and set dependent variable to have missing values; %if &_dnum_ %then %do; %if %length(&DepVar) > 19 %then %let dv = %substr(&DepVar,1,19); %else %let dv = &DepVar; %let TruthVar = &dv._true; %put TruthVar = &TruthVar; data d_test; set d_test; &TruthVar = &DepVar; %if &_dtype_ = C %then %do; &DepVar = " "; %end; %else %do; &DepVar = .; %end; _train_ = 0; %if %symexist(CensorVar) %then %do; * for Survival PM; %if %length(&CensorVar) %then %do; %if &cvtype=C %then %do; if &CensorVar in (%unquote(&CensorValues)) then _Censor1 = 1; else _Censor1 = 0; %let CensorVar = _Censor1; %end; %end; %if &IDVar=_RowNum_ %then %do; _RowNum_ = _n_; %end; %end; run; data &indat; set &indat; &TruthVar = &DepVar; run; %end; %else %do; %let TruthVar = &DepVar; * Check &CensorVar for SMP; %if %symexist(CensorVar) %then %do; %if %length(&CensorVar) & &cvtype=C %then %do; data d_test; set d_test; if &CensorVar in (%unquote(&CensorValues)) then _Censor1 = 1; else _Censor1 = 0; run; %let CensorVar = _Censor1; %end; %end; %end; %end; %else %do; %let TestSpec = No; %let TruthVar = &DepVar; %end; * check list-style variables; %if %length(&ListContPredVars) | %length(&ListClassPredVars) %then %do; %let ListVars = &ListContPredVars &ListClassPredVars; %let i = 1; %do %while(%length(%scan(&ListVars,&i))); %CheckListVarExist(%scan(&ListVars,&i),ind1); %if &exiterror %then %goto exit; %let i = %eval(&i+1); %end; %end; %nObsVars(ind1); %if ^%symexist(NumVar) %then %let NumVar = &nVars; %else %if ^%length(&NumVar) %then %let NumVar = &nVars; %if %index(&VarSelect,Genetic) and (&NumVar > &nVars) %then %do; %let NumVar = &nVars; %put NOTE: Number of Predictor Variables to Select is reduced to &NumVar, the total number available.; %end; %put NumVar = &NumVar; %if &syserr > 4 %then %goto exit; * 
check variable types; %if %length(&PredictorVars &ListContPredVars)>0 %then %do; %let BadList = ; %CheckVarType(ind1,&PredictorVars &ListContPredVars,1,BadList); %if %length(&BadList) %then %do; %let exiterror = 1; %put ERROR: The following variables are of type character and misspecified as Predictor Continuous Variables: &BadList..; %goto exit; %end; %end; %if (&ProcessName = DiscriminantAnalysis) or (&ProcessName = KNearestNeighbors) %then %do; %if (&Priors = Custom) %then %do; %if ^%length(&CustomPriors) %then %do; %put ERROR: You must specify custom prior probabilities when specifying Priors = Custom.; %let exiterror = 1; %goto exit; %end; %let Priors = %unquote(&CustomPriors); %put Priors = &Priors; %end; %end; * combine listvars; %let PredictorVars = &PredictorVars &ListContPredVars; %let ClassPredVars = &ClassPredVars &ListClassPredVars; %let LockContPredVars = &LockContPredVars &ListLockContPredVars; %let LockClassPredVars = &LockClassPredVars &ListLockClassPredVars; * set up for ClassVars.sas; %global ListContColon ListClassColon; %if %index(&ListContPredVars,:) %then %let ListContColon = 1; %else %let ListContColon = 0; %if %index(&ListClassPredVars,:) %then %let ListClassColon = 1; %else %let ListClassColon = 0; %let NeedClassIndicators = No; %let RecoverClassPredVars = No; %if ^%symexist(PredWhere) %then %let PredWhere = ; %if ^%symexist(PredClassWhere) %then %let PredClassWhere = ; * APs that can handle class variables; %if (&ProcessName=LogisticRegression) or (&ProcessName=GLMSelect) or (&ProcessName=PartitionTrees) or (&ProcessName=SurvivalPredictiveModeling) %then %do; %if %length(&ClassPredVars) and ((&KMeans=Yes) or (&StatTest=Yes) or %index(&VarSelect,Genetic) or %length(&PredClassWhere)) %then %do; %let NeedClassIndicators = Yes; %if ^%index(&VarSelect,Genetic) %then %do; %let RecoverClassPredVars = Yes; %end; %end; %end; %else %do; %if %length(&ClassPredVars) or %length(&LockClassPredVars) %then %do; %let NeedClassIndicators = Yes; %end; %end; 
%put RecoverClassPredVars = &RecoverClassPredVars; * save original class variables for later recovery; %if (&RecoverClassPredVars = Yes) %then %do; data orig_d_class; set &indat &intestdat; keep &ClassPredVars; run; %end; * if cross-validating, drop obs with missing depvar; %if %symexist(CVing) %then %do; data &indat; set &indat; if missing(&DepVar) then delete; run; data dtest; set &intestdat; if missing(&DepVar) then delete; _train_ = 0; _Obs = _n_; %if &ProcessName = SurvivalPredictiveModeling %then %do; %if &cvtype=C %then %do; if &CensorVar_old in (%unquote(&CensorValues_old)) then _Censor1 = 1; else _Censor1 = 0; %end; %if &IDVar=_RowNum_ %then %do; _RowNum_ = _n_; %end; %end; run; %let intestdat = dtest; %put intestdat = &intestdat; %end; * for GLM, PLS, or RBM with binary dependent variable, use the second response indicator variable by default; %if ((&ProcessName = GLMSelect) or (&ProcessName = PartialLeastSquares) or (&ProcessName = RadialBasisMachine)) and ((&_dtype_ = C) or (&NumDepAsClass = Yes)) %then %let Level2 = Yes; %else %let Level2 = No; %if ^%symexist(KM_Usage) %then %let KM_Usage = Representative; * if test set has class variables that require levelization or if using the second response level or kmeans; * temporarily combine training and test so they can be levelized together; %if ((&NeedClassIndicators = Yes) and (%length(&LockClassPredVars) or %length(&ClassPredVars))) or (&Level2 = Yes) or %index(&KM_Usage,Means) %then %do; %put Temporarily combining training and test for levelization.; data d_test; set &intestdat; _train_ = 0; run; data &indat; set &indat d_test; _Obs = _n_; run; %let separate = 1; %end; %else %do; data &indat; set &indat; _Obs = _n_; run; %let separate = 0; %end; %if (&TestSpec = Yes) and (&NumDepAsClass = Yes) %then %do; %let ClassVars = &ClassVars &TruthVar; %end; * levelize class variables, create macro lists and indicator variables; * replace ClassPredVars accordingly; /* %if %index(&KM_Usage,Means) %then %do; %if 
%length(&TestInData) %then %let _testDs_ = d_test; %if %symexist(CVing) %then %let _testDs_ = d2; %let _tmp_cVars = &ClassPredVars; proc datasets library=Work nolist; change d = _tmp_; change &_testDs_ = d; quit; %include "&MacroPath./ClassVars.sas"; %if &syserr %then %goto exit; proc datasets library=Work nolist; change d = &_testDs_; change _tmp_ = d; quit; %let ClassPredVars = &_tmp_cVars; %end; */ %put Beginning class levelization; %include "&MacroPath./ClassVars.sas"; %if &syserr %then %goto exit; %put After class levelization:; %put nixl = &nixl; %put nipl = &nipl; %put nix = &nix; %put nip = &nip; /* %if (&nix = 0) and (&nip = 0) and (&nixl = 0) and (&nipl = 0) %then %do; %put ERROR: At least one predictor variable must be specified.; %let exiterror = 1; %goto exit; %end; */ %let upred = 0; %let uclass = 0; * remove depvar if it is in the pred var lists; %if &nix %then %do; %let _dnum_ = %sysfunc(varnum(&_dsix_,&DepVar)); %if &_dnum_ %then %do; %put Dropping &DepVar from predictor variables; %let _tid_ = %sysfunc(close(&_dsix_)); data dsix; set dsix; drop &DepVar; run; %let _dsix_ = %sysfunc(open(dsix)); %let nix = %sysfunc(attrn(&_dsix_,NVARS)); %put nix = &nix; %let upred = 1; %end; %end; %if &nip %then %do; %let _dnum_ = %sysfunc(varnum(&_dsip_,&DepVar)); %if &_dnum_ %then %do; %put Dropping &DepVar from class predictor variables; %let _tid_ = %sysfunc(close(&_dsip_)); data dsip; set dsip; drop &DepVar; run; %let _dsip_ = %sysfunc(open(dsip)); %let nip = %sysfunc(attrn(&_dsip_,NVARS)); %put nip = &nip; %let uclass = 1; %end; %end; %if &nixl %then %do; %let _dnum_ = %sysfunc(varnum(&_dsixl_,&DepVar)); %if &_dnum_ %then %do; %put Dropping &DepVar from lock-in predictor variables; %let _tid_ = %sysfunc(close(&_dsixl_)); data dsixl; set dsixl; drop &DepVar; run; %let _dsixl_ = %sysfunc(open(dsixl)); %let nixl = %sysfunc(attrn(&_dsixl_,NVARS)); %put nixl = &nixl; %end; %end; %if &nipl %then %do; %let _dnum_ = %sysfunc(varnum(&_dsipl_,&DepVar)); %if &_dnum_ 
%then %do; %put Dropping &DepVar from lock-in class predictor variables; %let _tid_ = %sysfunc(close(&_dsipl_)); data dsipl; set dsipl; drop &DepVar; run; %let _dsipl_ = %sysfunc(open(dsipl)); %let nipl = %sysfunc(attrn(&_dsipl_,NVARS)); %put nipl = &nipl; %end; %end; * remove lock-in vars from pred var lists if necessary; %if &nixl %then %do; %if &nix %then %do; %let dropv = ; %do i=1 %to &nixl; %let _dnum_ = %sysfunc(varnum(&_dsix_,%sysfunc(varname(&_dsixl_,&i)))); %if &_dnum_ %then %do; %let dropv = &dropv %sysfunc(varname(&_dsixl_,&i)); %end; %end; %if %length(&dropv) %then %do; %put Dropping &dropv from continuous predictor variables because of lock-in; %let _tid_ = %sysfunc(close(&_dsix_)); data dsix; set dsix; drop &dropv; run; %let _dsix_ = %sysfunc(open(dsix)); %let nix = %sysfunc(attrn(&_dsix_,NVARS)); %put nix = &nix; %let upred = 1; %end; %end; %if &nip %then %do; %let dropv = ; %do i=1 %to &nixl; %let _dnum_ = %sysfunc(varnum(&_dsip_,%sysfunc(varname(&_dsixl_,&i)))); %if &_dnum_ %then %do; %let dropv = &dropv %sysfunc(varname(&_dsixl_,&i)); %end; %end; %if %length(&dropv) %then %do; %put Dropping &dropv from class predictor variables because of lock-in; %let _tid_ = %sysfunc(close(&_dsip_)); data dsip; set dsip; drop &dropv; run; %let _dsip_ = %sysfunc(open(dsip)); %let nip = %sysfunc(attrn(&_dsip_,NVARS)); %put nip = &nip; %let uclass = 1; %end; %end; %end; %if &nipl %then %do; %if &nix %then %do; %let dropv = ; %do i=1 %to &nipl; %let _dnum_ = %sysfunc(varnum(&_dsix_,%sysfunc(varname(&_dsipl_,&i)))); %if &_dnum_ %then %do; %let dropv = &dropv %sysfunc(varname(&_dsipl_,&i)); %end; %end; %if %length(&dropv) %then %do; %put Dropping &dropv from continuous predictor variables because of lock-in; %let _tid_ = %sysfunc(close(&_dsix_)); data dsix; set dsix; drop &dropv; run; %let _dsix_ = %sysfunc(open(dsix)); %let nix = %sysfunc(attrn(&_dsix_,NVARS)); %put nix = &nix; %let upred = 1; %end; %end; %if &nip %then %do; %let dropv = ; %do i=1 %to &nipl; %let 
_dnum_ = %sysfunc(varnum(&_dsip_,%sysfunc(varname(&_dsipl_,&i)))); %if &_dnum_ %then %do; %let dropv = &dropv %sysfunc(varname(&_dsipl_,&i)); %end; %end; %if %length(&dropv) %then %do; %put Dropping &dropv from class predictor variables because of lock-in; %let _tid_ = %sysfunc(close(&_dsip_)); data dsip; set dsip; drop &dropv; run; %let _dsip_ = %sysfunc(open(dsip)); %let nip = %sysfunc(attrn(&_dsip_,NVARS)); %put nip = &nip; %let uclass = 1; %end; %end; %end; * update full predictor lists if necessary; %if &upred %then %do; %let PredictorVars = ; %do i=1 %to &nix; %let PredictorVars = &PredictorVars %sysfunc(varname(&_dsix_,&i)); %end; %let PredictorVars = &PredictorVars &ListContPredVars; %end; %if &uclass %then %do; %let ClassPredVars = ; %do i=1 %to &nip; %let ClassPredVars = &ClassPredVars %sysfunc(varname(&_dsip_,&i)); %end; %let ClassPredVars = &ClassPredVars &ListClassPredVars; %end; * additional initialization; %global nlev BPV nclasses event; %let nlev = 0; %let nclasses = ; %let BPV = 0; %let event = ; %if ^%symexist(Priors) %then %let Priors = Proportional; * determine number of classes and define prior weights if necessary; %if (&_dtype_=C) or (&NumDepAsClass=Yes) %then %do; proc freq data=d noprint; tables &DepVar / out=f; data f; set f; if missing(&DepVar) then delete; run; proc sort data=f; by &DepVar; run; /* title "f"; proc print data=f; run; */ %NObsVars(f); %let nclasses = &nobs; %do i=1 %to &nclasses; %global class&i; %end; data _null_; set f; call symputx("class"||trim(left(_n_)),trim(left(&DepVar))); call symputx("pct"||trim(left(_n_)),Percent); run; /* proc sort data=f; by percent; data _null_; set f nobs=nobs end=the_end; if _n_ = 2 then call symputx('event', trim(left(&TruthVar))); run; */ %put nclasses = &nclasses; %do i=1 %to &nclasses; %global class&i; %put class&i = &&class&i pct&i = &&pct&i; %end; %if (&nclasses > 1) %then %do; %let event = &class2; %end; %else %do; %let event = &class1; %end; %put event = &event; * for these APs, 
use reciprocal frequency weights to undo implicit proportional priors;
%if (&Priors ^= Proportional) and (&ProcessName ^= DiscriminantAnalysis) and (&ProcessName ^= KNearestNeighbors) and (&ProcessName ^= DistanceScoring) %then %do;
  /* _Prior is 100 divided by the class percent captured from the frequency table */
  data d;
    set d;
    %do i=1 %to &nclasses;
      %if (&_dtype_=C) %then %do;
        if &TruthVar = "&&class&i" then _Prior = 100/&&pct&i;
      %end;
      %else %do;
        if &TruthVar = &&class&i then _Prior = 100/&&pct&i;
      %end;
    %end;
  run;
%end;

* create macro variables containing P_ names for posterior probs;
* truncate depvar name if it is too long;
/* maxclen = length of the longest class value */
%let maxclen = 0;
%do c=1 %to &nclasses;
  %let clen = %length(&&class&c);
  %if &clen > &maxclen %then %do;
    %let maxclen = &clen;
  %end;
%end;
/* room left for the dependent variable name inside a 32 character name */
/* after the p_ prefix, the separating underscore and the longest class value */
%let maxdlen = %eval(32 - &maxclen - 3);
%put maxclen = &maxclen;
%put maxdlen = &maxdlen;
/* FIX: the test previously compared the literal text maxdlen (never true) and  */
/* its action assigned to the resolved value of DepVarTrunc, so a non-positive */
/* maxdlen fell through to the substr call below with an invalid length.       */
%if &maxdlen < 1 %then %do;
  %let DepVarTrunc = ;
%end;
%else %do;
  %if %length(&DepVar) > &maxdlen %then %do;
    %let DepVarTrunc = %substr(&DepVar,1,&maxdlen);
  %end;
  %else %do;
    %let DepVarTrunc = &DepVar;
  %end;
%end;

* verify class values are valid for sas names, if not, use values 1, 2, 3...;
/* one row per class; the candidate name falls back to the class index when it */
/* starts with a character outside underscore/letters or contains a character  */
/* outside underscore/letters/digits                                           */
data vn;
  length vname $ 32;
  one = 1;
  %do c=1 %to &nclasses;
    vname = trim(left(("p_&DepVarTrunc._&&class&c")));
    lenv = length(vname);
    _check1 = verify(upcase(vname),"_ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    _check2 = verify(upcase(vname),"_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");
    if (_check1 = 1) or ((_check2 > 0) and (_check2 <= lenv)) then do;
      vname = "p_&DepVarTrunc._&c";
    end;
    output;
  %end;
run;
/* ods exclude none; ods results; title "vname"; proc print data=vn; run; */
/* transpose twice so the names come back in _name_ as used by the ID statement */
proc transpose data=vn out=vnt;
  var one;
  id vname;
run;
proc transpose data=vnt out=vntt;
run;
%do c=1 %to &nclasses;
  %global p_prob&c;
%end;
data _null_;
  set vntt;
  call symputx("p_prob"||trim(left(_n_)),_name_);
run;
%do c=1 %to &nclasses;
  %put p_prob&c = &&p_prob&c;
%end;
%end;
%else %do;
  %let Priors = Proportional;
%end;

/* 30 character truncation of the dependent variable name */
%if %length(&DepVar) > 30 %then %do;
  %let DepVarTrunc30 = %substr(&DepVar,1,30);
%end;
%else %do;
  %let DepVarTrunc30 = &DepVar;
%end; * separate training and test that were combined above; %if &separate %then %do; %put Separating training and test combined above.; data d_test; set d; where _train_ = 0; run; %let intestdat = d_test; data d; set d; where _train_ = 1; run; %end; %let DepVar0 = &DepVar; %let _dtype0_ = &_dtype_; %let TruthVar0 = &TruthVar; * for GLM, PLS, or RBM with binary dependent variable, use the second indicator variable by default; %if (&Level2 = Yes) %then %do; %if (&nic1 = 1) %then %do; %put ERROR: Dependent variable has only one level.; %let exiterror = 1; %goto exit; %end; %else %if (&nic1 > 2) %then %do; %put ERROR: &nic1 levels detected for &DepVar. Only 2 levels can be modeled.; %put If &DepVar is numeric, make sure missing values are coded with a period.; %put If &DepVar is character, make sure missing values are coded with a blank.; %let exiterror = 1; %goto exit; %end; %if %length(&EventTrait) %then %do; %let eidx = 0; %let tidx = 0; data _null_; set ocp1; %if &_dtype_ = C %then %do; if &DepVar = "&EventTrait" then %end; %else %do; if &DepVar = %unquote(&EventTrait) then %end; call symput('eidx',trim(left(_n_))); run; %if ^&eidx %then %do; %put ERROR: Event trait level &EventTrait not found for &DepVar. 
Please specify a different value or leave it blank to use the second ordered level.; %let exiterror = 1; %goto exit; %end; %end; %else %do; %let eidx = 2; %end; %let EventTrait = &&_lev&eidx; %let event = &EventTrait; %let neidx = %eval(3-&eidx); %let NonEventTrait = &&_lev&neidx; %put eidx = &eidx; %put EventTrait = &EventTrait; %put NonEventTrait = &NonEventTrait; * make sure levels are valid for sas variable names; %if %sysfunc(verify(%upcase(&EventTrait),"_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")) %then %do; %let ETV = 2; %end; %else %do; %let ETV = &EventTrait; %end; %if %sysfunc(verify(%upcase(&NonEventTrait),"_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")) %then %do; %let NTV = 1; %end; %else %do; %let NTV = &NonEventTrait; %end; %put ETV = &ETV; %put NTV = &NTV; %let DepVar = &&_c&eidx; * make sure chosen variable is not the missing value indicator; %let ld = %length(&DepVar); %if &ld > 4 %then %do; %let dvlast4 = %substr(&DepVar,%eval(&ld-3),4); %if &dvlast4 = _Mis %then %do; %let eidx1 = %eval(&eidx+1); %put eidx1 = &eidx1; %let DepVar = &&_c&eidx1; %end; %end; %put New DepVar = &DepVar; * redefine DepVarTrunc and DepVarTrunc30; %if %length(&DepVar) > &maxdlen %then %do; %let DepVarTrunc = %substr(&DepVar,1,&maxdlen); %end; %else %do; %let DepVarTrunc = &DepVar; %end; %if %length(&DepVar) > 30 %then %do; %let DepVarTrunc30 = %substr(&DepVar,1,30); %end; %else %do; %let DepVarTrunc30 = &DepVar; %end; %let ClassVar = &DepVar; %let _dtype_ = N; %let DepSwitch = Yes; %if (&TruthVar0 = &DepVar0) %then %do; %let TruthVar = &DepVar; %end; %else %do; %let len0 = %length(&DepVar0); %let len1 = %eval(%length(&DepVar) - %length(&DepVar0)); %let TruthVar = &TruthVar0.%substr(&DepVar,%eval(&len0+1),&len1); %end; %put New TruthVar = &TruthVar; * set new DepVar to missing wherever original DepVar is missing; data d; set d; if missing(&DepVar0) then &DepVar = .; run; %let TraitType = Binary; %end; %else %do; %let DepSwitch = No; %end; * Where Clause filter on continuous and class 
predictors; %if (%length(&PredWhere) and &nix) or (%length(&PredClassWhere) and &nip) %then %do; %let allstats = CSS CV IQR KURTOSIS MAD MAX MEAN MEDIAN MIN NMISS SKEWNESS STD STDERR SUM USS VAR; * build a data set with variable names to keep; %if &nix %then %do; proc transpose data=d out=pw; var %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; ; where not missing(&DepVar); run; %if %length(&PredWhere) %then %do; * check for stat specification; %let nstat = 0; %let upPredWhere = %upcase(&PredWhere); %let i=1; %do %while(%length(%scan(&allstats,&i,' '))); %let stat = %scan(&allstats,&i,' '); %if %index(&upPredWhere,&stat) %then %do; %let nstat = %eval(&nstat + 1); %let stat&nstat = &stat; %end; %let i=%eval(&i+1); %end; %if (&nstat > 0) %then %do; data pw; set pw; %do i=1 %to &nstat; &&stat&i = &&stat&i.(of col:); %end; run; %end; %if &syserr > 4 %then %goto exit; data pw; set pw; where %unquote(&PredWhere); run; %end; %if &syserr > 4 %then %goto exit; %end; * filter class predictor variables; %if &nip %then %do; proc transpose data=d out=pwc; var %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ; where not missing(&DepVar); run; %if %length(&PredClassWhere) %then %do; * check for stat specification; %let nstat = 0; %let upPredWhere = %upcase(&PredClassWhere); %let i=1; %do %while(%length(%scan(&allstats,&i,' '))); %let stat = %scan(&allstats,&i,' '); %if %index(&upPredWhere,&stat) %then %do; %let nstat = %eval(&nstat + 1); %let stat&nstat = &stat; %end; %let i=%eval(&i+1); %end; %if (&nstat > 0) %then %do; data pwc; set pwc; %do i=1 %to &nstat; &&stat&i = &&stat&i.(of col:); %end; run; %end; %if &syserr > 4 %then %goto exit; data pwc; set pwc; where %unquote(&PredClassWhere); run; %end; %if &syserr > 4 %then %goto exit; %if &nix %then %do; data pw; length _name_ $32; set pw pwc; run; %end; %else %do; proc datasets library=work; change pwc=pw; run; %end; %end; * remove filtered predictors from the data set; %ReduceData(d,pw,&intestdat,Where Clause,No); 
%end; %if &syserr > 4 %then %goto exit; * Standardization for continuous predictors; %if ^%symexist(StandardizationMethod) %then %let StandardizationMethod = ; %if %length(&StandardizationMethod) and (&nixl or &nix) %then %do; * substitute numerical argument; %if ^%length(&NumARg) %then %let NumArg = 0; %put Specified Standardization Method = &StandardizationMethod; %put NumArg = &NumArg; %let firstpart = %scan(&StandardizationMethod,1,'('); %put firstpart = &firstpart; %let lfp = %length(&firstpart); %if (%length(&StandardizationMethod) = &lfp) %then %let parenloc = 0; %else %let parenloc = 1; %put parenloc = &parenloc; %if (&NumArg > 0) and (&parenloc > 0) %then %do; %let StandardizationMethod = &firstpart.(&NumArg); %end; * standardize continuous predictors in training data then apply statistics to test data; proc stdize data=d out=d method=&StandardizationMethod outstat=tmpstat; var %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; ; run; %if %length(&intestdat) & &intestdat^=d %then %do; * replace any missing location estimators with 0; * and scale estimators with 1 to avoid errors in next call; data tmpstat; set tmpstat; array pv{*} %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; ; if (_TYPE_ = "LOCATION") or (_TYPE_ = "SCALE") then do; do i=1 to dim(pv); if (_TYPE_ = "LOCATION") then do; if missing(pv[i]) then pv[i] = 0; end; else if (_TYPE_ = "SCALE") then do; if missing(pv[i]) then pv[i] = 1; end; end; end; run; proc stdize data=&intestdat out=&intestdat method=in(tmpstat); var %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; ; run; %end; %end; ods exclude all; ods noresults; * KMeans Predictor Variable Reduction; %if (&KMeans = Yes) %then %do; %let _tid_ = %sysfunc(open(d)); %let _nVars_ = %sysfunc(attrn(&_tid_, NVARS)); %let _nObs_ = %sysfunc(attrn(&_tid_, NOBS)); %let _tid_ 
= %sysfunc(close(&_tid_)); %let _nCluster_ = &KM_Max; /* %let _nCluster_ = %eval(&KM_Multiple*&_nObs_); %if &_nCluster_ > &KM_Max %then %let _nCluster_ = &KM_Max; */ * call fastclus to compute the k-means; %if %eval(&nix + &nip) > &_nCluster_ or ((&RecoverClassPredVars = Yes) and %index(&KM_Usage,Representative)) %then %do; %if %index(&KM_Usage,Representative) %then %do; %let outseed = ; %end; %else %do; %let outseed = seed; %end; /* Fix for defect S0758603 */ data dsipx; merge %if &_dsix_ %then %do; dsix %end; %if &_dsip_ %then %do; dsip %end; ; run; %fastclus(datapath=, dsname=d, where_clause=%str(not missing(&DepVar)), quantitative_vars= /* %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; */ , qualitative_vars=, list_style_quantitative_vars=, list_style_qualitative_vars=, quantitative_vars_dsname=dsipx, maxclusters=&_nCluster_, metadata_variables_dsname=, seed_dsname=&outseed ); %if %length(&InData) > 28 %then %let inname = %substr(&InData,1,28); %else %let inname = &InData; data OutLib.&inname._kmc; set clus; drop col:; run; %if %index(&KM_Usage,Representative) %then %do; data postclus; set postclus; rename name=_name_; run; %if (&StatTest=Yes) %then %let recov = No; %else %let recov = &RecoverClassPredVars; %ReduceData(d,postclus,&intestdat,K-Means,&recov); %if &exiterror %then %goto exit; %end; %else %do; data OutLib.&inname._kms; set seed; drop col:; run; * replace predictors with kmeans; data km; set seed; keep cluster col:; run; proc transpose data=km out=kmt prefix=KMean; var col:; id cluster; run; data d; merge d kmt; drop %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ; run; * compute the same kmeans for the test data set; %if %length(&intestdat) and (&intestdat ^= d) %then %do; proc transpose data=&intestdat out=trans name=NAME; var %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) 
%end; ; run; proc sort data=trans; by NAME; run; proc sort data=OutLib.&inname._kmc out=tm; by NAME; run; data tm; merge tm trans; by NAME; run; proc sort data=tm; by cluster; run; %NObsVars(&intestdat); proc means data=tm noprint; by cluster; var col1-col&nobs; output out=km mean=col1-col&nobs; run; proc transpose data=km out=kmt prefix=KMean; var col1-col&nobs; id cluster; run; data &intestdat; merge &intestdat kmt; drop %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ; run; %end; %if (&nix > 0) %then %do; %let _rc_ = %sysfunc(close(&_dsix_)); %end; data dsix; set kmt; if (0); keep KMean:; run; %let _dsix_ = %sysfunc(open(dsix)); %NObsVars(seed); %let nix = &nobs; %if (&nip > 0) %then %do; %let _rc_ = %sysfunc(close(&_dsip_)); %let nip = 0; %end; %let PredictorVars = ; %let ClassPredVars = ; %let ListContPredVars = KMean:; %let ListClassPredVars = ; %end; %end; %else %do; %put K-means clustering is not performed because the total number of predictors (%eval(&nix + &nip)); %put is not greater than the specified number of clusters (&_nCluster_).; %end; %end; * Statistical Testing for Predictor Variable Reduction; %if (&StatTest = Yes) %then %do; %put Statistical Testing for Predictor Reduction started: &sysdate &systime; %if &ProcessName ne SurvivalPredictiveModeling %then %do; * if depvar is continuous, discretize it; %if (&_dtype_ = N) and (&NumDepAsClass = No) %then %do; %let ncl = 7; %if %length(&DepVar) > 30 %then %let UseDep = %substr(&DepVar,1,30)_&ncl; %else %let UseDep = &DepVar._&ncl; proc rank data=d out=d groups=&ncl; var &DepVar; ranks &UseDep; run; %do i=1 %to &ncl; %let lev&i = &i; %end; %end; %else %do; data dep; set d; keep &DepVar; where not missing(&DepVar); run; proc sort data=dep out=depn nodupkey; by &DepVar; run; data _null_; set depn end=last; call symput("lev"||trim(left(_n_)),trim(left(&DepVar))); if last then call symput("ncl",trim(left(_n_))); run; %if 
(%index(%quote(&ST_Method),Radial) or %index(%quote(&ST_Method),Quadratic)) and (&_dtype_ = C) and %length(&ClassVars) %then %do; %let UseDep = &_c1; %end; %else %do; %let UseDep = &DepVar; %end; %end; %end; %else %do; %let UseDep = &DepVar; * do not discretize survival data; %let ncl = 1; %end; %*put ncl = &ncl; %if &MultipleTestingMethod = None %then %let MultipleTestingMethod = ; %if (%eval(&nix + &nip) > &ST_Max) or (&RecoverClassPredVars = Yes) %then %do; %include "&MacroPath./MultipleTestingAdjustment.sas" / nosource; * create names of adjusted p-values computed by PROC MULTTEST; %AdjPValueName(method=&MultipleTestingMethod); %let sterrmsg = did not complete successfully. If it ran out of memory, try reducing the number of predictors; %let sterrmsg = &sterrmsg with filtering or K-means, or use a different statistical testing method.; * tests for continuous predictors; %if (&nix) %then %do; * chunk the data for I/O performance; %if %index(%quote(&ST_Method),Shrunken) %then %let chunksize = 1000000; %else %let chunksize = 10000; %let startv = 1; %let endv = %eval(&startv + &chunksize - 1); %if (&endv > &nix) %then %let endv = &nix; %let nclm1 = %eval(&ncl-1); %let morevars = 1; data opd1; run; %do %while(&morevars=1); %put Computing statistical tests for predictor vars &startv to &endv out of &nix; data d1; set d; if missing(&UseDep) then delete; keep &UseDep %do i=&startv %to &endv; %sysfunc(varname(&_dsix_,&i)) %end; %if &ProcessName = SurvivalPredictiveModeling %then %do; &CensorVar &Covariates &WeightVar; %end; ; run; %if (%quote(&ST_Method) = %quote(T-Test)) %then %do; proc multtest data=d1 out=opd1; class &UseDep; test mean(%do i=&startv %to &endv; %sysfunc(varname(&_dsix_,&i)) %end;); %do i=2 %to &ncl; contrast "&&lev&i - &lev1" -1 %do j=1 %to %eval(&i-2); 0 %end; 1; %end; run; %if (&syserr > 4) %then %do; %put ERROR: PROC MULTTEST &sterrmsg; %goto exit; %end; * get number of nonmissing observations for computing adjusted statistics; %NObsVars(d1); data 
opd1; length _name_ $ 32; set opd1; _name_ = _var_; _value_ = (&ncl-1)*_value_/&nobs; _se_ = (&ncl-1)*_se_/&nobs; rename _value_=Difference _se_=StdErr; label _value_="Difference" _se_="Standard Error"; drop _var_ _test_; run; %end; %else %if %index(%quote(&ST_Method),Shrunken) %then %do; * set up to call OneWay macro; %global ExpInData AnnoInData GroupVars BlockVars InClassVars MultipleTestingMethod EBShrink LSMeansDiffSet LSMeanStdMethod; * create tall and ed data sets from wide data; proc transpose data=d1 out=tall; var %do i=&startv %to &endv; %sysfunc(varname(&_dsix_,&i)) %end; ; run; %if %length(&UseDep) > 24 %then %do; %let UseDep1 = %substr(&UseDep,1,24); %end; %else %do; %let UseDep1 = &UseDep; %end; data ed; length ColumnName $ 32; set d1; &UseDep1 = &UseDep; Array = _n_; ColumnName = "COL" || trim(left(_n_)); keep &UseDep1 Array ColumnName; run; * assign macro vars; %let InDataSave = &InData; %let ClassVarsSave = &ClassVars; ods exclude all; ods noresults; proc contents data=tall; ods output enginehost=outlib.eng; run; data _null_; set outlib.eng; if Label1="Filename" then call symputx("InData",cValue1); run; proc contents data=ed; ods output enginehost=eng; run; data _null_; set eng; if Label1="Filename" then call symputx("ExpInData",cValue1); run; %let suffix = ; %let AnnoInData = ; %let AnnoWhereStmt = ; %let AnnoOnly = ; %let GroupVars = &UseDep1; %let BlockVars = ; %let InClassVars = ; %let EBShrink = yes; %let LSMeansDiffSet = None; %let OutData = opd1; %let OneWayLaunchJSL = 0; * call OneWay macro; %ShowTime(OneWay started:); %OneWay; %ShowTime(OneWay finished:); %if (&syserr > 4) %then %do; %put ERROR: OneWay macro &sterrmsg; %goto exit; %end; %let InData = &InDataSave; %let ClassVars = &ClassVarsSave; data opd1; set outlib.opd1; rename PrF_T3_&UseDep1=raw_p; run; * sort by descending F statistic since very small p-values may be identical; proc sort data=opd1; by descending F_T3_&UseDep1; run; %end; %else %if %index(%quote(&ST_Method),Unequal) 
%then %do; data stack; length _name_ $ 32; set d1; %do i=&startv %to &endv; _name_ = "%sysfunc(varname(&_dsix_,&i))"; _y = %sysfunc(varname(&_dsix_,&i)); output; %end; keep _name_ &UseDep _y; run; proc sort data=stack; by _name_; run; %ShowTime(PROC MIXED started:); proc mixed data=stack; by _name_; class &UseDep; model _y = &UseDep; repeated / group=&UseDep; lsmeans &UseDep / diff; ods output diffs=opd1; run; %ShowTime(PROC MIXED finished:); %if (&syserr > 4) %then %do; %put ERROR: PROC MIXED &sterrmsg; %goto exit; %end; data opd1; set opd1; rename Estimate=Difference Probt=raw_p; run; %end; %else %if %index(%quote(&ST_Method),Radial) %then %do; /* %if &_dtype_ ^= N %then %do; %put ERROR: Radial Basis testing is not available for character dependent variables.; %let exiterror = 1; %goto exit; %end; */ * fit of null model for likelihood ratio test; proc glimmix data=d1; model &UseDep = ; ods output fitstatistics=opd0; run; data _null_; set opd0; if Descr = "-2 Res Log Likelihood" then call symput("m2rll0",trim(left(value))); run; %put m2rll0 = &m2rll0; data stack; length _name_ $ 32; set d1; %do i=&startv %to &endv; _name_ = "%sysfunc(varname(&_dsix_,&i))"; _y = %sysfunc(varname(&_dsix_,&i)); output; %end; keep _name_ &UseDep _y; run; proc sort data=stack; by _name_; run; * the glimmix call with a lot of by groups can be slow; %ShowTime(PROC GLIMMIX started:); proc glimmix data=stack; by _name_; model &UseDep = ; random _y / type=rsmooth knotmethod=data(stack); ods output fitstatistics=opd1; run; %ShowTime(PROC GLIMMIX finished:); %if (&syserr > 4) %then %do; %put ERROR: PROC GLIMMIX &sterrmsg; %goto exit; %end; * likelihood ratio test; data opd1; set opd1; if Descr = "-2 Res Log Likelihood"; Difference = &m2rll0 - Value; if (Difference <= 0) then raw_p = 1; * divide lrt by 2 because test is 1 df and on the boundary; else raw_p = (1 - probchi(Difference,1))/2; run; %end; %else %if %index(%quote(&ST_Method),Quadratic) %then %do;; data stack; length _name_ $ 32; set 
d1; %do i=&startv %to &endv; _name_ = "%sysfunc(varname(&_dsix_,&i))"; x = %sysfunc(varname(&_dsix_,&i)); x2 = x*x; output; %end; keep _name_ &UseDep x x2; run; proc sort data=stack; by _name_; run; %ShowTime(PROC REG started:); proc reg data=stack; by _name_; model &UseDep = x x2 ; ods output parameterestimates=opd1; run; %ShowTime(PROC REG finished:); %if (&syserr > 4) %then %do; %put ERROR: PROC REG &sterrmsg; %goto exit; %end; data opd1; set opd1; rename Estimate=Difference Probt=raw_p; run; %end; %else %if %index(%quote(&ST_Method),Rank) or %index(%quote(&ST_Method),Median) %then %do; %if %index(%quote(&ST_Method),Median) %then %do; %let npmethod = MEDIAN; %let npstat = _CHMED_; %let nppval = P_CHMED; %end; %else %do; %let npmethod = WILCOXON; %let npstat = _KW_; %let nppval = P_KW; %end; proc npar1way data=d1 &npmethod; class &UseDep; /* var %do i=&startv %to &endv; %sysfunc(varname(&_dsix_,&i)) %end; ; */ output out=opd1; run; %if (&syserr > 4) %then %do; %put ERROR: PROC NPAR1WAY &sterrmsg; %goto exit; %end; data opd1; length _name_ $ 32; set opd1; Difference = &npstat; raw_p = &nppval; _name_ = _var_; drop _var_; run; %end; %else %if %index(%quote(&ST_Method),Robust) %then %do; * PROC ROBUSTREG runs out of memory with a large number of BY groups in 9.1.3, so cannot handle large chunks; * This is fixed in 9.2; * PROC ROBUSTREG does not accept CLASS effects for S Estimation, so create indicator variables using PROC GLMMOD; data d1a; set d1; _one = 1; keep _one &UseDep; run; proc glmmod data=d1a prefix=diff outdesign=d1b; class &UseDep; model _one = &UseDep / noint; run; data d2; merge d1b d1; run; data stack; length _name_ $ 32; set d2; %do i=&startv %to &endv; _name_ = "%sysfunc(varname(&_dsix_,&i))"; _y = %sysfunc(varname(&_dsix_,&i)); output; %end; keep _name_ _y diff1-diff&nclm1; run; proc sort data=stack; by _name_; run; * call robustreg; %ShowTime(PROC ROBUSTREG started:); proc robustreg data=stack method=mm; by _name_; model _y = diff1-diff&nclm1 ; 
ods output parameterestimates=opd1; run;
%ShowTime(PROC ROBUSTREG finished:);
%if (&syserr > 4) %then %do;
  %put ERROR: PROC ROBUSTREG &sterrmsg;
  %goto exit;
%end;
* save only the indicator effects;
/* the trailing dummy literal absorbs the comma left by the generated list */
data opd1;
  set opd1;
  where parameter in (%do i=1 %to &nclm1; "DIFF&i", %end; "dummystring");
  rename Estimate=Difference ProbChiSq=raw_p;
run;
%end;
%else %if %index(%quote(&ST_Method),Cox) %then %do;
  * added in v4.1 for survival data;
  /* stack the current chunk: one copy of the rows per predictor, value in _x */
  data stack;
    length _name_ $ 32;
    set d1;
    %do i=&startv %to &endv;
      _name_ = "%sysfunc(varname(&_dsix_,&i))";
      _x = %sysfunc(varname(&_dsix_,&i));
      output;
    %end;
    keep _name_ &DepVar &CensorVar &Covariates &WeightVar _x
      %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
      %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
    ;
  run;
  proc sort data=stack;
    by _name_;
  run;
  options nonotes;
  %ShowTime(PROC PHREG started:);
  /* one model per predictor (BY _name_); lock-in variables enter every model */
  proc phreg data=stack %if %length(&PhregOptions) %then &PhregOptions;;
    by _name_;
    %if %length(&WeightVar) %then %do;
      Weight &WeightVar;
    %end;
    %if &nipl %then %do;
      class %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; ;
    %end;
    /* FIX: the length test now dereferences PhregModelOptions; it previously   */
    /* measured the literal name, so the slash was emitted even with no options */
    %if %length(&CensorVar) %then %do;
      model &DepVar*&CensorVar(&CensorValues) =
        %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
        %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
        _x %if %length(&PhregModelOptions) %then / &PhregModelOptions; ;
    %end;
    %else %do;
      model &DepVar =
        %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
        %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
        _x %if %length(&PhregModelOptions) %then / &PhregModelOptions; ;
    %end;
    ods output parameterestimates=opd1;
  run;
  %ShowTime(PROC PHREG finished:);
  %if (&syserr > 4) %then %do;
    %put ERROR: PROC PHREG &sterrmsg;
    %goto exit;
  %end;
  /* keep only the row for the tested predictor */
  data opd1;
    set opd1;
    if Parameter = "_x";
    rename Estimate=Difference ProbChiSq=raw_p;
  run;
%end;

/* accumulate chunk results: first chunk starts opd, later chunks append */
%if (&startv=1) %then %do;
  data opd;
    set opd1;
  run;
%end;
%else %do;
  data opd;
    set opd opd1;
  run;
%end;

/* advance to the next chunk of continuous predictors */
%if &endv = &nix %then %do;
  %let morevars = 0;
%end;
%else %do;
  %let startv =
%eval(&endv+1);
  %let endv = %eval(&startv + &chunksize - 1);
  %if (&endv > &nix) %then %let endv = &nix;
%end;
%end;
%end;

* statistical tests for class predictor variables;
%if (&nip) %then %do;
  * chunk the data for I/O performance;
  %let chunksize = 10000;
  %let startv = 1;
  %let endv = %eval(&startv + &chunksize - 1);
  %if (&endv > &nip) %then %let endv = &nip;
  %let nclm1 = %eval(&ncl-1);
  %let morevars = 1;
  data opd1;
  run;
  %do %while(&morevars=1);
    %put Computing statistical tests for class predictor vars &startv to &endv out of &nip;
    /* FIX: WeightVar is now kept for survival runs, matching the continuous  */
    /* predictor chunk; the PHREG step below issues a Weight statement for it */
    data d1;
      set d;
      keep &UseDep
        %do i=&startv %to &endv; %sysfunc(varname(&_dsip_,&i)) %end;
        %if &ProcessName = SurvivalPredictiveModeling %then %do;
          &CensorVar &Covariates &WeightVar
        %end;
      ;
    run;
    %if ^%index(%quote(&ST_Method),Cox) %then %do;
      * Fisher exact test for class predictors;
      proc multtest data=d out=opd1;
        class &UseDep;
        test fisher(%do i=&startv %to &endv; %sysfunc(varname(&_dsip_,&i)) %end;);
        %do i=2 %to &ncl;
          contrast "&&lev&i - &lev1" -1 %do j=1 %to %eval(&i-2); 0 %end; 1;
        %end;
      run;
      %if (&syserr > 4) %then %do;
        %put ERROR: PROC MULTTEST &sterrmsg;
        %goto exit;
      %end;
      data outlib.opd1;
        set opd1;
      run;
      /* PropDiff is computed only when both counts are present and both totals positive */
      data opd1;
        length _name_ $ 32;
        set opd1;
        _name_ = _var_;
        if (_xval_ ne .) and (_yval_ ne .) and (_mval_ > 0) and (_nval_ > 0) then PropDiff = _xval_/_mval_ - _yval_/_nval_;
        else PropDiff = .;
        label PropDiff="Proportion Diff";
        drop _var_ _test_;
      run;
    %end;
    %else %do;
      * added in v4.1 for survival data;
      /* stack the current chunk: one copy of the rows per predictor, value in _x */
      data stack;
        length _name_ $ 32;
        set d1;
        %do i=&startv %to &endv;
          _name_ = "%sysfunc(varname(&_dsip_,&i))";
          _x = %sysfunc(varname(&_dsip_,&i));
          output;
        %end;
        keep _name_ &DepVar &CensorVar &Covariates &WeightVar _x
          %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
          %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
        ;
      run;
      proc sort data=stack;
        by _name_;
      run;
      options nonotes;
      %ShowTime(PROC PHREG started:);
      proc phreg data=stack %if %length(&PhregOptions) %then &PhregOptions;;
        by _name_;
        class %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; _x ;
        %if %length(&WeightVar) %then %do;
          Weight &WeightVar;
        %end;
        /* FIX: the length test now dereferences PhregModelOptions; it previously   */
        /* measured the literal name, so the slash was emitted even with no options */
        %if %length(&CensorVar) %then %do;
          model &DepVar*&CensorVar(&CensorValues) =
            %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
            %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
            _x %if %length(&PhregModelOptions) %then / &PhregModelOptions; ;
        %end;
        %else %do;
          model &DepVar =
            %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
            %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
            _x %if %length(&PhregModelOptions) %then / &PhregModelOptions; ;
        %end;
        ods output parameterestimates=opd1;
      run;
      %ShowTime(PROC PHREG finished:);
      %if (&syserr > 4) %then %do;
        %put ERROR: PROC PHREG &sterrmsg;
        %goto exit;
      %end;
      /* FIX: ProbChiSq is renamed to raw_p as in the continuous Cox branch;     */
      /* without it raw_p was missing here and later forced to 1 for every row.  */
      /* NOTE(review): with _x as a CLASS effect this is a per-level Wald        */
      /* p-value - confirm that is the intended test statistic.                  */
      data opd1;
        set opd1;
        if Parameter = "_x";
        rename Estimate=Difference ProbChiSq=raw_p;
      run;
      %if (&syserr > 4) %then %do;
        %put ERROR: PROC PHREG &sterrmsg;
        %goto exit;
      %end;
    %end;

    /* start opd only when there were no continuous predictors; otherwise append */
    %if (&startv=1) and ^(&nix) %then %do;
      data opd;
        set opd1;
      run;
    %end;
    %else %do;
      data opd;
        set opd opd1;
      run;
    %end;

    /* advance to the next chunk of class predictors */
    %if &endv = &nip %then %do;
      %let morevars = 0;
    %end;
    %else %do;
      %let startv = %eval(&endv+1);
      %let endv = %eval(&startv + &chunksize - 1);
      %if (&endv > &nip) %then %let endv = &nip;
    %end;
  %end;
%end;

* set missing p-values to 1 so they are at the end of the sort;
data
opd; set opd; if missing(raw_p) then raw_p = 1; run; * p-value adjustment calculations in proc multtest; %if %length(&MultipleTestingMethod) %then %do; %MultipleTestingAdjustment(data=opd,method=&MultipleTestingMethod,out=opd); %end; proc sort data=opd; by &mtpvar %if &mtpvar ^= Raw_p %then %do; Raw_p %end; ; run; data opd; set opd; if (raw_p < 1e-32) then do; NegLog10p = 32; end; else do; NegLog10p = -log10(raw_p); end; if (&mtpvar < 1e-32) then do; NegLog10Adjp = max(32,&NegLog10pCutoff+1); end; else do; NegLog10Adjp = -log10(&mtpvar); end; run; /* %if %symexist(CVing) %then %do; %if %symexist(kval) %then %do; %let suffix = _sts_&iter._&kval; %end; %else %do; %let suffix = _sts_&iter; %end; %end; */ %if %symexist(CVing) %then %do; %if %symexist(kval) %then %do; %if %symexist(iter) %then %do; %let suffix = _sts_&iter._&kval; %end; %else %do; %let suffix = _sts_&kval; %end; %end; %else %do; %if %symexist(iter) %then %do; %let suffix = _sts_&iter; %end; %else %do; %let suffix = _sts; %end; %end; %end; %else %do; %let suffix = _sts; %end; %let b32 = %eval(32 - %length(&suffix)); %if %length(&OutName) > &b32 %then %let inname = %substr(&OutName,1,&b32); %else %let inname = &OutName; %let stout = %quote(&inname.%trim(%left(&suffix))); %put Statistical testing output data set name is &stout; data OutLib.%unquote(&stout) stat_test; set opd; run; * apply filters; data _null_; if (&NegLog10pCutoff < 0) or (&AbsMeanDiffCutoff < 0) or (&AbsPropDiffCutoff < 0) then do; call symput("exiterror",1); end; run; %if &exiterror %then %do; %put ERROR: Cutoffs must be nonnegative.; %goto exit; %end; %if (&NegLog10pCutoff > 0) or (&AbsMeanDiffCutoff > 0) or (&AbsPropDiffCutoff > 0) %then %do; data stat_test; set stat_test; if (NegLog10Adjp >= &NegLog10pCutoff); %if &ProcessName ^= SurvivalPredictiveModeling %then %do; %if &nix %then %do; if ((Difference = .) or (abs(Difference) >= &AbsMeanDiffCutoff)); %end; %if &nip %then %do; if ((PropDiff = .) 
or (abs(PropDiff) >= &AbsPropDiffCutoff)); %end; %end; run; %end; %NObsVars(stat_test); %if (&nobs > &ST_Max) or (&RecoverClassPredVars = Yes) %then %do; * balance number of sig results among class diffs; %if (&ncl > 2) and (%quote(&ST_Method) = %quote(T-Test)) %then %do; data _null_; call symput("npc",trim(left(ceil(&ST_Max/(&ncl-1))))); run; %put npc = &npc; %do i=2 %to &ncl; %let conname = &&lev&i - &lev1; %if %length(&conname) > 21 %then %let conname = %substr(&conname,1,21); data tfa1; set stat_test; if _contrast_ = "&conname"; run; data tfa1; set tfa1(obs=&npc); run; %if &i=2 %then %do; data tfa; set tfa1; run; %end; %else %do; proc append base=tfa data=tfa1; run; %end; %end; data stat_test; set tfa; run; %end; data stat_test; set stat_test(obs=&ST_Max); run; %end; %ReduceData(d,stat_test,&intestdat,Statistical testing,&RecoverClassPredVars); %if &exiterror %then %goto exit; %end; %else %do; %put Number of predictors is less than the specified maximum, so statistical test filtering is not performed.; %let StatTest = No; %end; %put Statistical Testing for Predictor Reduction finished: &sysdate &systime; %end; * USE FORESTS TO FILTER PREDICTORS OR CREATE INTERACTION INDICATORS; %if (&FR_ImpVars = Yes) or (&FR_Inds = Yes) %then %do; %put Predictors from Forest started: &sysdate &systime; %if (&TraitType=Continuous) %then %let trait_type = quantitative; %else %let trait_type = qualitative; %if ^%symexist(CVing) %then %do; %let plib = %scan(&outdat,1,'.'); %let pdat = %scan(&outdat,2,'.'); %if %length(&pdat) > 28 %then %do; %let pdat = %substr(&pdat,1,28); %end; %let path = &plib..&pdat._pth; %let impfit = &plib..&pdat._ift; %if (&TestSpec=Yes) %then %do; data b2; set d d_test; run; %let ts2 = b2; %end; %end; %else %do; %let path = path; %let impfit = impfit; %end; %if (%eval(&nix + &nip) <= &FR_NVars) and (&FR_ImpVars = Yes) %then %do; %put Number of predictors is less than the specified maximum, so forest filtering is not performed.; %let FR_ImpVars = No; %end; 
* SET FOREST PROC OPTIONS; %if (&FR_Inds = Yes) %then %let ForestOptions = maxdepth=2 leafsize=&LeafSize seed=&FR_Seed maxtrees=&MaxTrees ninputs=&FR_NInputs; %else %let ForestOptions = maxdepth=&MaxDepth leafsize=&LeafSize seed=&FR_Seed maxtrees=&MaxTrees ninputs=&FR_NInputs; proc geneselect method=forest data=d &ForestOptions; %if &nix %then %do; var %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; / vartype=quantitative; %end; %if &nip %then %do; var %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; / vartype=qualitative; %end; trait &DepVar / vartype=&trait_type; %if (&FR_Inds = Yes) %then %do; %str(score data=d out=scrts1 dummy;) * CREATE INTERACTION INDICATORS; %str(save path=&path;) * INTERACTION INDICATORS DEFINITIONS; %if %length(&intestdat) > 0 %then %do; %str(score data=&intestdat role=test out=scrts2 dummy;) %end; %end; %if (&FR_ImpVars = Yes) %then %do; importance outfit=impfit nvars=&FR_NVars; * PREDICTORS REDUCTION; %end; quit; * RETRIEVE FILTERED QUANTITATIVE VS QUALITATIVE PREDICTORS; %if (&FR_ImpVars = Yes) %then %do; data impfit; set impfit; if _input1_ = "" then delete; run; %nObsVars(impfit); %if &nObs %then %do; * RETRIEVE QUANTITATIVE VS QUALITATIVE PREDICTORS; %if &nix %then %do; data p; set d; keep %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; ; run; proc transpose data=p out=pt(keep=_NAME_); run; data pt; length vartype $32; set pt; rename _NAME_ = _input1_; label _NAME_ = "Input 1"; vartype = "quantitative"; run; %end; %if &nip %then %do; data cp; set d; keep %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ; run; proc transpose data=cp out=cpt(keep=_NAME_); run; data cpt; length vartype $32; set cpt; rename _NAME_ = _input1_; label _NAME_ = "Input 1"; vartype = "qualitative"; run; %end; data preds; set %if &nix %then %do; pt %end; %if &nip %then %do; cpt %end; ; run; data &impfit; set impfit; run; proc sort data=impfit; by _input1_; run; proc sort data=preds; by _input1_; run; data ipreds cpreds; merge impfit preds; 
keep _input1_; by _input1_; if _ASE_ = . or _input1_ = "" then delete; if vartype = "quantitative" then output ipreds; else output cpreds; run; %let FRClassPredVars =; %NObsVars(cpreds); %if &nObs %then %do; %let _cpid_ = %sysfunc(open(cpreds)); %do c=1 %to &nObs; %let rc = %sysfunc(fetchobs(&_cpid_, &c)); %let input = %sysfunc(getvarc(&_cpid_, 1)); %let FRClassPredVars=&FRClassPredVars &input; %end; %let _cpid_ = %sysfunc(close(&_cpid_)); %end; %let FRPredictorVars =; %NObsVars(ipreds); %if &nObs %then %do; %let _ipid_ = %sysfunc(open(ipreds)); %do i=1 %to &nObs; %let rc = %sysfunc(fetchobs(&_ipid_, &i)); %let input = %sysfunc(getvarc(&_ipid_, 1)); %let FRPredictorVars=&FRPredictorVars &input; %end; %let _ipid_ = %sysfunc(close(&_ipid_)); %end; %end; data impfit; set impfit; rename _input1_ = _name_; run; * redefine data sets containing predictor variable columns; /* %if %length(&FRPredictorVars) %then %do; %let _rc_ = %sysfunc(close(&_dsix_)); data dsix; set d; keep &FRPredictorVars; run; %let _dsix_ = %sysfunc(open(dsix)); %let nix = %sysfunc(attrn(&_dsix_,NVARS)); %end; %if %length(&FRClassPredVars) %then %do; %let _rc_ = %sysfunc(close(&_dsip_)); data dsip; set d; keep &FRClassPredVars; run; %let _dsip_ = %sysfunc(open(dsip)); %let nip = %sysfunc(attrn(&_dsip_,NVARS)); %end; */ %ReduceData(d,impfit,&intestdat,Forest,&RecoverClassPredVars); %if &exiterror %then %goto exit; /* %if %length(&FRPredictorVars) %then %do; %let PredictorVars = &FRPredictorVars; %end; %if %length(&FRClassPredVars) %then %do; %let ClassPredVars = &FRClassPredVars; %end; */ %end; %if (&FR_Inds = Yes) %then %do; %NObsVars(&path); %let _nObspath_ = &nObs; %if &_nObspath_ %then %do; * RETRIEVE INTERACTION INDICATORS DEFINITIONS; data path; set &path; X1 = "_I"||trim(left(LEAF)); run; proc sort data=path; by X1; run; data friidef; set path; retain prev_varname prev_variable; keep X1 INDICATOR_DEFINITION LEAF; if _n_ = 1 then do; prev_varname = VAR_NAME; prev_variable = VARIABLE; end; if 
VAR_NAME = "" then do;
        /* blank VAR_NAME: carry forward the last non-blank name/variable */
        VAR_NAME = prev_varname;
        VARIABLE = prev_variable;
      end;
      else do;
        prev_varname = VAR_NAME;
        prev_variable = VARIABLE;
      end;
      if NUMERIC_VALUE = . then delete;
      /* one condition per row, rendered as "( variable relation value )" */
      INDICATOR_DEFINITION = "( "||trim(left(VARIABLE))||" "|| trim(left(RELATION))||" "|| trim(left(CHARACTER_VALUE))||" )";
    run;

    proc sort data=friidef;
      by LEAF;
    run;

    /* Collapse consecutive rows sharing X1 into a single row whose
       definition is the " * "-joined product of the row conditions. */
    data friidef;
      set friidef end=end;
      length def $32767;
      keep prev_x1 def;
      rename prev_x1 = X1;
      label prev_x1 = "Predictor 1";
      rename def = INDICATOR_DEFINITION;
      label def = "Indicator Definition";
      retain prev_x1 def;
      if _n_ = 1 then do;
        prev_x1 = X1;
        def = trim(left(INDICATOR_DEFINITION));
      end;
      else do;
        if X1 = prev_x1 then do;
          def = trim(left(def))||" * "||trim(left(INDICATOR_DEFINITION));
        end;
        else do;
          /* X1 changed: flush the finished group, start the next one */
          output;
          def = trim(left(INDICATOR_DEFINITION));
          prev_X1 = X1;
        end;
      end;
      /* flush the final group */
      if end ne 0 then output;
    run;

    %NObsVars(friidef);
    %let _nvii_ = &nObs;
    %put Adding &_nvii_ interaction indicators from forest;
    %if &_nvii_ %then %do;
      /* Append _I1 ... _I<n> to the predictor list.
         FIX: the loop index reference and the terminating semicolon had
         been mis-encoded as a single non-ASCII character, so nothing
         valid was appended. */
      %do ii=1 %to &_nvii_;
        %let PredictorVars=&PredictorVars _I&ii;
      %end;
    %end;
  %end;

  * redefine training and test sets;
  data d;
    set scrts1;
    * keep &DepVar &PredictorVars &ClassPredVars;
  run;
  %if %length(&intestdat) > 0 %then %do;
    data &intestdat;
      set scrts2;
      * keep &DepVar &PredictorVars &ClassPredVars;
    run;
  %end;

  * redefine data sets containing predictor variable columns;
  %if &nix %then %do;
    /* close the old handle before overwriting dsix, then reopen and recount */
    %let _rc_ = %sysfunc(close(&_dsix_));
    %put PredictorVars = &PredictorVars;
    data dsix;
      set d;
      keep &PredictorVars;
    run;
    %let _dsix_ = %sysfunc(open(dsix));
    %let nix = %sysfunc(attrn(&_dsix_,NVARS));
    %put nix = &nix;
  %end;
  %if &nip %then %do;
    %let _rc_ = %sysfunc(close(&_dsip_));
    data dsip;
      set d;
      keep &ClassPredVars;
    run;
    %let _dsip_ = %sysfunc(open(dsip));
    %let nip = %sysfunc(attrn(&_dsip_,NVARS));
    %put nip = &nip;
  %end;
%end;
%put Predictors from Forest finished: &sysdate &systime;
%end;

/* HTML output is prepared only outside cross validation */
%if (&GenerateHTML = Yes) and ^%symexist(CVing) %then %do;
  %if &ProcessName = SurvivalPredictiveModeling %then %do;
    %let ProcName = SPM;
  %end;
%else %do; %let ProcName = &ProcessName; %end; %PrepHTML(&ProcName._Body, &ProcName._Frame, &ProcName._Contents, &ProcName..body.html, &ProcName..frame.html, &ProcName..contents.html); ods listing gpath="&OutPath"; ods html body = "&&&ProcName._Body" (url="&bodyfile") frame = "&&&ProcName._Frame" (url="&framefile") contents = "&&&ProcName._Contents" (url="&contentsfile") gpath = "&OutPath" ; ods graphics on %if (&ProcessName = GLMSelect) %then /imagename="&&&_OutDataName";; filename plotout "&OutPath"; goptions reset=all device=png gsfname=plotout; %end; %if ^%symexist(CVing) %then %do; ods exclude none; ods results; %if (&StatTest = Yes) %then %do; %if (&nix) %then %do; %if (&nip) %then %do; %let tstring = %quote(&ST_Method) and Fisher Exact Tests; %end; %else %do; %let tstring = %quote(&ST_Method); %end; %end; %else %do; %let tstring = Fisher Exact Test; %end; %NObsVars(stat_test); title "Top &nobs Predictors from &tstring"; proc print data=stat_test; run; title; %end; %end; %else %do; %put CV_ListAll = &CV_ListAll; %if &CV_ListAll = Yes %then %do; ods exclude none; ods results; %end; %end; ************************************* * JMP Analyses; %if (&Mode = Interactive) %then %do; data &outdat; set d; run; /* %if (&ProcessName = PartialLeastSquares) %then %do; %if %length(&ClassPredVars) %then %do; %let i = 1; %do %while(%length(%scan(&ClassPredVars,&i))); %let plsV&i = %scan(&ClassPredVars,&i,' '); %let i=%eval(&i+1); %end; %let plsVn= %eval(&i-1); %end; %end; */ /* %CheckLabel(&outdat,DepVar); %if %length(&LabelVar) %then %do; %CheckLabel(&outdat,LabelVar); %end; */ %global _mainjslfile_ ; %let _mainjslfile_ = &&&_JSLFile; * put JSL statements; data _null_; file "&&&_JSLFile"; put "//!"; put 'Names Default to Here(1);'; put "Here:DataList = {};"; put "Here:TabButtonList = {};"; put "Here:TabCodeList = {};"; put "Here:TabAfterCodeList = {};"; put "Here:TabOpenList = {};"; put 
"Here:data=open(""&ClientOutPath.&&&_OutDataName...sas7bdat"",UseLabelsForVarNames(false),invisible);"; put " "; put "Here:data<0) or (&nix>0) or (&nip>0) %then %do; put "," @@; %end; %else %do; put " "; %end; %end; %do i=1 %to &nipl; %let v = %sysfunc(varname(&_dsipl_,&i)); put " Name(""&v"")" @@; %if (&i<&nipl) or (&nix>0) or (&nip>0) %then %do; put "," @@; %end; %else %do; put " "; %end; %end; %do i=1 %to &nix; %let v = %sysfunc(varname(&_dsix_,&i)); put " Name(""&v"")" @@; %if (&i<&nix) or (&nip>0) %then %do; put "," @@; %end; %else %do; put " "; %end; %end; %do i=1 %to &nip; %let v = %sysfunc(varname(&_dsip_,&i)); put " Name(""&v"")" @@; %if (&i<&nip) %then %do; put "," @@; %end; %else %do; put " "; %end; %end; put ")" @@; %if &VarSelect = Stepwise %then %do; %if (&ProcessName = LogisticRegression) %then %do; put " , Personality(Stepwise), Run Model"; put " );"; put ' fmw = Window("Fit Model");'; put " fmw< 1 %then %do; %let first2 = %upcase(%substr(&vari,1,2)); %if %quote(&first2) = %quote(P_) %then %let droplist1 = &droplist1 &vari; %end; %end; %let _rc_ = %sysfunc(close(&_dsid_)); %end; %if %length(&droplist1) %then %do; data &ts1; set &ts1; drop &droplist1; run; %put WARNING: The following variables in the training data have been dropped naming conflicts in output results: &droplist1; %end; %if (&ts1 ^= &ts2) %then %do; %let droplist2 = ; %let _dsid_ = %sysfunc(open(&ts2)); %if &_dsid_ %then %do; %let nvars = %sysfunc(attrn(&_dsid_,NVARS)); %do i=1 %to &nvars; %let vari = %upcase(%sysfunc(varname(&_dsid_,&i))); /* %put vari = &vari; */ %if (%quote(&vari)=PRED) %then %let droplist2 = &droplist2 PRED; %if (%quote(&vari)=PREDICTED_CLASS) %then %let droplist2 = &droplist2 PREDICTED_CLASS; %if (%quote(&vari)=PROB_ACTUAL) %then %let droplist2 = &droplist2 PROB_ACTUAL; %if (%quote(&vari)=CORRECT) %then %let droplist2 = &droplist2 CORRECT; %if (%quote(&vari)=CORRECT_PRED) %then %let droplist2 = &droplist2 CORRECT_PRED; %if (%quote(&vari)=PREDICTED_VALUE) %then 
%let droplist2 = &droplist2 PREDICTED_VALUE;
        /* any column starting with P_ collides with generated probability columns */
        %if %length(&vari) > 1 %then %do;
          %let first2 = %upcase(%substr(&vari,1,2));
          %if %quote(&first2) = %quote(P_) %then %let droplist2 = &droplist2 &vari;
        %end;
      %end;
      %let _rc_ = %sysfunc(close(&_dsid_));
    %end;
    %if %length(&droplist2) %then %do;
      data &ts2;
        set &ts2;
        drop &droplist2;
      run;
      %put WARNING: The following variables in the test data have been dropped to avoid naming conflicts in output results: &droplist2;
    %end;
  %end;

  /* Cross validation: score against the CV test partition and strip
     reserved result columns from it. */
  %if %symexist(CVing) %then %do;
    %let ts2 = &intestdat;
    %if &CV_ListAll = No %then %do;
      ods exclude all;
      ods noresults;
    %end;
    %let droplist = ;
    %let _dsid_ = %sysfunc(open(&ts2));
    %if &_dsid_ %then %do;
      %let nvars = %sysfunc(attrn(&_dsid_,NVARS));
      %do i=1 %to &nvars;
        %let vari = %upcase(%sysfunc(varname(&_dsid_,&i)));
        /* FIX: quote the column name before comparing. An unquoted name
           such as OR, AND, NE or GT is read as an operator and makes the
           %if expression fail; this matches the quoting used for the
           training/test checks earlier in this macro. */
        %if (%quote(&vari)=PRED) %then %let droplist = &droplist PRED;
        %if (%quote(&vari)=PROB_ACTUAL) %then %let droplist = &droplist PROB_ACTUAL;
        %if (%quote(&vari)=CORRECT) %then %let droplist = &droplist CORRECT;
        %if (%quote(&vari)=CORRECT_PRED) %then %let droplist = &droplist CORRECT_PRED;
        /* %put i=&i vari=&vari varis2=%substr(&vari,1,2); */
        %if %length(&vari) > 1 %then %do;
          %if %qsubstr(&vari,1,2) = %quote(P_) %then %let droplist = &droplist &vari;
        %end;
      %end;
      %let _rc_ = %sysfunc(close(&_dsid_));
    %end;
    %if %length(&droplist) %then %do;
      data &ts2;
        set &ts2;
        drop &droplist;
      run;
    %end;
  %end;

  * stepwise variable selection;
  %if %index(&VarSelect,Stepwise)|%index(&VarSelect,Forward)| %index(&VarSelect,Backward)|%index(&VarSelect,Score) %then %do;
    * v4.1 updates: * add in the F|B|Score selections for utilizing the corresponding selection for PROC PHREG;
    * may update for Proc StepDisc and PROC LOGISTIC;
    %put nixl=&nixl;
    %put nipl=&nipl;
    %put nix=&nix;
    %put nip=&nip;

    /* placeholder data sets so later steps find them even when the
       procedure writes no ODS output */
    data sum;
    run;
    %if (&ProcessName = SurvivalPredictiveModeling) %then %do;
      data ParEst;
      run;
    %end;

    %if (&ProcessName = LogisticRegression) %then %do;
      proc logistic data=&ts1 &LogisticOptions namelen=65;
        %if &nipl | &nip %then %do;
          class
            %do i=1 %to &nipl;
%sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; / missing ; %end; model &DepVar &LogisticResponseOptions = %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %if &OrderInt > 1 and ((&i < &nixl) or (&nipl > 0) or (&nix > 0) or (&nip > 0)) %then %do; | %end; %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %if &OrderInt > 1 and ((&i < &nipl) or (&nix > 0) or (&nip > 0)) %then %do; | %end; %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %if &OrderInt > 1 and ((&i < &nix) or (&nip > 0)) %then %do; | %end; %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %if &OrderInt > 1 and (&i < &nip) %then %do; | %end; %end; %if &OrderInt > 1 %then %do; @ &OrderInt %end; / selection=stepwise slentry=&SLEntry slstay=&SLStay maxstep=&MaxStep include=%eval(&nixl+&nipl) &LogisticModelOptions ; &LogisticStatements; ods output modelbuildingsummary=sum; run; /* code to handle class effects directly %let pvl=; %let numvars=0; %if %length(&PredictorVars)>0 %then %do; %do i=1 %to %eval(&nix); %let pvl=&pvl %sysfunc(varname(&_dsix_,&i)); %end; %let numvars=%eval(&numvars+&nix); %end; %if %length(&ClassPredVars)>0 %then %do; %do i=1 %to %eval(&nip); %let pvl=&pvl %sysfunc(varname(&_dsip_,&i)); %let cvl=&cvl %sysfunc(varname(&_dsip_,&i)); %end; %let numvars=%eval(&numvars+&nip); %end; %if &OrderInt>1 %then %do; %let mstate=%sysfunc(translate(%sysfunc(compbl(&pvl)),"|"," ")); %let mstate=%str(&mstate@&OrderInt); %end; %else %let mstate=&pvl; %let neff=0; %do i=1 %to %eval(&OrderInt); %let neff=%eval(&neff+%sysfunc(comb(&numvars,&i))); %end; %if &neff>32767 %then %do; %put Number of effects = &neff.; %put ERROR: There are more than 32,767 model effects. 
Please enter a smaller Maximum Order of Interactions or specify fewer predictors.; %goto exit; %end; proc logistic data=&ts1 &LogisticOptions; %if %length(&ClassPredVars)>0 %then %do; class &cvl; %end; model &DepVar &LogisticResponseOptions = &mstate / selection=stepwise slentry=&SLEntry slstay=&SLStay maxstep=&MaxStep &LogisticModelOptions ; &LogisticStatements; ods output modelbuildingsummary=sum; run; */ %if (&syserr>4 | &exiterror) %then %goto exit; %let effentered = effectentered; %let effremoved = effectremoved; %end; %else %if (&ProcessName = SurvivalPredictiveModeling) %then %do; * set up baseline data; %if %length(&TestInData) | %symexist(CVing) %then %do; data _Covariates; %if %length(&TestInData) %then %do; set &ts1 &intestdat; %end; %else %do; set &ts1 &ts2; %end; run; %end; %else %do; data _Covariates; set &ts1; run; %end; %let _CovData = _Covariates; * delete rows with missing covariates; /* data &_CovData; set &_CovData; if nmiss(of %if %length(&WeightVar) %then %do; &WeightVar %end; %if %length(&CensorVar) %then %do; &CensorVar %end; &Covariates %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) then delete; run; */ %if %index(&VarSelect,Stepwise)|%index(&VarSelect,Forward)|%index(&VarSelect,Backward) %then %do; proc phreg data=&ts1 namelen=200 %if %length(&PhregOptions) %then &PhregOptions;; %if &nipl | &nip %then %do; class %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ; %end; %if %length(&WeightVar) %then %do; weight &WeightVar; %end; %if %length(&CensorVar) %then %do; model &DepVar*&CensorVar(&CensorValues) = %end; %else %do; model &DepVar = %end; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %if &OrderInt > 1 and ((&i < &nixl) or (&nipl > 0) or (&nix > 0) or (&nip > 0)) %then %do; | %end; %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %if &OrderInt > 1 and ((&i < &nipl) or (&nix > 0) or (&nip > 0)) %then %do; | 
%end; %end;
            /* unlocked continuous predictors, then unlocked class predictors;
               a bar joins effects only when interactions are requested */
            %do i=1 %to &nix;
              %sysfunc(varname(&_dsix_,&i))
              %if &OrderInt > 1 and ((&i < &nix) or (&nip > 0)) %then %do; | %end;
            %end;
            %do i=1 %to &nip;
              %sysfunc(varname(&_dsip_,&i))
              %if &OrderInt > 1 and (&i < &nip) %then %do; | %end;
            %end;
            %if &OrderInt > 1 %then %do; @ &OrderInt %end;

            /* Selection options. include= keeps the locked predictors
               (listed first) in every candidate model.
               FIX: the guards now test the VALUE of PhregModelOptions; the
               previous %length(PhregModelOptions) measured the literal
               name and was therefore always true. */
            %if %index(&VarSelect,Stepwise) %then %do;
              / selection=stepwise slentry=&SLEntry slstay=&SLStay maxstep=&MaxStep include=%eval(&nixl+&nipl)
              %if %length(&PhregModelOptions) %then &PhregModelOptions;
            %end;
            %else %if %index(&VarSelect,Forward) %then %do;
              / selection=Forward slentry=&SLEntry include=%eval(&nixl+&nipl)
              %if %length(&PhregModelOptions) %then &PhregModelOptions;
            %end;
            %else %if %index(&VarSelect,Backward) %then %do;
              /* FIX: Backward previously requested selection=stepwise */
              / selection=backward slstay=&SLStay include=%eval(&nixl+&nipl)
              %if %length(&PhregModelOptions) %then &PhregModelOptions;
            %end;
            ; * end of Model statement;

            baseline covariates=&_CovData out=SurFunc(keep=_obs _train_ &DepVar &CensorVar &IDVar &ColorVar &WeightVar Survival StdErrSurvival) survival=_all_ / rowid=Id ;
            /* %if ^%symexist(CVing) %then %do; assess var=( &Covariates %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) ph / crpanel resample seed=19 ; %end; */
            ods output ModelBuildingSummary=Sum parameterestimates=ParEst;
          run;
        %end;
        %else %if %index(&VarSelect,Score) %then %do;
          * 1. Select effect by Score option first (without fitting model);
          * 2.
Call PROC PHREG second time to fit model with the effects selected in step 1.;

          /* The requested subset size cannot exceed the number of
             predictors that survived reduction; cap it and warn. */
          %if %eval(&nix+&nip) < &nScorePredictors %then %do;
            %let nTmp = %trim(%left(%eval(&nix+&nip)));
            %put WARNING: The specified Number of Predictors Included in Model, %trim(%left(&nScorePredictors)), is larger than;
            %put the total predictors, &nTmp, left after Predictor Reduction.;
            %put WARNING: The Number of Predictors Included in Model for Score Model Selection will be replaced by &nTmp.;
            %let nScorePredictors = %eval(&nix+&nip);
          %end;

          * check how many combination of factor selections for Score approach;
          /* n is log10 of the number of size-y subsets of x predictors;
             it is printed as mantissa (n2) and exponent (n1) */
          data _null_;
            format n2 8.2;
            x = %eval(&nix+&nip);
            y = &nScorePredictors;
            n = (lperm(x,y)-lperm(y))/log(10);
            n1 = floor(n);
            n2 = 10**(n-n1);
            if n>17 then do;
              put 'WARNING: There are ' n2 @;
              put +(-1) 'e+' n1 'variable selections to be screened to search for the best selection.';
              put 'WARNING: It can take a long time to search for the best score among these selections.';
            end;
            else do;
              put 'There are ' n2 @;
              put +(-1) 'e+' n1 'variable selections to be screened.';
            end;
          run;

          /* Pass 1: best-subset search only; the chosen effects are read
             from the BestSubsets ODS table below. */
          proc phreg data=&ts1 %if %length(&PhregOptions) %then &PhregOptions;;
            %if &nipl | &nip %then %do;
              class
                %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
                %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end;
              ;
            %end;
            %if %length(&WeightVar) %then %do;
              weight &WeightVar;
            %end;
            %if %length(&CensorVar) %then %do;
              model &DepVar*&CensorVar(&CensorValues) =
            %end;
            %else %do;
              model &DepVar =
            %end;
            %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
            %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
            %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end;
            %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end;
            / selection=Score best=1 start=&nScorePredictors stop=&nScorePredictors include=%eval(&nixl+&nipl)
            /* FIX: test the value of PhregModelOptions, not its literal name */
            %if %length(&PhregModelOptions) %then &PhregModelOptions;
            ;
            ods output BestSubsets=BestSet;
          run;

          /* keep the effect list from the last row of BestSubsets */
          data _null_;
            set BestSet end=_e_;
            if _e_ then call symput("ScoreEffects",VariablesInModel);
          run;

          * call PROC PHREG second
time to fit scored model;
          proc phreg data=&ts1 %if %length(&PhregOptions) %then &PhregOptions;;
            %if &nipl | &nip %then %do;
              class
                %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
                %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end;
              ;
            %end;
            %if %length(&WeightVar) %then %do;
              weight &WeightVar;
            %end;
            /* FIX: the option separator "/" is now emitted only when
               PhregModelOptions has a value. The previous guard,
               %length(PhregModelOptions), measured the literal name and was
               always true, so a bare "/" was always written. */
            %if %length(&CensorVar) %then %do;
              model &DepVar*&CensorVar(&CensorValues) = &ScoreEffects
                %if %length(&PhregModelOptions) %then /&PhregModelOptions;
              ;
            %end;
            %else %do;
              model &DepVar = &ScoreEffects
                %if %length(&PhregModelOptions) %then /&PhregModelOptions;
              ;
            %end;
            baseline covariates=&_CovData out=SurFunc(keep=_obs _train_ &DepVar &CensorVar &IDVar &ColorVar &WeightVar Survival StdErrSurvival) survival=_all_ / rowid=Id ;
            ods output parameterestimates=ParEst;
          run;
        %end;
        %if (&syserr>4 | &exiterror) %then %goto exit;
        /* ODS column names holding entered/removed effects for this procedure */
        %let effentered = effectentered;
        %let effremoved = effectremoved;
      %end;
      %else %do;
        /* all other processes: stepwise discriminant selection */
        proc stepdisc data=&ts1 slentry=&SLEntry slstay=&SLStay maxstep=&MaxStep include=%eval(&nixl+&nipl) short;
          class &DepVar;
          var
            %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
            %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
            %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end;
            %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end;
          ;
          ods output summary=sum;
        run;
        %if (&syserr>4 | &exiterror) %then %goto exit;
        %let effentered = entered;
        %let effremoved = removed;
      %end;

      /* survival models are summarized by parameter estimates; the other
         processes by the selection summary */
      %if (&ProcessName ne SurvivalPredictiveModeling) %then %do;
        %NObsVars(sum);
      %end;
      %else %do;
        %NObsVars(ParEst);
      %end;
      %put nobs stepwise = &nobs;

      %if (&nvars and &nobs) or &nixl or &nipl %then %do;
        %if (&ProcessName = SurvivalPredictiveModeling) %then %do;
          * extract selected variables into macro variables (including covariates);
          %let SelVars = ;
          * avoid duplicate record for class variable;
          /* de-duplicate by Parameter, then restore the original row order */
          data PE;
            set ParEst(keep=Parameter);
            row = _n_;
          run;
          proc sort data=PE nodupkey;
            by Parameter;
          run;
          proc sort data=PE;
            by row;
          run;
          data _null_;
            set PE end=_e_;
            name = "sVar"||trim(left(_n_));
            call
symput(name,Parameter); if _e_ then call symput("NumVar",_n_); run; * build single macro variable with entered variables; %do i = 1 %to &NumVar; %let SelVars = &SelVars &&sVar&i; * covariate included; %end; %end; %else %if %index(&VarSelect,Stepwise) %then %do; %let nsteps = 0; * extract selected variables into macro variables; data sum; set sum end=end; en = "e" || trim(left(_n_)); rn = "r" || trim(left(_n_)); call symput(en,trim(left(&effentered))); call symput(rn,trim(left(&effremoved))); if end=1 then call symput('nsteps',trim(left(_n_))); run; * build single macro variable with entered variables; %do i=1 %to &nixl; %let SelVars = &Selvars %sysfunc(varname(&_dsixl_,&i)); %end; %do i=1 %to &nipl; %let SelVars = &Selvars %sysfunc(varname(&_dsipl_,&i)); %end; %put nsteps = &nsteps; %let NumVar = 0; %do s=1 %to &nsteps; %if (%length(%trim(&&e&s))) %then %do; %put entering &&e&s; %let SelVars = &SelVars &&e&s; %let NumVar = %eval(&NumVar + 1); %end; %if (%length(%trim(&&r&s))) %then %do; %let sm1 = %eval(&s-1); %if (%quote(&&r&s) = %quote(&&e&s)) or ((&s > 1) and (%quote(&&e&s) = ) and (%quote(&&r&s) = %quote(&&e&sm1))) %then %do; %put not removing &&r&s because it was just entered; %end; %else %do; %put removing &&r&s; %let lr = %length(%trim(&&r&s)); %let idx = %index(&SelVars,&&r&s); %let idx2 = %eval(&idx+&lr+1); %if (&idx = 1) %then %do; * removing the first variable in the string; %let SelVars = %substr(&SelVars,&idx2); %end; %else %if (&idx2 > %length(%trim(&SelVars))) %then %do; * removing the last variable in the string; %let SelVars = %substr(&SelVars,1,%eval(&idx-1)); %end; %else %do; * removing variable from the middle of the string; %let SelVars = %substr(&SelVars,1,%eval(&idx-1)) %substr(&SelVars,&idx2); %end; %let NumVar = %eval(&NumVar - 1); %end; %end; %end; %end; %end; %end; %else %if %index(&VarSelect,Genetic) %then %do; * skip this part for Survival data; %let ClassKeep = &DepVar; %if (&ProcessName = PartialLeastSquares) %then %do; %do i=1 %to 
&nic; %let ClassKeep = &ClassKeep &&_c&i; %end; %do i=1 %to &nit; %let ClassKeep = &ClassKeep &&_t&i; %end; %end; %SelectVarsGA( Data=&ts1, Class=&ClassKeep, Vars= %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; , GroupSize=&NumVar, MaxIter=&MaxIter, StopValue=&StopVal ); %if &exiterror = 1 %then %goto exit; data vipd; set VariableImportance; run; data bpd; set BestPredictorRuns(obs=10); run; * use the first set from BestPredictorRuns as the selected variables; data bpd1; set BestPredictorRuns(obs=1); %do s=1 %to &NumVar; call symput("e&s",var&s); %end; run; /*---use the top &NumVar important variables as the selected variables---*/ /* data vsel; set VariableImportance(obs=&NumVar); en = "e" || trim(left(_n_)); call symput(en,trim(left(variable))); run; */ %do s=1 %to &NumVar; %let SelVars = &SelVars &&e&s; %end; %end; %else %do; * without variable selection; %let SelVars = &LockContPredVars &LockClassPredVars &PredictorVars &ClassPredVars; %end; * end of variable selction; * if no variables selected, create a dummy intercept as the only predictor; %if ^%length(&SelVars) %then %do; %put WARNING: No variables selected. 
An intercept-only model is assumed.; %if ^%index(&PredictorVars &ClassPredVars,Intercept) %then %do; %put Dropping &PredictorVars &ClassPredVars and adding Intercept; data &ts1; set &ts1; Intercept = 1; drop &PredictorVars &ClassPredVars; run; %if &ts2^=&ts1 %then %do; data &ts2; set &ts2; Intercept = 1; drop &PredictorVars &ClassPredVars; run; %end; %end; %let SelVars = Intercept; %if %symexist(_dsix_) %then %do; %put _dsix_ = &_dsix_; %if (&_dsix_) %then %do; %let _dsix_ = %sysfunc(close(&_dsix_)); %end; %end; %let nix = 1; data dsix; Intercept = 1; run; %let _dsix_ = %sysfunc(open(dsix)); %if %symexist(_dsip_) %then %do; %if (&_dsip_) %then %do; %let _rc_ = %sysfunc(close(&_dsip_)); %end; %end; %let nip = 0; %end; * main computing block, with sections for each predictive method; %if %length(&SelVars) %then %do; %let SomeSel = 1; %let havets2v = 0; %put SelVars = &SelVars; data ts2o; if 0; run; data ts2vars; if 0; run; %if ^%length(&TruthVar) %then %let TruthVar = &DepVar; * evaluate prediction accuracy; %if (&ProcessName = LogisticRegression) %then %do; * string for analysis settings; %let ASet = Variable Section Method = &VarSelect; %if (&VarSelect = Stepwise) %then %do; %let ASet = &Aset, SLEntry = &SLEntry, SLStay = &SLStay; %end; %if ^%symexist(CVing) %then %do; %let plib = %scan(&outdat,1,'.'); %let pdat = %scan(&outdat,2,'.'); %if %length(&pdat) > 28 %then %do; %let pdat = %substr(&pdat,1,28); %end; %if (&TestSpec=Yes) %then %do; data b2; set &ts1 d_test; run; %let ts2 = b2; %end; %end; %if (&VarSelect = Penalized) %then %do; %if (&_dtype_=C) or (&NumDepAsClass=Yes) %then %let trait_type = qualitative; %else %let trait_type = quantitative; %if %symexist(CVing) %then %do; %let savep = NO; %let podat = ; %let pfit = ; %end; %else %do; %let savep = YES; %let podat = &plib..&pdat._mod; %let pfit = &plib..&pdat._fit; %end; %if (&FR_Inds = Yes) %then %let GeneSelectOptions = &GeneSelectOptions nway=1; %else %do; %if &OrderInt > 0 %then %let GeneSelectOptions = 
&GeneSelectOptions nway=&OrderInt; %else %let GeneSelectOptions = &GeneSelectOptions nway=2; %end; %if ^%symexist(CVing) %then %do; %let GeneSelectOptions = &GeneSelectOptions plrdropfit=yes; %end; %geneselect( train_dsname=&ts1, test_dsname=&ts2, where_clause=, inmodel_dsname=, method=plr, trait=&DepVar, trait_type=&trait_type, quantitative_vars= %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; , qualitative_vars= %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; , n_quantitative_lists=0, n_qualitative_lists=0, n_vars=, n2way=, save_model=, model_output=, save_parms=&savep, parms_output=&podat, save_fit=&pfit, save_importance=, importance=ts2i, importance_var=, importance_out=, importance_outfit=, save_score=YES, score_out=ts2o, score_outfit=, convert_to_cvscore=YES, proc_options=&GeneSelectOptions, priors=&Priors ); %put pfit=&pfit; %if %symexist(CVing) %then %do; %if (&FR_Inds = Yes) %then %do; * IF CVMC, REMOVE _Ixx INTERACTION INDICATOR FROM THE SAVE IMPORTANCE= DATA; %NObsVars(&path); %let _nObspath_ = &nObs; %if &_nObspath_ %then %do; proc sort data=ts2i; by NAME; run; data indicators; set friidef; rename X1 = NAME; TYPE = "Indicator"; run; proc sort data=indicators; by NAME; run; data ts2i; merge ts2i indicators; by NAME; drop TYPE; if TYPE="Indicator" then delete; run; %end; %end; %end; %else %if ^%symexist(CVing) %then %do; %if %length(&pfit) > 0 and &pfit ne "" %then %do; %NObsVars(&pfit); %if &nObs %then %do; %let _pfitid_ = %sysfunc(open(&pfit)); %let x2num = %sysfunc(varnum(&_pfitid_, X2)); %if &x2num %then %do; %let x2type = %sysfunc(vartype(&_pfitid_, &x2num)); %end; %let _pfitid_ = %sysfunc(close(&_pfitid_)); data &pfit; set &pfit; length Predictor $64; drop base_ase base_deviance; retain base_ase base_deviance 0; %if &x2num %then %do; if %if (&x2type=C) %then %do; trim(left(X2))="" then do; %end; %else %do; X2=. 
then do; %end; Predictor=trim(left(X1)); end; else do; Predictor=trim(left(X1))||" * "||trim(left(X2)); end; %end; %else %do; Predictor=trim(left(X1)); %end; if X1="Full Model" then do; base_ase = ASE; base_deviance = DEVIANCE; end; ASE_LIFT = (ASE - BASE_ASE)/BASE_ASE; DEVIANCE_LIFT = (DEVIANCE - BASE_DEVIANCE)/BASE_DEVIANCE; label ASE_LIFT = "Ave Square Error Lift"; label DEVIANCE_LIFT = "Deviance Lift"; label Predictor="Final Selected Variables"; run; %end; %end; %if (&FR_Inds = Yes) %then %do; proc sort data=&pfit; by X1; run; proc sort data=friidef; by X1; run; data &pfit; merge &pfit friidef; by X1; if ASE = . then delete; if trim(left(INDICATOR_DEFINITION)) ne "" then do; Predictor = INDICATOR_DEFINITION; end; run; %NObsVars(&pfit); %if &nObs %then %do; %let _pfitid_ = %sysfunc(open(&pfit)); %let x2num = %sysfunc(varnum(&_pfitid_, X2)); %let idefnum = %sysfunc(varnum(&_pfitid_, INDICATOR_DEFINITION)); %let _pfitid_ = %sysfunc(close(&_pfitid_)); %if &x2num or &idefnum %then %do; proc freq data=&pfit noprint; %if &x2num %then %do; tables X2/out=x2frq; %end; %if &idefnum %then %do; tables INDICATOR_DEFINITION/out=idfrq; %end; run; %end; %if &x2num %then %do; %NObsVars(x2frq); %if &nObs = 1 %then %do; data _null_; set x2frq; if X2="" or X2=. 
then do; call symput("dropx2", 1); end; run; %if &dropx2=1 %then %do; data &pfit; set &pfit; drop X2; run; %end; %end; %end; %if &idefnum %then %do; %NObsVars(idfrq); %if &nObs = 1 %then %do; data _null_; set idfrq; if INDICATOR_DEFINITION="" then do; call symput("dropid", 1); end; run; %if &dropid=1 %then %do; data &pfit; set &pfit; drop INDICATOR_DEFINITION; run; %end; %end; %end; %end; %end; %end; %if &syserr > 4 %then %do; %let exiterror = 1; %goto exit; %end; %if ^%symexist(CVing) %then %do; title "Penalized Logistic Regression Model"; proc print data=&podat; run; proc sort data=ts2o; by _Obs; run; %end; /* data outlib.ts2o; set ts2o; run; */ * insert underscore in p_ var names; data p1; set ts2o(obs=1); keep p_:; run; proc transpose data=p1 out=p1t; var p_:; run; data _null_; set p1t; call symput("pgprob"||trim(left(_n_)),trim(left(_name_))); run; data ts2o; set ts2o; rename %do c=1 %to &nclasses; &&pgprob&c = &&p_prob&c %end; ; * drop I_&DepVarTrunc30 to avoid type conflicts while merging results with other APs.; drop I_&DepVarTrunc30; run; %if (&TestSpec=Yes) %then %do; data ts2o; set ts2o; array pp %do c=1 %to &nclasses; &&p_prob&c %end; ; _cidx = .; %do c=1 %to &nclasses; %if (&_dtype_=N) %then %do; if trim(left(&TruthVar)) = &&class&c then _cidx = &c; %end; %else %do; if trim(left(&TruthVar)) = "&&class&c" then _cidx = &c; %end; %end; if not missing(_cidx) then actualp = pp[_cidx]; else actualp = .; run; %if ^%symexist(CVing) %then %do; data &ts1; set ts2o; where _train_ = 1; run; data &ts2; set ts2o; where _train_ = 0; run; %end; %end; %Rename_P_; %let into = U_&DepVarTrunc30; %let pred = P_&DepVarTrunc30; data ts2vars; set ts2i; if importance > 0; run; %let havets2v = 1; %UpdateSelVars; %end; %else %do; %let CompleteSeparation = 0; proc logistic data=&ts1 &LogisticOptions namelen=65; %if &nipl | &nip %then %do; class %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; / missing ; %end; model 
&DepVar &LogisticResponseOptions = %if %index(&VarSelect,Stepwise) or %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; / &LogisticModelOptions; %if (&Priors^=Proportional) %then %do; weight _Prior; %end; &LogisticStatements; output predprobs=i out=ts1o(drop= %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ); score data=&ts2 out=ts2o(drop= %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) fitstat; ods output responseprofile=_namedat convergencestatus=status; run; /* ods exclude none; ods results; title "ts1o"; proc print data=ts1o; run; title "ts2o"; proc print data=ts2o; run; */ %if &syserr > 4 %then %do; %let exiterror = 1; %goto exit; %end; * check for complete separation; data status; set status; if length(reason) >= 19 then do; if substr(reason,1,19) = "Complete separation" then do; call symputx("CompleteSeparation",1); end; end; run; %put CompleteSeparation = &CompleteSeparation; title "status"; proc print data=status; run; * if there is a test set specified, rename the _IP: variables in ts1o to _P: so they agree with those in ts2o; %if (&TestSpec = Yes) %then %do; data t1; set ts1o(obs=1); keep ip_:; run; %let _dsid_ = %sysfunc(open(t1)); %let nvars = %sysfunc(attrn(&_dsid_,NVARS)); data ts1o; set ts1o; rename %do i=1 %to &nvars; %let ipname = %sysfunc(varname(&_dsid_,&i)); %let pname = %substr(&ipname,2,%eval(%length(&ipname)-1)); &ipname = &pname %end; _INTO_ = I_&DepVarTrunc30; ; 
run; %let _rc_ = %sysfunc(close(&_dsid_)); /* title "ts1o after rename"; proc print data=ts1o; run; */ %end; * create variables for computing cv ase; data _namedat; set _namedat end=_last; cn = "cvalue" || trim(left(_n_)); call symput(cn,trim(left(Outcome))); if (_last) then call symput('ncl',trim(left(_n_))); run; %put ncl = &ncl; %global cvartype; %VarAttributes(ts2o,&TruthVar,_Len=,_Type=cvartype); data ts2o; set %if (&TestSpec = Yes) %then %do; ts1o %end; ts2o; array pp p_:; _cidx = .; %do c=1 %to &ncl; %if (&cvartype=N) %then %do; if trim(left(&TruthVar)) = &&cvalue&c then _cidx = &c; %end; %else %do; if trim(left(&TruthVar)) = "&&cvalue&c" then _cidx = &c; %end; %end; if not missing(_cidx) then actualp = pp[_cidx]; else actualp = .; run; * for complete separation, set training posterior probabilities; %if (&CompleteSeparation) %then %do; data ts2o; set ts2o; array pp p_:; if not missing(_cidx) and _train_ = 1 then do; do _c=1 to &ncl; if _cidx = _c then pp[_c] = 0.9999; else pp[_c] = 0.0001; end; actualp = pp[_cidx]; F_&DepVarTrunc30 = &DepVar; I_&DepVarTrunc30 = &DepVar; drop _c; end; run; %end; %Rename_P_; %let into = I_&DepVarTrunc30; %end; %end; %else %if (&ProcessName = PartitionTrees) %then %do; * string for analysis settings; %let ASet = Tree Model Type = &ModelType; %if (&_dtype_=C) or (&NumDepAsClass=Yes) %then %let trait_type = qualitative; %else %let trait_type = quantitative; %if %symexist(CVing) %then %do; %let savem = NO; %let modat = ; %let impfit = ; %let saveimp = NO; %let N_ImpVars = ; %end; %else %do; %let savem = YES; %let mlib = %scan(&outdat,1,'.'); %let mdat = %scan(&outdat,2,'.'); %if %length(&mdat) > 28 %then %do; %let mdat = %substr(&mdat,1,28); %end; %let modat = &mlib..&mdat._mod; %let N_ImpVars = ; %let impfit = ; %let saveimp = NO; %if %upcase(&ModelType)=FOREST %then %do; %if (&FR_Impfit=Yes) %then %do; %let impfit = &mlib..&mdat._ift; %let saveimp = YES; %end; %let N_ImpVars = &FR_NVars; %end; %end; %if ^%symexist(CVing) %then 
%do; %if (&TestSpec=Yes) %then %do; data b2; set d d_test; run; %let ts2 = b2; %end; %else %let ts2 = ; %end; %put TreeMethod=&TreeMethod; %put ModelType=&ModelType; %geneselect( train_dsname=&ts1, test_dsname=&ts2, where_clause=, inmodel_dsname=, method=&TreeMethod, trait=&DepVar, trait_type=&trait_type, quantitative_vars= %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; , qualitative_vars= %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; , n_quantitative_lists=0, n_qualitative_lists=0, n_vars=&N_ImpVars, n2way=, save_model=&savem, model_output=&modat, save_importance=&saveimp, importance=ts2i, importance_var=, importance_out=, importance_outfit=&impfit, save_score=YES, score_out=ts2o, score_outfit=, convert_to_cvscore=YES, proc_options=&GeneSelectOptions, priors=&Priors ); %if %upcase(&TreeMethod)=FOREST and (&FR_Impfit=Yes) %then %do; %nObsVars(&impfit); %if &nObs %then %do; data &impfit; set &impfit; if _input1_ = "" then delete; run; %end; %end; %if %upcase(&TreeMethod)=TREE and ^%symexist(CVing) %then %do; %let mdat = %scan(&outdat,2,'.'); %if %length(&mdat) > 28 %then %do; %let mdat = %substr(&mdat,1,28); %end; %let inmodel_dsname = &mdat._mod; %let model_output = &mdat.omdl; %let nodestats_output = &mdat._nds; %let importance_output = &mdat._imp; %let rules_output = &mdat._rul; %let cmprules_output = &mdat._enr; %let sequence_output = &mdat._seq; %let score_train_output = &mdat._stn; /* %let partialdep_output = &mdat._ptd; */ %if %length(&ValidInData) > 0 %then %do; %let score_valid_output = &mdat._svd; %end; %else %do; %let score_valid_output = ; %end; %if %length(&TestInData) > 0 %then %do; %let score_test_output = &mdat._stt; %end; %else %do; %let score_test_output = ; %end; proc datasets library=Outlib nolist; delete &nodestats_output &importance_output &rules_output &sequence_output &score_train_output &score_valid_output 
&score_test_output /* &partialdep_output */ ; quit; /* %NObsVars(ts2i); %if &nObs and &nVars %then %do; %let _impid_ = %sysfunc(open(ts2i)); %let namenum = %sysfunc(varnum(&_impid_, NAME)); %let rc = %sysfunc(fetchobs(&_impid_, 1)); %let var1st = %sysfunc(getvarc(&_impid_, &namenum)); %let _impid_ = %sysfunc(close(&_impid_)); %end; %put partialdep_varname=&var1st; %put partialdep_output=&partialdep_output; */ %tree(TrainPath=&TrainPath, TestPath=&TestPath, ValidPath=&ValidPath, train_dsname=&InData, test_dsname=&TestInData, valid_dsname=&ValidInData, outputpath=&OutPath, where_clause=, inmodel_dsname=&inmodel_dsname, trait=&DepVar, trait_type=&trait_type, freq=, quantitative_vars=, qualitative_vars=, n_quantitative_lists=, n_qualitative_lists=, subtree=, assess=, model_output=&model_output, nodestats_output=&nodestats_output, importance_output=&importance_output, similarity_output=, rules_output=&rules_output, cmprules_output=&cmprules_output, summary_output=, sequence_output=&sequence_output, score_train_output=&score_train_output, score_valid_output=&score_valid_output, score_test_output=&score_test_output, score_train_outfit=, score_valid_outfit=, score_test_outfit=, partialdep_varname=/*&var1st*/, partialdep_output=/*&partialdep_output*/, proc_options=, branch_nodes=, priors=proportional ); %put nleaves=&nleaves; %if &missing=DISTRIBUTE %then %do; %score_branches(TrainPath=&TrainPath, TestPath=&TestPath, ValidPath=&ValidPath, train_dsname=&InData, test_dsname=&TestInData, valid_dsname=&ValidInData, outputpath=&OutPath, inmodel_dsname=&inmodel_dsname, trait=&DepVar, trait_type=&trait_type, nodestats_output=&nodestats_output, score_train_output=&score_train_output, score_valid_output=&score_valid_output, score_test_output=&score_test_output, score_train_outfit=, score_valid_outfit=, score_test_outfit=, parent_node=1 ); %end; %end; %if &syserr > 4 %then %do; %let exiterror = 1; %goto exit; %end; %if ^%symexist(CVing) %then %do; /* comment this out to prevent 
output listing to be created which triggers populating results output tab title "Partition Tree Model"; proc print data=&modat; run; */ proc sort data=ts2o; by _Obs; run; %end; %if (&trait_type=quantitative) %then %do; data ts2o; set ts2o; drop R_&DepVar; run; %end; %else %do; * drop I_&DepVarTrunc30 to avoid type conflicts while merging results with other APs.; data ts2o; set ts2o; drop I_&DepVarTrunc30; run; %Rename_P_; * compute actualp for testspec; %if (&TestSpec=Yes) %then %do; data ts2o; set ts2o; array pp %do c=1 %to &nclasses; &&p_prob&c %end; ; _cidx = .; %do c=1 %to &nclasses; %if (&_dtype_=N) %then %do; if trim(left(&TruthVar)) = &&class&c then _cidx = &c; %end; %else %do; if trim(left(&TruthVar)) = "&&class&c" then _cidx = &c; %end; %end; if not missing(_cidx) then actualp = pp[_cidx]; else actualp = .; run; %if ^%symexist(CVing) %then %do; data &ts1; set ts2o; where _train_ = 1; run; data &ts2; set ts2o; where _train_ = 0; run; %end; %end; %end; %let into = U_&DepVarTrunc30; %let pred= P_&DepVarTrunc30; data ts2vars; set ts2i; if importance > 0; run; %let havets2v = 1; %UpdateSelVars; %end; %else %if (&ProcessName = DiscriminantAnalysis) or (&ProcessName = KNearestNeighbors) %then %do; * analysis settings output string; %if (&ProcessName = DiscriminantAnalysis) %then %do; %let ASet = Metric = &Metric, Variable Section Method = &VarSelect; %if (&VarSelect = Stepwise) %then %do; %let ASet = &Aset, SLEntry = &SLEntry, SLStay = &SLStay; %end; %end; %else %if (&ProcessName = KNearestNeighbors) %then %do; %let ASet = Number of Nearest Neighbors = &NNN, Distance Metric = &Distance,; %let ASet = &ASet Variable Section Method = &VarSelect; %end; * number of canonical components; %if %length(&NumVar) %then %do; data _null_; ncan = min(5,&NumVar); /* %if %index(&VarSelect,Genetic) %then %do; ncan = max(2,min(&NumVar,%eval(&nlev-1))); %end; %else %do; ncan = 2; %end; */ call symput("ncan",trim(left(ncan))); run; %end; %else %do; %let ncan = 2; %end; %put ncan = 
&ncan; proc discrim data=&ts1 out=ts1o(drop= %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) testdata=&ts2 testout=ts2o(drop= %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) &DiscrimOptions crossvalidate canonical ncan=&ncan manova; class &DepVar; var %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ; priors &Priors; ods output levels=_namedat; run; %if &syserr > 4 %then %do; %let exiterror = 1; %goto exit; %end; * create variables for computing cv ase; data _namedat; set _namedat end=_last; cn = "cvalue" || trim(left(_n_)); call symputx(cn,trim(left(&DepVar))); if (_last) then call symput('ncl',trim(left(_n_))); run; /* proc print; run; */ /* get variable names straight from the data set in case they have changed from the original levels. 
assume the post_prob variables are at the end of the data set, except the very last one, which should be _INTO_ */ %let _dsid_ = %sysfunc(open(ts2o)); %let nvars = %sysfunc(attrn(&_dsid_,NVARS)); %do c=1 %to &nclasses; %let cn&c = %sysfunc(varname(&_dsid_,%eval(&nvars - &nclasses - 1 + &c))); %put cn&c = &&cn&c; %end; %global cvartype; %let vnum = %sysfunc(varnum(&_dsid_,&TruthVar)); %let cvartype = %sysfunc(vartype(&_dsid_,&vnum)); %let rc=%sysfunc(close(&_dsid_)); data ts2o; set %if (&TestSpec = Yes) %then %do; ts1o %end; ts2o; _cidx = .; %do c=1 %to &nclasses; &&p_prob&c = &&cn&c; %if (&cvartype=N) %then %do; if trim(left(&TruthVar)) = &&cvalue&c then _cidx = &c; %end; %else %do; if trim(left(&TruthVar)) = "&&cvalue&c" then _cidx = &c; %end; %end; %if (&cvartype=N) %then %do; if missing(_INTO_) then _INTO_ = &cvalue1; %end; %else %do; if missing(_INTO_) then _INTO_ = "&cvalue1"; %end; run; /* ods exclude none; ods results; proc contents data=ts2o; run; */ data ts2o; set ts2o; array _p p_:; if not missing(_cidx) then actualp = _p[_cidx]; else actualp = .; run; %let into = _INTO_; %end; %else %if (&ProcessName = DistanceScoring) %then %do; * analysis settings output string; %let ASet = Distance Metric = &DistMetric, Computing Distances to &CenTime; %let ASet = &ASet, Summarization Method = &CenMethod, Kernel = &PostFunction; %if (&CenTime = Each Observation) %then %do; %let ASet = &ASet, Number of Nearest Neighbors = &NNN; %end; %let trainds = &ts1; * compute class centroids if necessary; %if ((&TraitType=Binary) or (&TraitType=Nominal)) and %index(&CenTime,Centroids) %then %do; * limit to 32767 vars for now, see DataSummary.sas for a possible way around this, or its likely much faster to transpose; %if ^%index(&VarSelect,Genetic) %then %do; %let numpred = %eval(&nix + &nip); %if (&numpred > 32767) %then %do; %put ERROR: The number of predictors &numpred exceeds the maximum of 32767 for Distance Scoring.; %put Pleae use Predictor Reduction or try a different 
model.; %let exiterror = 1; %goto exit; %end; %end; proc sort data=&ts1 out=ts1s; %if (&DepVar = &TruthVar) %then %do; by &DepVar; %end; %else %do; by &DepVar &TruthVar; %end; where not missing(&DepVar); run; proc means data=ts1s &CenMethod; %if (&DepVar = &TruthVar) %then %do; by &DepVar; %end; %else %do; by &DepVar &TruthVar; %end; var %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; ; output out=ts1m &CenMethod.( %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; ) = %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; ; run; %let trainds = ts1m; %if (&Priors = Proportional) %then %do; data priors; set ts1m; prior = _freq_; keep &DepVar prior; run; %end; %end; %if ^%symexist(CVing) and (&TestSpec = Yes) %then %do; data b2; set d d_test; run; %let ts2 = b2; %end; %put ts2 = &ts2; %put trainds = &trainds; data one; one = 1; run; data b1; set &ts2 &trainds; run; /* title "b1"; proc print data=b1(obs=10); var &DepVar &TruthVar %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; ; run; */ %NObsVars(&ts2); %let ntest = &nobs; %put ntest= &ntest; %if (&ntest > 0) %then %do; %put VarLevel = &VarLevel; * compute distances; proc 
distance data=b1 method=&DistMetric out=dist prefix=dist shape=SQR &ProcDistanceOptions ; var &VarLevel.( %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; ); %if (&DepVar = &TruthVar) %then %do; copy &DepVar; %end; %else %do; copy &DepVar &TruthVar; %end; run; %if &syserr>4 %then %do; %let exiterror = 1; %goto exit; %end; * extract rectangle of distances between training and test; data distrec; set dist; if _n_ > &ntest; keep &DepVar &TruthVar dist1-dist&ntest; run; * replace zeros with missings to remove self-selfs so they are not picked up in stat; * perhaps this should not be done when zero distances are legit among different obs; data distrec; set distrec; array dist{*} dist1-dist&ntest; do i=1 to &ntest; if dist[i] < 1e-6 then dist[i] = .; end; drop i; run; * k-nearest neighbors, keep k smallest distances, set others to zero; %if (&NNN > 0) %then %do; %NObsVars(distrec); %let ntrain = &nobs; %if (&ntrain <= &NNN) %then %do; %put The number of training observations &ntrain is less than specified number of neighbors &NNN. 
All points are used.; %end; %else %do; %put Using the smallest &NNN out of &ntrain distances.; title "Distances Before &NNN Nearest Neighbors"; proc print data=distrec; run; title; data dr1; set distrec; keep &DepVar &TruthVar; run; proc transpose data=distrec out=drt prefix=d; var dist1-dist&ntest; run; data drt; set drt; array d{*} d1-d&ntrain; do k=1 to &NNN; minidx = 0; do i=1 to &ntrain; if not missing(d[i]) and (d[i] > 0) then do; minidx = i; i = &ntrain + 1; end; end; if (minidx > 0) then do; do i=(minidx+1) to &ntrain; if not missing(d[i]) and (d[i] > 0) and (d[i] < d[minidx]) then minidx = i; end; * use temporary sign reversal to flag keepers, assumes all distances are nonnegative; d[minidx] = -d[minidx]; end; end; do i=1 to &ntrain; if not missing(d[i]) then do; if (d[i] < 0) then d[i] = -d[i]; else d[i] = .; end; end; run; proc transpose data=drt out=drtt; var d1-d&ntrain; run; data distrec; merge dr1 drtt; run; title "Distances After &NNN Nearest Neighbors"; proc print data=distrec; run; title; %end; %end; * compute class means if necessary; %if ((&TraitType=Binary) or (&TraitType=Nominal)) and ^%index(&CenTime,Centroids) %then %do; proc sort data=distrec; %if (&DepVar = &TruthVar) %then %do; by &DepVar; %end; %else %do; by &DepVar &TruthVar; %end; where not missing(&DepVar); run; proc means data=distrec &CenMethod noprint; %if (&DepVar = &TruthVar) %then %do; by &DepVar; %end; %else %do; by &DepVar &TruthVar; %end; var dist1-dist&ntest; output out=drm &CenMethod.(%do i=1 %to &ntest; dist&i %end;)=%do i=1 %to &ntest; dist&i %end; ; run; data distrec; set drm; run; %if (&Priors = Proportional) %then %do; data priors; set drm; prior = _freq_; keep &DepVar prior; run; %end; %end; * initialize output data set; data ts2os; set &ts2; keep %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; 
%sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; ; run;

/* Rebuild ts2o from the test data with every predictor column removed.
   The four loops walk the locked continuous (nixl), locked class (nipl),
   continuous (nix) and class (nip) predictor lists; each loop bound is
   paired with its own data set handle, in the same order used by the
   other predictor lists of this macro. */
data ts2o;
   set &ts2;
   drop
      %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end;
      %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
      %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end;
      %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end;
   ;
run;

/* One-to-one merge (no BY statement): re-attach the predictor columns kept
   in ts2os after the remaining columns of ts2o. */
data ts2o;
   merge ts2o ts2os;
run;

* for continuous traits, compute averages weighted by inverse distances;
%if (&TraitType=Continuous) %then %do;

   * compute inverse distances, which are used as weights below;
   data distrec;
      set distrec;
      array dist{*} dist1-dist&ntest;
      array idist{*} idist1-idist&ntest;
      do t=1 to &ntest;
         /* A zero or missing distance has no reciprocal: store a missing
            weight and skip the division, so no division-by-zero or
            missing-value notes are raised. */
         if missing(dist[t]) or (dist[t]=0) then idist[t] = .;
         else idist[t] = 1/dist[t];
      end;
      drop t;
   run;

   title "Reciprocal Distances";
   proc print data=distrec;
   run;
   title;

   /* One weighted summary per test observation: column idist<t> supplies
      the weights for test row t, and the resulting one-row data sets are
      stacked into wm_all in test order. */
   %do t=1 %to &ntest;
      proc means data=distrec &CenMethod noprint;
         var &DepVar;
         weight idist&t;
         output out=wm(keep=pred) &CenMethod.(&DepVar)=pred;
      run;
      %if (&t=1) %then %do;
         data wm_all;
            set wm;
         run;
      %end;
      %else %do;
         proc append base=wm_all data=wm;
         run;
      %end;
   %end;

   /* Row-wise merge of the stacked predictions onto the test rows. */
   data ts2o;
      merge ts2o wm_all;
   run;
%end;
%else %do;

   * merge prior probabilities;
   %if (&Priors = Proportional) %then %do;
      /* Rescale the class frequencies in priors so that they sum to one. */
      proc stdize data=priors method=sum out=priors;
         var prior;
      run;
      title "Distance Scoring Prior Probabilities";
      proc print data=priors;
      run;
      title;
      data distrec;
         merge distrec priors;
         by &DepVar;
      run;
   %end;

   * compute posterior probabilities using Gaussian kernel as in PROC DISCRIM;
   * weight by prior probabilities;
   %if (&PostFunction = Gaussian) %then %do;
      data distrec;
         set distrec;
         array dist{*} dist1-dist&ntest;
         array gdist{*} gdist1-gdist&ntest;
         do t=1 to &ntest;
            /* A missing distance is replaced by 1 before the kernel is applied. */
            if missing(dist[t]) then dist[t] = 1;
            %if (&Priors = Equal) %then %do;
               gdist[t] = exp(-0.5*dist[t]*dist[t]);
            %end;
            %else %do;
               gdist[t] = prior*exp(-0.5*dist[t]*dist[t]);
            %end;
            /* Floor the kernel value at 1e-8. */
            if gdist[t] < 1e-8 then gdist[t]= 1e-8;
         end;
         drop t;
      run;
      %let dvlist = gdist1-gdist&ntest;
   %end;
   %else %do;
      * compute reciprocal
distances; data distrec; set distrec; array dist{*} dist1-dist&ntest; array idist{*} idist1-idist&ntest; do t=1 to &ntest; * if distance is missing, assign prior; if missing(dist[t]) then dist[t] = 1; %if (&Priors = Equal) %then %do; idist[t] = 1/dist[t]; %end; %else %do; idist[t] = prior/dist[t]; %end; end; drop t; run; title "Reciprocal Distances"; proc print data=distrec; run; title; %let dvlist = idist1-idist&ntest; %end; * divide by sum so they sum to one; proc stdize data=distrec method=sum out=pp; var &dvlist; run; %put syserr = &syserr; %if &syserr >= 4 %then %do; %put ERROR: Unable to standardize posterior probabilities. The computed distances may be too large.; %put; %put If so, it should help to standardize or rescale the predictor variables or use a different kernel.; %let exiterror = 1; %goto exit; %end; * make sure values will result in valid sas names; data pp; set pp end=_last; length _depid $ 24; retain maxlendep 0; _depid = trim(left(&DepVar)); lendep = min(length(_depid),24); maxlendep = max(maxlendep,lendep); do i=1 to lendep; _check2 = verify(upcase(substr(_depid,i,1)),"_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"); if _check2 then do; substr(_depid,i,1) = "_"; end; end; if _last then call symputx("maxlendep",trim(left(maxlendep))); run; /* ods exclude none; ods results; title "pp"; proc print data=pp; var _depid lendep _check2; run; */ %put maxlendep = &maxlendep; %let lenleft = %eval(32-&maxlendep-3); %if %length(&cprefix) > &lenleft %then %let cpref = %substr(&cprefix,1,&lenleft); %else %let cpref = &cprefix; * transpose; proc transpose data=pp prefix=p_&cpref._ out=ppt(drop=_name_); var &dvlist; id _depid; run; /* title "Distance Scoring Posterior Probabilities"; proc print data=ppt; run; */ * levels of depvar; data ts2o; set ts2o; %if &_dtype_ = C %then %do; %do i=1 %to &nic1; _lev&i = "&&_lev&i"; %end; %end; %else %do; %do i=1 %to &nic1; _lev&i = &&_lev&i; %end; %end; run; /* proc print data=ts2o; run; */ %put DS nic1 = &nic1; * get names of 
posterior probabilities; data ppn; set ppt(obs=1); keep p_&cpref._:; run; proc transpose data=ppn out=ppnt; var p_&cpref._:; run; proc sort data=ppnt; by _name_; run; data _null_; set ppnt; call symputx("pp"||trim(left(_n_)),trim(left(_name_))); run; %put cpref = &cpref; %do i=1 %to &nic1; %put pp&i = &&pp&i; %end; * merge and compute predicted classes; data ts2o; merge ts2o ppt; array pp %do i=1 %to &nic1; &&pp&i %end; ; array lev _lev1-_lev&nic1; maxp = 0; maxpi = 0; levi = .; do i=1 to dim(pp); if (pp[i] > maxp) then do; maxp = pp[i]; maxpi = i; end; if (lev[i] = &TruthVar) then levi = i; end; I_&DepVarTrunc30 = lev[maxpi]; if not missing(levi) then actualp = pp[levi]; else actualp = .; drop _lev: i maxp maxpi levi; run; %Rename_P_; %end; %end; %let into = I_&DepVarTrunc30; %let pred = pred; %end; %else %if (&ProcessName = GLMSelect) %then %do; * analysis settings output string; %let ASet = Model Selection Method = &SelectMethod, Stop Criterion = &StopCriterion; %if (&StopCriterion = SL) %then %do; %let ASet = &ASet, SLEntry = &SLEntry, SLStay = &SLStay; %end; %if (&VarSelect = Genetic Algorithm) %then %do; %let ASet = &ASet, &VarSelect; %end; proc glmselect data=&ts1 %if %length(&Seed) %then %do; seed=&Seed %end; &GLMSelectOptions namelen=65 %if ^%symexist(CVing) and &SelectMethod ^= NONE %then %do; plots=all %end; ; %if &nipl or &nip %then %do; class %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; / missing ; %end; model &DepVar = %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %if &OrderInt > 1 and ((&i < &nixl) or (&nipl > 0) or (&nix > 0) or (&nip > 0)) %then %do; | %end; %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %if &OrderInt > 1 and ((&i < &nipl) or (&nix > 0) or (&nip>0)) %then %do; | %end; %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %if &OrderInt > 1 and ((&i < &nix) or (&nip > 0)) %then %do; | %end; 
%end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %if &OrderInt > 1 and (&i < &nip) %then %do; | %end; %end; %if &OrderInt > 1 %then %do; @ &OrderInt %end; %end; / selection=&SelectMethod %if &SelectMethod ^= NONE %then %do; ( stop=&StopCriterion maxstep=&MaxStep %if (&SelectMethod^=LAR) and (&SelectMethod^=LASSO) %then %do; select=&StopCriterion %end; %else %do; %if (&StopCriterion=PRESS) %then %do; LSCOEFFS %end; %end; %if (&StopCriterion=SL) %then %do; %if (&SelectMethod=FORWARD) or (&SelectMethod=STEPWISE) %then %do; slentry=&SLEntry %end; %if (&SelectMethod=BACKWARD) or (&SelectMethod=STEPWISE) %then %do; slstay=&SLStay %end; %end; %if (&SelectMethod=FORWARD) or (&SelectMethod=BACKWARD) or (&SelectMethod=STEPWISE) %then %do; include=%eval(&nixl+&nipl) %end; ) %end; %if (&StopCriterion=CV) and ^%index(%upcase(&GLMSelectModelOptions),CVMETHOD) %then %do; cvmethod=&CVMethod ( &CVKFold ) %end; &GLMSelectModelOptions ; %if %length(&WeightVar) %then %do; weight &WeightVar; %end; %else %if (&Priors^=Proportional) %then %do; weight _Prior; %end; /* %if %length(&FreqVar) %then %do; freq &FreqVar; %end; */ %if (&TestSpec = Yes) %then %do; output out=ts1o(drop= %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) pred; %end; score data=&ts2 out=ts2o(drop= %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) pred; ods output selectedeffects=se; run; %if &syserr > 4 %then %do; %let exiterror = 1; %goto exit; %end; %if &SelectMethod ^= NONE %then %do; data ts2vars; length name $ 32; set se; i = 1; do while(i); name = scan(effects,i,' '); if lengthn(name)=0 then i = 0; else do; if name ne "Intercept" then output; i = i + 1; end; end; drop i label effects; run; %let havets2v = 1; %UpdateSelVars; * for LAR and LASSO, class vars are split, so adjust names here; %if (&nipl or &nip) and ((&SelectMethod=LAR) or (&SelectMethod=LASSO)) %then %do; %let _dsid_ = 
%sysfunc(open(&ts2)); %let i=1; %let svnew = ; %do %while(%length(%scan(&SelVars,&i,' '))); %let svi = %scan(&SelVars,&i,' '); %let lensvi = %length(&svi); %if %substr(&svi,&lensvi,1) = . %then %do; %let svi = %substr(&svi,1,%eval(&lensvi-1)); %end; %let vnum = %upcase(%sysfunc(varnum(&_dsid_,&svi))); * delete part of name after last underscore; %if ^&vnum %then %do; %let another=1; %let svij = &svi; %let svi = ; %do %while(&another); %let iu = %index(&svij,_); %if &iu %then %do; %if %length(&svi) %then %let svi = &svi._; %let svi = &svi.%substr(&svij,1,%eval(&iu-1)); %let len = %length(&svij); %if (&len > &iu) %then %do; %let svij = %substr(&svij,%eval(&iu+1),%eval(&len-&iu)); %end; %else %let another = 0; %end; %else %let another = 0; %end; %end; %let svnew = &svnew &svi; %let i=%eval(&i+1); %end; %let _rc_ = %sysfunc(close(&_dsid_)); %let SelVars = &svnew; %put Unsplit SelVars = &SelVars; data ts2vars; length name $ 32; %let i=1; %do %while(%length(%scan(&SelVars,&i,' '))); name = "%scan(&SelVars,&i,' ')"; output; %let i=%eval(&i+1); %end; run; %end; %end; %if (&TestSpec = Yes) %then %do; data ts2o; set ts1o ts2o; run; %end; data ts2o; set ts2o; rename p_&DepVar=pred; run; /* proc print data=ts2o; run; */ * create variables for computing cv ase; %if (&TraitType=Binary) %then %do; data ts2o; set ts2o; if missing(pred) then do; p_&DepVarTrunc._0 = .; p_&DepVarTrunc._1 = .; _INTO_ = .; end; else do; p_&DepVarTrunc._0 = 1 - pred; if p_&DepVarTrunc._0 < 0 then p_&DepVarTrunc._0 = 0; else if p_&DepVarTrunc._0 > 1 then p_&DepVarTrunc._0 = 1; p_&DepVarTrunc._1 = 1 - p_&DepVarTrunc._0; _INTO_ = (p_&DepVarTrunc._1 > 0.5); end; %if &_dtype_ = C %then %do; if &TruthVar = " " then %end; %else %do; if &TruthVar = . 
then %end; actualp = .; else if (&TruthVar=1) then actualp = p_&DepVarTrunc._1; else actualp = p_&DepVarTrunc._0; run; /* ods exclude none; ods results; title "glmselect binary ts2o"; proc print data=ts2o; run; */ /* %let ncl = 2; %let pprob1 = p_&DepVarTrunc._0; %let pprob2 = p_&DepVarTrunc._1; */ %end; %let into = _INTO_; %let pred = pred; %end; %else %if (&ProcessName = PartialLeastSquares) %then %do; * string for analysis settings; %let ASet = Number of PLS Components = &NumberOfComponents; %if (&VarSelect = Genetic Algorithm) %then %do; %let ASet = &ASet, &VarSelect; %end; %if ^%index(&VarSelect,Genetic) and %eval(&nix+&nip) > &PLSLimit %then %do; %put ERROR: Partial Least Squares cannot handle more than &PLSLimit total predictor columns. Please specify fewer predictors.; %let exiterror = 1; %goto exit; %end; * using missing values for the dependent variable to create predictions; %if %symexist(CVing) or (&TestSpec=Yes) %then %do; data &ts1; set &ts1; _y0 = &DepVar; _y = &DepVar; run; data ts1o; set &ts1; run; data &ts2; set &ts2; _y0 = &DepVar; %if (&_dtype_=C) %then %do; _y = " "; %end; %else %do; _y = .; %end; run; data &ts1; set &ts1 &ts2; run; %let UseDep = _y; %end; %else %do; %let UseDep = &DepVar; %end; %if %sysfunc(exist(xw)) %then %do; proc datasets library=work nolist; delete xw; quit; %end; %if (&YReflection = Yes) %then %do; data &ts1; set &ts1; &DepVarTrunc._R = 1 - &UseDep; run; %end; proc pls data=&ts1 nfac=&NumberOfComponents &ProcPLSOptions nocenter noscale details /* missing=avg missing=em */; model &UseDep %if (&YReflection = Yes) %then %do; &DepVarTrunc._R %end; = %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; / solution; ; output out=ts2o(drop= %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) 
%end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) predicted=pred %if (&YReflection = Yes) %then %do; &DepVarTrunc._RP %end; xscore=pls h=leverage tsquare=tsquare ; ods output XWeights=xw PercentVariation=pctvar ParameterEstimates=solution; %if ^%symexist(CVing) %then %do; ods exclude XLoadings XWeights ParameterEstimates; %end; %else %do; %if (&CV_ListAll = Yes) %then %do; ods exclude XLoadings XWeights ParameterEstimates; %end; %end; run; data ts2o; length _ScoreType $ 3 _Label $ 32; set ts2o; if pred < 0 then pred = 0; else if pred > 1 then pred = 1; /* pred = exp(7.5*(pred-0.5))/(1+exp(7.5*(pred-0.5))); pred = probnorm(4*(pred-0.5)); */ %if (&YReflection = Yes) %then %do; pred1 = pred; if &DepVarTrunc._RP < 0 then &DepVarTrunc._RP = 0; else if &DepVarTrunc._RP > 1 then &DepVarTrunc._RP = 1; /* &DepVarTrunc._RP = exp(7.5*(&DepVarTrunc._RP-0.5))/(1+exp(7.5*(&DepVarTrunc._RP-0.5))); &DepVarTrunc._RP = probnorm(4*(&DepVarTrunc._RP-0.5)); */ pred_tot = pred+&DepVarTrunc._RP; if pred_tot > 0 then pred = pred/pred_tot; else pred = .; /* pred = (pred + 1 - &DepVarTrunc._RP)/2; */ %end; _ScoreType = "Row"; %if %length(&ColorVar) %then %do; _Color = trim(left(&ColorVar)); %end; %else %do; _Color = "Row"; %end; %if %length(&LabelVar) %then %do; _Label = trim(left(&LabelVar)); %end; %else %do; _Label = "Row Score"; %end; label _Color=' ' _Label=' '; run; %if ^%sysfunc(exist(xw)) %then %do; %put WARNING: No PLS components selected.; data ts2vars; length name $ 32; name = "Intercept"; run; %let NumberOfComponents = 1; %end; %else %do; %NObsVars(xw); %let NumberOfComponents = &nobs; proc transpose data=xw out=ts2vars prefix=pls; var %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) 
%end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; ; run; * compute variable importance as described in PROC PLS doc; data _null_; set pctvar; call symput("cyv"||trim(left(_n_)),trim(left(CurrentYVariation))); run; data ts2v; length name $ 32; set ts2vars; name = _name_; tsquare = uss(of pls1-pls&NumberOfComponents); _ScoreType = "Col"; drop _name_; run; proc transpose data=pctvar out=pctvary prefix=cyv; var CurrentYVariation; run; proc sql; create table ts2vars as select *, %do i=1 %to &NumberOfComponents; pls&i/sqrt(uss(pls&i)) as plsnorm&i, %end; tsquare/max(tsquare) as tsquare_range from ts2v left join pctvary(drop=_name_ _label_) on 1; %NObsVars(ts2vars); data ts2vars; set ts2vars; VIP = 0; %do i=1 %to &NumberOfComponents; VIP = VIP + cyv&i * (plsnorm&i**2); %end; VIP = sqrt(VIP * &nobs / sum(of cyv:)); run; data solution; set solution; if (RowName = 'Intercept') then delete; drop RowName; run; data ts2vars; merge ts2vars solution; run; proc sort data=ts2vars; by descending vip; run; %if %symexist(CVing) %then %do; data ts2vars; set ts2vars; if VIP > 0.999; keep name; run; %end; %end; %let havets2v = 1; /* data outlib.ts2oplsb; set ts2o; run; */ * create variables for computing cv ase; %if (&TraitType=Binary) %then %do; data ts2o; set ts2o; %if %symexist(CVing) %then %do; %if &_dtype_ = C %then %do; where &UseDep = " "; %end; %else %do; where &UseDep = .; %end; &DepVar = _y0; drop _y0 _y; %end; if missing(pred) then do; p_&DepVarTrunc._0 = .; p_&DepVarTrunc._1 = .; _INTO_ = .; end; else do; /* pred = exp(pred-0.5)/(1+exp(pred-0.5)); */ p_&DepVarTrunc._0 = 1 - pred; if p_&DepVarTrunc._0 < 0 then p_&DepVarTrunc._0 = 0; else if p_&DepVarTrunc._0 > 1 then p_&DepVarTrunc._0 = 1; p_&DepVarTrunc._1 = 1 - p_&DepVarTrunc._0; _INTO_ = (p_&DepVarTrunc._1 > 0.5); end; %if &_dtype_ = C %then %do; if &TruthVar = " " then %end; %else %do; if &TruthVar = . 
then %end; actualp = .; else if (&TruthVar=1) then actualp = p_&DepVarTrunc._1; else actualp = p_&DepVarTrunc._0; run; %end; %let into = _INTO_; %let pred = pred; %end; %else %if (&ProcessName = RadialBasisMachine) %then %do; * string for analysis settings; %if (&VarSelect = Genetic Algorithm) %then %do; %let ASet = &VarSelect; %end; * using missing values for the dependent variable to create predictions; %if %symexist(CVing) or (&TestSpec=Yes) %then %do; data &ts1; set &ts1; _y0 = &DepVar; _y = &DepVar; run; data ts1o; set &ts1; run; data &ts2; set &ts2; _y0 = &DepVar; %if (&_dtype_=C) %then %do; _y = " "; %end; %else %do; _y = .; %end; run; data &ts1; set &ts1 &ts2; run; %let UseDep = _y; %let kdata= ts1o; %end; %else %do; %let UseDep = &DepVar; %let kdata = &ts1; %end; /* data &ts1; set &ts1; _obs_ = _n_; run; */ proc glimmix data=&ts1 &ProcOption; /* class _obs_; */ model &UseDep.&DepOption = &FixedEffects &ModOption; %if %quote(&SelVars) ne %quote(Intercept) %then %do; random %if %index(&VarSelect,Genetic) %then %do; &SelVars %end; %else %do; %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; %end; / type=rsmooth knotmethod=data(&kdata) /* _obs_ / type=sp(gau)( %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end; ) */ %if (&TraitType=Nominal) or (&TraitType=Ordinal) %then %do; group=&UseDep %end; ; %end; %if (&Priors^=Proportional) %then %do; weight _Prior; %end; /* random _residual_ / group=&UseDep; */ &ProcGlimmixStatements; output out=ts2o(drop= %do i=1 %to &nixl; %sysfunc(varname(&_dsixl_,&i)) %end; %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end; %do i=1 %to &nix; %sysfunc(varname(&_dsix_,&i)) %end; %do i=1 %to &nip; 
%sysfunc(varname(&_dsip_,&i)) %end; ) pred(blup ilink)=pred; %if (&TraitType=Ordinal) %then %do; ods output responseprofile=rp; %end; run; %put syserr = &syserr; %if &syserr > 4 %then %do; %let exiterror = 1; %goto exit; %end; %NObsVars(ts2o); %if &nobs = 0 %then %do; %put ERROR: PROC GLIMMIX did not converge or produce predictions.; %put If cross-validating, you may have constant data. If not, please ; %put inspect the SAS Output and try specifying convergence options or a different model.; %let exiterror = 1; /* %goto exit; */ %end; %else %do; %put RBM TraitType = &TraitType; %if (&TraitType=Nominal) or (&TraitType=Ordinal) %then %do; %if (&TraitType=Ordinal) %then %do; %NObsVars(rp); %let nlev = &nobs; * load levels into macro variables; data _null_; set rp; call symput('lev_'||trim(left(_n_)),trim(left(&DepVar))); run; %end; data ts2o; set ts2o; keep _Obs _Level_ pred; run; proc transpose data=ts2o out=justpred(drop=_name_ _label_) prefix=p_; by _Obs; var pred; id _level_; run; %if (&TraitType=Nominal) %then %do; proc transpose data=ts2o out=justlev(drop=_name_ _label_) prefix=lev_; by _Obs; var _level_; run; %end; %else %if (&TraitType=Ordinal) %then %do; * for binary and ordinal, convert cumulative probabilities to class probabilities and fill in last level; * transpose twice to get rid of special characters and ensure valid SAS names; proc transpose data=rp out=rpt prefix=lev_; var &DepVar; run; proc transpose data=rpt out=rptt; var lev_:; run; * load sasified levels into macro variables; data _null_; set rp; call symput('levsas_'||trim(left(_n_)),trim(left(&DepVar))); run; data justpred; set justpred; array pp p_:; p_&&levsas_&nlev = 1 - pp[dim(pp)]; do i=dim(pp) to 2 by - 1; pp[i] = pp[i] - pp[i-1]; end; run; %NObsVars(&ts1); data justlev; set rpt; do _Obs = 1 to &nobs; output; end; drop _name_; run; %end; data ts2o; merge &ts1 justpred justlev; by _Obs; run; %end; %put RBM TruthVar = &TruthVar; * create variables for computing cv ase; data ts2o; set ts2o; 
%if %symexist(CVing) %then %do; %if &_dtype_ = C %then %do; where &UseDep = " "; %end; %else %do; where &UseDep = .; %end; &DepVar = _y0; drop _y0 _y; %end; %if (&TraitType=Binary) %then %do; if missing(pred) then do; p_&DepVarTrunc._0 = .; p_&DepVarTrunc._1 = .; _INTO_ = .; end; else do; p_&DepVarTrunc._0 = 1 - pred; if p_&DepVarTrunc._0 < 0 then p_&DepVarTrunc._0 = 0; else if p_&DepVarTrunc._0 > 1 then p_&DepVarTrunc._0 = 1; p_&DepVarTrunc._1 = 1 - p_&DepVarTrunc._0; _INTO_ = (p_&DepVarTrunc._1 > 0.5); end; %if &_dtype_ = C %then %do; if &TruthVar = " " then %end; %else %do; if &TruthVar = . then %end; actualp = .; else if (&TruthVar=1) then actualp = p_&DepVarTrunc._1; else actualp = p_&DepVarTrunc._0; /* %let ncl = 2; %let pprob1 = p_&DepVarTrunc._0; %let pprob2 = p_&DepVarTrunc._1; */ %end; %else %if (&TraitType=Nominal) or (&TraitType=Ordinal) %then %do; array pp p_:; array lev lev_:; maxp = 0; maxpi = 0; levi = .; do i=1 to dim(pp); if (pp[i] > maxp) then do; maxp = pp[i]; maxpi = i; end; if (lev[i] = &TruthVar) then levi = i; end; _INTO_ = lev[maxpi]; if not missing(levi) then actualp = pp[levi]; else actualp = .; * drop lev_: i maxp maxpi levi; %end; run; %let into = _INTO_; %let pred = pred; %end; %end; %else %if &ProcessName = SurvivalPredictiveModeling %then %do; * string for analysis settings; %let ASet = Variable Section Method = &VarSelect; %if (&VarSelect = Stepwise) %then %do; %let ASet = &Aset, SLEntry = &SLEntry, SLStay = &SLStay, MaxStep = &MaxStep; %end; %else %if (&VarSelect = Forward) %then %do; %let ASet = &Aset, SLEntry = &SLEntry; %end; %else %if (&VarSelect = Backward) %then %do; %let ASet = &Aset, SLStay = &SLStay; %end; %else %if (&VarSelect = Score) %then %do; %let ASet = &Aset, nPredictors = &nScorePredictors; %end; %else %do; * for &VarSelect = None; %if %length(&TestInData) | %symexist(CVing) %then %do; data _Covariates; %if %length(&TestInData) %then %do; set &ts1 &intestdat; %end; %else %do; set &ts1 &ts2; %end; run; %let _CovData 
= _Covariates; %end;
%else %do; %let _CovData = &ts1; %end;

/* Fit the Cox model on the training data and write the estimated survival
   function for every row of the covariate data set to SurFunc; the parameter
   estimates are captured in ParEst through ODS. */
proc phreg data=&ts1 %if %length(&PhregOptions) %then &PhregOptions;;
  %if &nipl | &nip %then %do;
    class
      %do i=1 %to &nipl; %sysfunc(varname(&_dsipl_,&i)) %end;
      %do i=1 %to &nip; %sysfunc(varname(&_dsip_,&i)) %end;
    ; * specifying all class predictors in class statement to avoid error;
  %end;
  %if %length(&WeightVar) %then %do;
    weight &WeightVar;
  %end;
  %if %length(&CensorVar) %then %do;
    model &DepVar*&CensorVar(&CensorValues) =
  %end;
  %else %do;
    model &DepVar =
  %end;
  %* proc phreg cannot accept list style variable expression;
  %if ^%length(&ListContPredVars) & ^%length(&ListClassPredVars) %then %do;
    &SelVars
  %end;
  %else %do;
    /* Expand each predictor list name by name.  When OrderInt exceeds 1 the
       names are joined with the bar operator and capped with the at-sign
       order, so a bar is emitted after every name except the very last. */
    %do i=1 %to &nixl;
      %sysfunc(varname(&_dsixl_,&i))
      %if &OrderInt > 1 and ((&i < &nixl) or (&nipl > 0) or (&nix > 0) or (&nip > 0)) %then %do; | %end;
    %end;
    %do i=1 %to &nipl;
      %sysfunc(varname(&_dsipl_,&i))
      %if &OrderInt > 1 and ((&i < &nipl) or (&nix > 0) or (&nip > 0)) %then %do; | %end;
    %end;
    %do i=1 %to &nix;
      %sysfunc(varname(&_dsix_,&i))
      %if &OrderInt > 1 and ((&i < &nix) or (&nip > 0)) %then %do; | %end;
    %end;
    %do i=1 %to &nip;
      %sysfunc(varname(&_dsip_,&i))
      %if &OrderInt > 1 and (&i < &nip) %then %do; | %end;
    %end;
    %if &OrderInt > 1 %then %do; @ &OrderInt %end;
  %end;
  /* Fix: test the value of PhregModelOptions, not the literal text, so the
     slash and option list are emitted only when options were supplied. */
  %if %length(&PhregModelOptions) %then / &PhregModelOptions ; ;
  baseline covariates=&_CovData
           out=SurFunc(keep=_obs _train_ &DepVar &CensorVar &IDVar &ColorVar &WeightVar Survival StdErrSurvival)
           survival=_all_ / rowid=&IDVar ;
  ods output parameterestimates=ParEst;
run;
%end;

/* Stop when no survival estimates were produced. */
%nObsVars(SurFunc);
%if ^&nobs %then %do;
  %put ERROR: There is no valid fitting model found.;
  %put ERROR: This may be caused by not enough valid data in the input data set.;
  %let exiterror = 1;
  %goto exit;
%end;

/* Name of the survival-estimate column used downstream; it is Survival2 when
   the dependent variable is itself called Survival. */
%if %upcase(&DepVar) = SURVIVAL %then %let SurvivalVar = Survival2;
%else %let SurvivalVar = Survival;

/* options notes; */

* output selected predictors;
%if %quote(&SelVars) ne %quote(Intercept) %then %do;
  data spmv;
    set ParEst(keep= Parameter);
order = _n_; rename Parameter = Name; run; proc sort data=spmv nodupkey; by Name; run; proc sort data=spmv out=&outdatvars(drop=order); by order; run; data _null_; set &outdatvars end=_e_; vname="_v"||trim(left(_n_)); call symput(vname,name); if _e_ then call symput('_nsv',_n_); run; %let SelVarShow = &_v1; %do i = 2 %to &_nsv; %let SelVarShow = &SelVarShow &&_v&i; %end; %end; %else %do; data &outdatvars; name = "Intercept"; output; run; %let SelVarShow = Intercept; %end; * Extract selected predictors; %let i = 1; %let sVarList = ; %do %while(%length(%qscan(&SelVarShow,&i))); %if %qscan(&SelVarShow,&i) ne %quote(Intercept) %then %do; %let sVarList = &sVarList %qscan(&SelVarShow,&i); %end; %let i = %eval(&i+1); %end; * keep original time var; data _TimeVar; set &_CovData; %if %length(&TestInData) %then %do; &DepVar = &DepVar._true; %end; keep _Obs %if %length(&TestInData) | %symexist(CVing) %then _train_; &DepVar; rename &DepVar = Org_&DepVar; label &DepVar = "Original &DepVar"; run; * merge original &TimeVar back to SurFunc; data surfunc; if 0 then set _TimeVar; if _n_ = 1 then do; declare hash h(dataset: "_TimeVar", hashexp: 6); %if %length(&TestInData) | %symexist(CVing) %then %do; h.defineKey('_Obs','_Train_'); %end; %else %do; h.defineKey('_Obs'); %end; h.defineData("Org_&DepVar"); h.defineDone(); end; set surfunc; if (h.find() = 0) then output; run; proc sort data=SurFunc; by %if %length(&TestInData) | %symexist(CVing) %then descending _train_; _obs; run; * keep a copy of stack data; %if %symexist(CVing) %then %do; data &outdat; * This will be the major output for cvmc. 
The corresponding AUC and Harrells C will be; * recalculated in the cvmc routines; %end; %else %do; data outlib.Surfunc_Stack; %end; set SurFunc; by %if %length(&TestInData) | %symexist(CVing) %then descending _train_; _obs; retain _TimeOrder_; if first._obs then _TimeOrder_=1; else _TimeOrder_+1; if _train_ then _DataType_ = "Training"; else _DataType_ = "Test"; label _DataType_ = "DataType"; keep _obs _train_ _TimeOrder_ _DataType_ &DepVar Org_&DepVar &CensorVar &IDVar &SurvivalVar StdErrSurvival; run; * Extract values at reference time; %if ^%length(&RefTime) %then %do; proc means data=SurFunc noprint; var &DepVar; output out=mt median=mt; run; data _null_; set mt; call symput("RefTime",trim(left(put(mt,6.1)))); run; %end; data &SPM_OutData3; set SurFunc; by %if %length(&TestInData) | %symexist(CVing) %then descending _train_; _obs; retain _top_; if first._obs then _top_ = 1; *** add character index for numerical color var; %if %length(&ColorVar) %then %do; %if &_cType_ = N %then %do; _ColorVar_ = trim(left(&ColorVar)); %end; %end; if _train_ then _DataType_ = "Training"; else _DataType_ = "Test"; if _top_ and &DepVar >= &RefTime then do; output; _top_ = 0; end; label &DepVar="Reference Survival Time" &SurvivalVar="Reference Survival Estimate" _DataType_ = "DataType"; drop _top_; %if %length(&ColorVar) %then %do; %if &_cType_ = N %then %do; drop &ColorVar; rename _ColorVar_ = &ColorVar; %end; %end; run; data AUC; set surfunc; by %if %length(&TestInData) | %symexist(CVing) %then descending _train_; _obs; retain _AUC_ _MedSurTime_ _mIndex_; _dT_ = &DepVar - lag(&DepVar); _PreSurvival_= lag(&SurvivalVar); _Area_ = _dT_*_PreSurvival_; if first._obs then do; _mIndex_ = 0; _MedSurTime_ = .; _AUC_ = 0; end; else do; if ^_mIndex_ then do; if &SurvivalVar <= 0.5 | last._obs then do; * all survival rate above 0.5 then assign the maximum time to be med-time; _MedSurTime_ = &DepVar; _mIndex_ = 1; end; end; _AUC_ = _AUC_ + _Area_; end; if last._obs; label 
_MedSurTime_="Median Survival Time" _AUC_="Area Under Survival Curve";
  keep %if %length(&TestInData) | %symexist(CVing) %then _train_; _obs _MedSurTime_ _AUC_;
run;

* data with selected predictors;
/* One row per _obs (and per _train_ when test data or CV is in play). */
data SVars;
  set SurFunc;
  by %if %length(&TestInData) | %symexist(CVing) %then descending _train_; _obs;
  if first._Obs;
  keep %if %length(&TestInData) | %symexist(CVing) %then _train_; _obs;
run;

data &SPM_OutData3;
  merge &SPM_OutData3 AUC SVars;
  by %if %length(&TestInData) | %symexist(CVing) %then descending _train_; _obs;
run;

* merge back the dependent variable;
/* MedSurVar is the M_-prefixed name the reference-time column is renamed to.
   It is truncated to M_ plus 30 characters so it stays within the 32
   character limit on SAS variable names. */
%if %length(&DepVar) < 31 %then %let MedSurVar = M_&DepVar;
%else %let MedSurVar = M_%substr(&DepVar,1,30);

%if ^%length(&TestInData) & ^%symexist(CVing) %then %do;
  proc sort data=&SPM_OutData3;
    by &IDVar;
  run;
  proc sort data=&_CovData(keep=&DepVar &IDVar) out=_tSet_;
    by &IDVar;
  run;
  data &SPM_OutData3;
    merge _tSet_ &SPM_OutData3(rename=(&DepVar=&MedSurVar));
    by &IDVar;
    /* Fix: filter on the renamed column.  The previous hard-coded M_ prefix
       plus the full dependent variable name did not match the renamed column
       when the name had to be truncated. */
    if &MedSurVar ne .;
  run;
  data _H1_;
    set &SPM_OutData3(keep=&DepVar &CensorVar _AUC_);
  run;
%end;
%else %do;
  proc sort data=&SPM_OutData3;
    by descending _train_ &IDVar;
  run;
  proc sort data=&_CovData(keep=&TruthVar &IDVar _train_) out=_tSet_(rename=(&TruthVar=&DepVar));
    by descending _train_ &IDVar;
  run;
  data &SPM_OutData3;
    merge _tSet_ &SPM_OutData3(rename=(&DepVar=&MedSurVar));
    by descending _train_ &IDVar;
    /* Fix: same as the branch above, use the possibly truncated name. */
    if &MedSurVar ne .;
  run;
  /* Split into training (_H1_) and test (_H2_) inputs for the C-statistic. */
  data _H1_ _H2_;
    set &SPM_OutData3(keep=&DepVar &CensorVar _AUC_ _DataType_);
    if upcase(_DataType_) = "TRAINING" then output _H1_;
    else output _H2_;
  run;
  %if %symexist(CVing) %then %do;
    data &outdataspm3;
      set &SPM_OutData3;
    run;
  %end;
%end;

* toggle censor variable for proc harrell if necessary;
%global CVarH;
%if %length(&CensorVar) %then %do;
  %if &cvtype=N and %length(&CensorValues) and %quote(&CensorValues) ne %quote(1) %then %do;
    /* Recode the censoring indicator to 1/0 in _Censor1. */
    data _H1_;
      set _H1_;
      if &CensorVar in (&CensorValues) then _Censor1 = 1;
      else _Censor1 = 0;
    run;
    %if %length(&TestInData) | %symexist(CVing) %then %do;
      data _H2_;
        set _H2_;
        if &CensorVar in
(&CensorValues) then _Censor1 = 1; else _Censor1 = 0; run; %if %symexist(CVing) %then %let cdat = &outdat; %else %let cdat = outlib.Surfunc_Stack; data &cdat; set &cdat; if &CensorVar in (&CensorValues) then _Censor1 = 1; else _Censor1 = 0; run; %end; %let CVarH = _Censor1; %end; %else %do; %let CVarH = &CensorVar; %end; %end; * Harrell C-statistic using _AUC_ as the predictive value; proc harrell data=_H1_ out=Harrells_C; time &DepVar; %if %length(&CensorVar) %then %do; censor &CVarH; %end; predict _AUC_; run; data _null_; set Harrells_C; call symputx('Harrells_C',Harrell); call symputx("Harrells_C2",trim(left(round(Harrell,0.01))));* for displaying; run; %if %length(&TestInData) | %symexist(CVing) %then %do; data _H22_; set _H2_; if not missing(&DepVar) and not missing(_AUC_); run; %NObsVars(_H22_); %if &nobs %then %do; proc harrell data=_H2_ out=Harrells_C_2; time &DepVar; %if %length(&CensorVar) %then %do; censor &CVarH; %end; predict _AUC_; run; data _null_; set Harrells_C_2; call symput('Harrells_C_2',Harrell); call symput("Harrells_C2_2",trim(left(round(Harrell,0.01))));* for displaying; run; %end; %else %do; %let Harrells_C_2 = .; %let Harrells_C2_2 = .; %end; %end; proc sort data=%if %symexist(CVing) %then &outdat; %else outlib.Surfunc_Stack; out=SurFunc; by %if %length(&TestInData) | %symexist(CVing) %then _DataType_; _TimeOrder_ &DepVar; run; * set output data for estimated survival function; %let nBin = 50; %if %length(&TestInData) | %symexist(CVing) %then %do; data SurFunc_train SurFunc_test; set SurFunc; if _DataType_ = "Training" then output SurFunc_Train; else output SurFunc_Test; run; proc transpose data=SurFunc_train out=&SPM_OutData(drop= _Name_ _Label_ _TimeOrder_); by _TimeOrder_ &DepVar; var &SurvivalVar; id &IDVar; idlabel &IDVar; run; proc transpose data=SurFunc_test out=&SPM_OutData2(drop= _Name_ _Label_ _TimeOrder_); by _TimeOrder_ &DepVar; var &SurvivalVar; id &IDVar; idlabel &IDVar; run; %if %symexist(CVing) %then %do; data &outdataspm1; 
set &SPM_OutData; run; data &outdataspm2; set &SPM_OutData2; run; %end; data t1_DsToMerge t2_DsToMerge; set &SPM_OutData3; if _DataType_ = "Training" then output t1_DsToMerge; else output t2_DsToMerge; run; %SurFunc_Tall_to_Wide(_inDs=&SPM_OutData,_outDs=Surfunc_wide1,_idVar=&IDVar, _nBin=&nBin,_DsToMerge=t1_DsToMerge, _time_vListPrefix=wide_vList,_time_lListPrefix=wide_lList); %SurFunc_Tall_to_Wide(_inDs=&SPM_OutData2,_outDs=Surfunc_wide2,_idVar=&IDVar, _nBin=&nBin,_DsToMerge=t2_DsToMerge); data &SPM_OutData3; set Surfunc_wide1 Surfunc_wide2; run; %end; %else %do; proc transpose data=SurFunc out=&SPM_OutData(drop= _Name_ _Label_ _TimeOrder_); by _TimeOrder_ &DepVar; var &SurvivalVar; id &IDVar; idlabel &IDVar; run; %SurFunc_Tall_to_Wide(_inDs=&SPM_OutData,_outDs=Surfunc_wide,_idVar=&IDVar, _nBin=&nBin,_DsToMerge=&SPM_OutData3, _time_vListPrefix=wide_vList,_time_lListPrefix=wide_lList); data &SPM_OutData3; set Surfunc_wide; run; %end; %goto PRS; %end; %else %do; %put ERROR: &ProcessName not implemented in PredictiveAnalysis; %let exiterror = 1; %goto exit; %end; ************************************; * final processing for all methods *; ************************************; %put _dtype_ = &_dtype_; %put NumDepAsClass = &NumDepAsClass; %if (&_dtype_=C) or (&NumDepAsClass=Yes) %then %do; * make sure vartype of &into matches that of &DepVar; %let _itype_ = ; %VarAttributes(ts2o,&into,_Type=_itype_); %put _itype_ = &_itype_; %if %length(&_itype_) and (&_itype_ ^= &_dtype_) %then %do; data ts2o; set ts2o; %if (&_dtype_ = N) %then %do; _newi = 0 + &into; %end; %else %do; _newi = trim(left(&into)); %end; drop &into; run; data ts2o; set ts2o; rename _newi = &into; run; %end; %if %length(&ASet) %then %let ASet = &ASet, Priors = &Priors; %else %let ASet = Priors = &Priors; %put into = &into; * if success cutoff is not 0.5, adjust into values; %if (&SuccessCutoff^=0.5) and &nclasses=2 %then %do; * get the two values; data check; set ts2o; keep &TruthVar &DepVar &into p_:; 
run; proc sort data=check nodupkey; by &TruthVar &DepVar &into; run; %do c=1 %to &nclasses; %if &&class&c = &event %then %let eidx1 = &c; %end; %put eidx1 = &eidx1; %let ivalue0 = &class1; %let ivalue1 = &class2; data _null_; set check; retain got0 got1 0; array pp[*] p_:; if &DepVar = &into then do; if pp[&eidx1] >= 0.5 then do; call symputx('ivalue1',&into); got1 = 1; end; else do; call symputx('ivalue0',&into); got0 = 1; end; end; else do; if not got0 and pp[&eidx1] >= 0.5 then do; call symputx('ivalue0',&DepVar); end; if not got1 and pp[&eidx1] < 0.5 then do; call symputx('ivalue1',&DepVar); end; end; run; %put ivalue0 = &ivalue0; %put ivalue1 = &ivalue1; data ts2o; set ts2o; array pp[*] p_:; if pp[&eidx1] >= &SuccessCutoff then do; %if (&_dtype_ = N) %then %do; &into = 0 + &ivalue1; %end; %else %do; &into = "&ivalue1"; %end; end; else do; %if (&_dtype_ = N) %then %do; &into = 0 + &ivalue0; %end; %else %do; &into = "&ivalue0"; %end; end; run; %end; * construct _INTO_ and Predicted_Class variable; data ts2o; set ts2o; %if &into ^= _INTO_ %then %do; _INTO_ = &into; %end; %if &_dtype_ = C %then %do; Predicted_Class = &into; %end; %else %do; Predicted_Class = 0 + &into; %end; * drop &into; run; * switch back to original dependent variable if necessary; %put DepSwitch = &DepSwitch; %if (&DepSwitch = Yes) %then %do; /* ods exclude none; ods results; title "ts2o before depswitch"; proc print data=ts2o; run; */ data ts2o; set ts2o; rename Predicted_Class = Pred_Class01; drop &into; run; /* data ocp1s; set ocp1; %if &eidx = 2 %then %do; Pred_Class01 = _n_ - 1; %end; %else %do; Pred_Class01 = 2 - _n_; %end; %if &_dtype0_ = N %then %do; Predicted_Class = 0 + &DepVar0; %end; %else %do; Predicted_Class = &DepVar0; %end; &into = Predicted_Class; keep Pred_Class01 Predicted_Class &into; run; */ data dclass; set d; keep &DepVar0 &DepVar; if missing(&DepVar0) then delete; run; proc sort data=dclass nodupkey; by &DepVar0; run; data dclass; set dclass; rename &DepVar=Pred_Class01 
&DepVar0=Predicted_Class; &into = &DepVar0; run; /* data outlib.ts2o; set ts2o; run; data outlib.dclass; set dclass; run; */ /* title "dclass"; proc print data=dclass; run; */ proc sql; create table ts2o1 as select * from ts2o as l left join dclass as r on l.Pred_Class01 = r.Pred_Class01; quit; data ts2o; set ts2o1; label Pred_Class01=' ' Predicted_Class=' '; run; /* data outlib.ts2o; set ts2o; run; */ %let DepVarOld = &DepVar; %let _dtypeold_ = &_dtype_; %let DepVar = &DepVar0; %let _dtype_ = &_dtype0_; %put DepVar=&DepVar DepVarOld=&DepVarOld _dtype_=&_dtype_ _dtypeold_=&_dtypeold_; %let TruthVarOld = &TruthVar; %let TruthVar = &TruthVar0; %put TruthVar=&TruthVar TruthVarOld=&TruthVarOld; %let switchp = 0; data _null_; set dclass; if Pred_Class01 = 1 then call symput('switchp',1); stop; run; %if (&switchp) %then %do; data ts2o; set ts2o; rename p_&DepVarTrunc._0=_temp0 p_&DepVarTrunc._1=_temp1; run; data ts2o; set ts2o; rename _temp0=p_&DepVarTrunc._1 _temp1=p_&DepVarTrunc._0; run; %end; %Rename_P_; %end; * if prediction is missing, impute the baseline prediction to avoid missing rmse; %let missingp = 0; data ts2o; set ts2o; if missing(Predicted_Class) then do; _missp_ = 1; call symput("missingp","1"); end; else do; _missp_ = 0; end; run; %if (&missingp) %then %do; %put; %put Imputing baseline predictions for missing predicted values.; %put; /* data outlib.ts2obi; set ts2o; run; */ * compute baseline prediction value; data one; %if (&_dtype_=C) %then %do; &DepVar = " "; Predicted_Value = " "; %end; %else %do; &DepVar = .; Predicted_Value = .; %end; output; run; %do c=1 %to &nclasses; %global actualp&c; %end; %basescor( train_dsname=&ts1, test_dsname=one, trait=&DepVar, cv_custompriors=&custompriors ); data ts2o1 ts2o2; set ts2o; _sortindex_ = _n_; if _missp_ then output ts2o2; else output ts2o1; run; proc sort data=ts2o2; by &DepVar; run; data ts2o3; run; /* ods exclude none; ods results; title "ts2o2"; proc print data=ts2o2; run; title "actualp"; proc print 
data=actualp; run; */ %put BPV = &BPV; * merge with actualp created in macro basescor; proc sql; create table ts2o3 as select * from ts2o2(drop=actualp) as l, actualp as r where l.&DepVar=r.&DepVar; quit; %do c=1 %to &nclasses; %put actualp&c = &&actualp&c; %end; data ts2o3; set ts2o3; array pp{*} p_:; %do c=1 %to &nclasses; pp[&c] = &&actualp&c; %end; &pred = pp[2]; %if &_dtype_ = C %then %do; Predicted_Class = trim(left("&BPV")); %end; %else %do; Predicted_Class = &BPV; %end; run; /* data outlib.actualp; set actualp; run; */ data outlib.ts2o3; set ts2o3; run; /* data ts2o2; merge ts2o2 actualp; by &DepVar; %if &_dtype_ = C %then %do; Predicted_Class = trim(left("&BPV")); %end; %else %do; Predicted_Class = &BPV; %end; run; */ /* proc print data=ts2o3; run; */ data ts2o; set ts2o1 ts2o3; run; proc sort data=ts2o; by _sortindex_; run; data ts2o; set ts2o; drop _sortindex_; run; %end; %if %length(%trim(&CustomCosts)) %then %do; %ApplyCosts(data=ts2o,costfunction=&CustomCosts,intovar=Predicted_Class); %let ASet = &ASet, Custom Costs = &CustomCosts; %end; * construct Correct and Correct_Pred variables; data ts2o; set ts2o; length Correct 8 Correct_Pred $ 3; %if &_dtype_ = C %then %do; if &TruthVar = " " or Predicted_Class = " " then do; %end; %else %do; if &TruthVar = . or Predicted_Class = . then do; %end; Correct = .; Correct_Pred = " "; actualp = .; end; else if &TruthVar = Predicted_Class then do; Correct = 1; Correct_Pred = "Yes"; end; else do; Correct = 0; Correct_Pred = "No"; end; %if (&ProcessName = DiscriminantAnalysis) or (&ProcessName = KNearestNeighbors) %then %do; %do c=1 %to &ncan; if Can&c = . 
then Can&c = 0; %end; %end; %if ^%symexist(CVing) %then %do; rename actualp = Prob_Actual; %end; run; /* data outlib.ts2oai; set ts2o; run; */ %end; %else %do; data ts2o; set ts2o; Predicted_Value = &pred; run; * if not in CVMC, compute baseline prediction value; %if ^%symexist(CVing) or %symexist(TSMC) or %symexist(LCMC) %then %do; %let missingp = 0; data _null_; set ts2o; if missing(Predicted_Value) then do; call symputx("missingp","1"); stop; end; run; %put missingp = &missingp; %if (&missingp) %then %do; data one; &DepVar = .; Predicted_Value = .; output; run; %basescorint(train_dsname=&ts1, test_dsname=one, trait=&DepVar); %end; %end; %put BPV = &BPV; data ts2o; set ts2o; * if prediction is missing, use the baseline prediction to avoid missing rmse; if missing(Predicted_Value) then Predicted_Value = &BPV; Residual = &TruthVar - Predicted_Value; AbsResidual = abs(Residual); * drop &pred; run; %end; /* ods exclude none; ods results; title "&ProcessName"; proc print data=ts2o; run; ods exclude all; ods noresults; */ /* for discrim or knn with nk > 1, check if the first canonical score means are positively correlated with the first canonical score means from k=1. if not, multiply can1 by -1. this is so colored points will cluster appropriately in the plot of the canonical scores. 
*/ /* need to move this block to cross_validate, disable for now */ /* %if ((&ProcessName = DiscriminantAnalysis) or (&ProcessName = KNearestNeighbors)) and (&nk > 1) %then %do; data ts2oc1; set ts2o; keep &DepVar can1; run; proc sort data=ts2oc1; by &DepVar; run; proc means data=ts2oc1 noprint; by &DepVar; var can1; output out=c1m mean=can1mean; run; %let negcorr = 0; %if (&k=1) %then %do; data c1m1; set c1m; rename can1mean=can1mean1; run; %end; %else %do; data c1m2; merge c1m1 c1m; by &DepVar; run; proc corr data=c1m2 out=corrm1 noprint; var can1mean1 can1mean; run; proc print data=corrm1; run; data _null_; set corrm1; if _type_ = "CORR" and _name_ = "can1mean"; if can1mean1 < 0 then call symput("negcorr","1"); run; %end; %if (&negcorr=1) %then %do; %put Reversing the sign of the first canonical score for iteration &k of &nk..; data ts2o; set ts2o; can1 = -can1; run; %end; %end; */ * if not cross-validating, add selected variables to output data set; * assuming sort order of &ts1 and &ts2 has not changed; %if ^%symexist(CVing) %then %do; ods exclude none; ods results; /* title "&ts2"; proc print data=&ts2; run; */ %put TestSpec = &TestSpec; %put ts1 = &ts1; %put ts2 = &ts2; * capture where statements; %let wherestmt = ; %if (&ProcessName = RadialBasisMachine) %then %do; %let stmts = &ProcGlimmixStatements; %end; %else %do; %let stmts = ; %end; %let ls = %length(&stmts); %if &ls %then %do; %let i1 = %index(&stmts,where); %if &i1 %then %do; %let s1 = %substr(&stmts,&i1,%eval(&ls-&i1+1)); %let i2 = %index(&s1,%str(;)); %let wherestmt = %substr(&s1,1,&i2); %put wherestmt = &wherestmt; %end; %end; data sv; set &ts1 %if (&TestSpec = Yes) %then %do; &ts2 %end; ; %if %length(&wherestmt) %then %do; &wherestmt; %end; keep %let i=1; %do %while(%length(%scan(&SelVars,&i,'*'))); %scan(&SelVars,&i,'*') %let i=%eval(&i+1); %end; ; run; data ts2sum; merge ts2o sv; if missing(_obs) then delete; run; %end; %else %do; data ts2sum; set ts2o; run; %end; * data set of selected 
variable names; %if ^%length(%trim(&SelVars)) %then %let SelVars = None; %put havets2v = &havets2v; %if ^&havets2v %then %do; * expand SelVars if it contains list-style specs; %if %index(&SelVars,:) or %index(&SelVars,-) %then %do; data ts2vars; length name $ 32; %do i=1 %to &nixl; name = "%sysfunc(varname(&_dsixl_,&i))"; output; %end; %do i=1 %to &nipl; name = "%sysfunc(varname(&_dsipl_,&i))"; output; %end; %do i=1 %to &nix; name = "%sysfunc(varname(&_dsix_,&i))"; output; %end; %do i=1 %to &nip; name = "%sysfunc(varname(&_dsip_,&i))"; output; %end; run; %end; %else %do; data ts2vars; length name $ 32; %let i=1; %do %while(%length(%scan(&SelVars,&i,' '))); name = "%scan(&SelVars,&i,' ')"; output; %let i=%eval(&i+1); %end; run; %end; /* proc print data=ts2vars; run; */ %end; %end; %if (&SomeSel = 0) %then %do; %put WARNING: No variables selected. An intercept-only model is assumed. You may want to try relaxing; %put the selection criteria or using a different model.; %let SelVars = None; %end; * build list of first 10 variables to show in the output; %let onlyint = 0; %if ^%index(&SelVars,*) %then %do; %if (%quote(&SelVars) = %quote(None)) or (%quote(&SelVars) = %quote(Intercept)) %then %do; %let onlyint = 1; %end; %end; %if (&onlyint) %then %do; %let SelVarShow = Intercept; %end; %else %do; %if (&ProcessName = LogisticRegression) and (&VarSelect = Penalized) %then %do; data pfit; set &pfit; keep X1 Predictor; rename X1=NAME; label X1="NAME"; if X1="Full Model" then delete; run; proc sort data=pfit; by NAME; run; proc sort data=ts2vars; by NAME; run; data ts2vars; merge pfit ts2vars; by NAME; run; proc sort data=ts2vars; by descending IMPORTANCE; run; %end; %if (&FR_Inds = Yes) %then %do; data ts2vars; set ts2vars; X1 = NAME; run; proc sort data=ts2vars; by X1; run; proc sort data=friidef; by X1; run; data ts2vars; merge ts2vars friidef; by X1; drop X1; if name ="" then delete; run; %let _varsid_ = %sysfunc(open(ts2vars)); %let idefnum = %sysfunc(varnum(&_varsid_, 
INDICATOR_DEFINITION)); %let _varsid_ = %sysfunc(close(&_varsid_)); %if &idefnum %then %do; proc freq data=ts2vars noprint; tables INDICATOR_DEFINITION/out=idfrq; run; %NObsVars(idfrq); %if &nObs = 1 %then %do; data _null_; set ts2vars; if INDICATOR_DEFINITION="" then do; call symput("dropid", 1); end; run; %if &dropid=1 %then %do; data ts2vars; set ts2vars; drop INDICATOR_DEFINITION; run; %let idefnum = 0; %end; %end; %end; %let selvarname=NAME; %end; %else %do; %let selvarname=name; %end; %put selvarname = &selvarname; %NObsVars(ts2vars); %if (&nobs > 7) %then %do; %let nlv = 7; data ts2vars7; set ts2vars; if _n_ > 7 then stop; run; %let ts2vds = ts2vars7; %let ending = plus %eval(&nobs - &nlv) not shown, see &&&_OutDataVarsName...sas7bdat; %end; %else %do; %let nlv = &nobs; %let ts2vds = ts2vars; %let ending = ; %if (&FR_Inds = Yes) %then %do; %if &idefnum > 0 %then %let ending =, see &&&_OutDataVarsName...sas7bdat for _I Definition; %end; %end; data _null_; set &ts2vds; call symput('sv'||trim(left(_n_)),&selvarname); run; %let SelVarShow = ; %do i=1 %to &nlv; /* %put sv=&&sv&i; */ %let SelVarShow = &SelVarShow &&sv&i; %end; %let SelVarShow = &SelVarShow &ending; %end; %put Selected Variables = &SelVarShow; title2 "Selected Variables = &SelVarShow"; /* %if (&_dtype_=C) or (&NumDepAsClass=Yes) %then %do; proc freq data=ts2sum; tables &DepVar * Predicted_Class; run; %end; */ %put creating outdat=&outdat; %put creating outdatvars=&outdatvars; data &outdat; set ts2sum; run; data &outdatvars; set ts2vars; run; %global TrainSC TestSC; * if not cross-validating, create JSL for output display; %if %symexist(CVing) %then %do; title; title "Cross Validation Model Comparison"; options nonotes; %end; %else %do; * check for missing dependent variable values and create indicator; %if (&TestSpec = Yes) %then %do; %put Test set is assumed to have missing values for &DepVar in outdat=&outdat; data &outdat; set &outdat; length DataType $ 8; if missing(&DepVar) then DataType = 
"Test"; else DataType = "Training"; run; %end; * compute RMSE and AUC for training and test data; %global rmse mae mann; %let rmse = ; %let mann = ; data training; set &outdat; %if (&TestSpec = Yes) %then %do; where DataType = "Training"; %end; run; %if (&TestSpec = Yes) %then %do; data test; set &outdat; where DataType = "Test"; run; %end; %if %length(&&&_outdataName) > 29 %then %let _psoutname = %substr(&&&_outdataName,1,29); %else %let _psoutname = &&&_outdataName; %if (&_dtype_=C) or (&NumDepAsClass=Yes) %then %do; %NObsVars(training); %if ^&nobs and ^%symexist(CVing) %then %do; %put ERROR: Insufficient training data to continue calculations. You may have too few observations or too many missing predictor values.; %let exiterror = 1; %goto exit; %end; * RMSE and MAE; %compute_ase_nominal( data=training, output=train_ase, actual=Prob_Actual, nclasses=&nclasses, mdlname=training, prevalences=&Priors, cv_custompriors=&CustomPriors, trait=&TruthVar ); data _null_; set train_ase; rmse = sqrt(ase); call symput('rmse',put(rmse,6.4)); call symput('mae',put(mae,6.4)); run; %let TrainSC = Root Mean Square Error = &rmse, Mean Absolute Error = &mae; * AUC; %if &nclasses=2 %then %do; %put event=&event TruthVar=&TruthVar etype=&_dtype_; data training; set training; if missing(&TruthVar) then delete; run; %locate_pevent_isevent( data=training, trait=&TruthVar, event=&event, etype=&_dtype_, actual=Prob_Actual ); /* ods exclude none; ods results; title "training after locate_pevent"; proc print data=training; run; */ /* data outlib.training_afterlp; set training; run; */ %compute_mann( data=training, pevent=_pevent, isevent=_isevent ); %global _outroc; %let _outroc = ; %PrepOutDS(_outroc,_outrocname,&&&_outdataname,_roc); %compute_roc( data=training, pevent=_pevent, isevent=_isevent, outdata=&_outroc; ); data _null_; mann = &mann; call symput('mann',put(mann,6.4)); run; %put train mann = &mann; %let TrainSC = &TrainSC, Area Under ROC Curve = &mann; %end; %put Train Accuracy 
Trait = &DepVar; * Accuracy; %compute_acc_nominal( data=training, output=train_acc, trait=&DepVar, correct=Correct, mdlname=training, prevalences=&Priors, cv_custompriors=&CustomPriors ); proc transpose data=train_acc out=acct; var Accuracy_:; run; %NObsVars(acct); %let nlev = &nobs; data acct; set acct; call symput('acc_name'||trim(left(_n_)),trim(left(_name_))); run; data _null_; set train_acc; call symput('acc',put(accuracy,6.4)); %do i=1 %to &nlev; call symput("acc&i",put(&&acc_name&i,6.4)); %end; run; %let TrainSC = &TrainSC, Accuracy = &acc; %if &nlev=2 and &acc_name1=Accuracy_0 and &acc_name2=Accuracy_1 %then %do; %let acc_name1 = Specificity; %let acc_name2 = Sensitivity; %end; %do i=1 %to &nlev; %let TrainSC = &TrainSC, &&acc_name&i = &&acc&i; %end; /* save RMSE, ACC, AUC to a table */ data tmp_training; length MDLSC $1000; _Data_ = "training"; _Model_ = "&&&_outdataName"; Accuracy = &acc; %if &acc_name1=Specificity %then %do; length Specificity 6 Sensitivity 6; Specificity = &acc1; Sensitivity = &acc2; %end; %if &nclasses=2 %then %do; AUC = &mann; %end; RMSE = &rmse; MDLSC = "&TrainSC"; run; data OutLib.&_psoutname._ps; set tmp_training; run; %if (&TestSpec = Yes) %then %do; %compute_ase_nominal( data=test, output=test_ase, actual=Prob_Actual, nclasses=&nclasses, mdlname=test, prevalences=&Priors, cv_custompriors=&CustomPriors, trait=&TruthVar ); data _null_; set test_ase; rmse = sqrt(ase); call symput('rmse',put(rmse,6.4)); call symput('mae',put(mae,6.4)); run; %let TestSC = Root Mean Square Error = &rmse, Mean Absolute Error = &mae; %if &nclasses=2 %then %do; %locate_pevent_isevent( data=test, trait=&TruthVar, event=&event, etype=&_dtype_, actual=Prob_Actual ); %compute_mann( data=test, pevent=_pevent, isevent=_isevent ); %compute_roc( data=test, pevent=_pevent, isevent=_isevent, outdata=tmp; ); data &_outroc; set &_outroc; DataType = "Training"; run; data tmp; set tmp; DataType = "Test"; run; data &_outroc; set &_outroc tmp; run; data _null_; mann = 
&mann; call symput('mann',put(mann,6.4)); run;
      %put test mann = &mann;
      %let TestSC = &TestSC, Area Under ROC Curve = &mann;
    %end;

    /* Test-set accuracy: overall accuracy plus one Accuracy_ prefixed column per class level. */
    %put Test Accuracy Trait = &TruthVar;
    * Accuracy;
    %compute_acc_nominal( data=test, output=test_acc, trait=&TruthVar, correct=Correct, mdlname=test, prevalences=&Priors, cv_custompriors=&CustomPriors );

    /* One row per Accuracy_ column; nobs is presumably set by NObsVars (defined elsewhere). */
    proc transpose data=test_acc out=acct; var Accuracy_:; run;
    %NObsVars(acct);
    %let nlev = &nobs;

    /* acc_name1..acc_nameN hold the per-level column names. */
    data acct; set acct; call symput('acc_name'||trim(left(_n_)),trim(left(_name_))); run;

    /* acc holds the overall accuracy, acc1..accN the per-level values, all formatted 6.4. */
    data _null_; set test_acc; call symput('acc',put(accuracy,6.4));
      %do i=1 %to &nlev;
        call symput("acc&i",put(&&acc_name&i,6.4));
      %end;
    run;

    /* Extend the criteria string; Accuracy_0 and Accuracy_1 are reported as Specificity and Sensitivity. */
    %let TestSC = &TestSC, Accuracy = &acc;
    %do i=1 %to &nlev;
      %if &&acc_name&i = Accuracy_0 %then %let acc_name&i = Specificity;
      %else %if &&acc_name&i = Accuracy_1 %then %let acc_name&i = Sensitivity;
      %let TestSC = &TestSC, &&acc_name&i = &&acc&i;
    %end;

    /* save RMSE, ACC, AUC to a table */
    %if %length(&InData) > 28 %then %let inname = %substr(&InData,1,28);
    %else %let inname = &InData;

    data tmp_test; length MDLSC $1000; _Data_ = "test"; _Model_ = "&&&_outdataName"; Accuracy = &acc;
      length Specificity 6 Sensitivity 6;
      /* Choose the target column at macro time. The former DATA step IF compared data set */
      /* variables instead of names, so two missing values compared equal and an unrelated  */
      /* per-level accuracy could be written into Specificity or Sensitivity.               */
      %do i=1 %to &nlev;
        %if &&acc_name&i = Specificity %then %do;
          Specificity = &&acc&i;
        %end;
        %else %if &&acc_name&i = Sensitivity %then %do;
          Sensitivity = &&acc&i;
        %end;
      %end;
      %if &nclasses=2 %then %do;
        AUC = &mann;
      %end;
      RMSE = &rmse; MDLSC = "&TestSC";
    run;

    /* Append the test row to the performance-summary table that already holds the training row. */
    data OutLib.&_psoutname._ps; set OutLib.&_psoutname._ps tmp_test; run;
  %end;
%end;
%else %do;
  /* Trait is not handled as a class variable: report RMSE, MAE and Harrell C on the training rows */
  /* that have a non-missing truth value.                                                          */
  data train1; set training; if missing(&TruthVar) then delete; run;
  * Harrell C-statistic;
  proc harrell data=train1 out=train1_hc; time &TruthVar; predict Predicted_Value; run;
  data _null_; set train1_hc; call symputx('Harrell_C',Harrell); run;
  %compute_ase_interval( data=train1, output=train_ase, y=&TruthVar, predict=Predicted_Value, mdlname=training );
  data _null_; set train_ase; rmse = sqrt(ase); call symput('rmse',put(rmse,6.4)); call symput('mae',put(mae,6.4)); run;
  %let TrainSC = Root Mean
Square Error = &rmse, Mean Absolute Error = &mae, Harrell C-Statistic = &Harrell_C;

  /* Training row of the performance-summary table. _Model_ must hold the output data set name, */
  /* so the macro variable named by the _OutDataName parameter is resolved indirectly (three     */
  /* ampersands), as is done for _psoutname and for the class-trait branch. A single ampersand   */
  /* stored the name of the macro variable instead of its value.                                 */
  data train_ps; length MDLSC $1000; _Data_ = "training"; _Model_ = "&&&_OutDataName"; RMSE = &rmse; MAE = &mae; Harrell_C = &Harrell_C; MDLSC = "&trainSC"; run;
  data outlib.&_psoutname._ps; set train_ps; run;

  %if (&TestSpec = Yes) %then %do;
    /* test2 keeps only rows with both a truth value and a prediction; it decides whether */
    /* any test criteria can be computed at all.                                          */
    data test2; set test; if not missing(&TruthVar) and not missing(Predicted_Value); run;
    %NObsVars(test2);
    %if &nobs %then %do;
      /* NOTE(review): the statistics below read the unfiltered test table, whereas the training */
      /* side uses its filtered copy (train1) - confirm whether test2 was intended here.          */
      * Harrell C-statistic;
      proc harrell data=test out=test_hc; time &TruthVar; predict Predicted_Value; run;
      data _null_; set test_hc; call symputx('Harrell_C',Harrell); run;
      %compute_ase_interval( data=test, output=test_ase, y=&TruthVar, predict=Predicted_Value, mdlname=test );
      data _null_; set test_ase; rmse = sqrt(ase); call symput('rmse',put(rmse,6.4)); call symput('mae',put(mae,6.4)); run;
    %end;
    %else %do;
      /* No usable test rows: report SAS missing values for every criterion. */
      %let Harrell_C = .;
      %let rmse = .;
      %let mae = .;
    %end;
    %let TestSC = Root Mean Square Error = &rmse, Mean Absolute Error = &mae, Harrell C-Statistic = &Harrell_C;

    /* Test row, appended to the table that already holds the training row. */
    data test_ps; length MDLSC $1000; _Data_ = "test"; _Model_ = "&&&_OutDataName"; RMSE = &rmse; MAE = &mae; Harrell_C = &Harrell_C; MDLSC = "&TestSC"; run;
    data outlib.&_psoutname._ps; set outlib.&_psoutname._ps test_ps; run;
  %end;
%end;

%PRS:;
/* PRS accumulates comma-terminated phrases describing the predictor reduction settings. */
* create predictor reduction settings string;
%let PRS = ;
%if %length(&PredWhere) %then %do;
  %let PRS = &PRS Continuous Predictor Include Filter = &PredWhere,;
%end;
%if %length(&PredClassWhere) %then %do;
  %let PRS = &PRS Class Predictor Include Filter = &PredClassWhere,;
%end;
* if PRS contains double quotes, add \!
in front of them for jsl.; %let len = %length(%bquote(&PRS)); %if &len %then %do; %put Current PRS = &PRS; %let i = 1; %do %while(&i <= &len); %let chari = %qsubstr(%nrbquote(&PRS),&i,1); /* %put chari = &chari;*/ %if %str(%") = %bquote(&chari) %then %do; %let part1 = %qsubstr(%nrbquote(&PRS),1,%eval(&i-1)); %let part2 = %qsubstr(%nrbquote(&PRS),&i,%eval(&len-&i+1)); %if %length(%bquote(&part1)) = %eval(&i-1) %then %do; %let PRS = &part1.\!&part2; %end; %else %do; %let PRS = &part1. \!&part2; %end; /* %put New PRS = &PRS; */ %let i=%eval(&i+2); %let len = %eval(&len+2); %end; %let i=%eval(&i+1); %end; %put New PRS = &PRS; %end; %if %length(&StandardizationMethod) %then %do; %let PRS = &PRS Predictor Standardization = &StandardizationMethod,; %end; %if &KMeans=Yes %then %do; %let PRS = &PRS K-Means = &KM_Max,; %end; %if &StatTest=Yes %then %do; %if (&nix) %then %do; %let STMeth = &ST_Method; %if (&nip) %then %let STMeth = &STMeth and Fisher Exact Test; %end; %else %do; %let STMeth = Fisher Exact Test; %end; %let MTMeth = &MultipleTestingMethod; %if ^%length(&MTMeth) %then %let MTMeth = None; %let PRS = &PRS Stat Filter = &STMeth, Multiple Testing Method = &MTMeth,; %if &NegLog10pCutoff > 0 %then %do; %let PRS = &PRS -log10(p-value) Cutoff = &NegLog10pCutoff,; %end; %if &AbsMeanDiffCutoff > 0 %then %do; %let PRS = &PRS Mean Difference Cutoff = &AbsMeanDiffCutoff,; %end; %if &AbsPropDiffCutoff > 0 %then %do; %let PRS = &PRS Proportion Difference Cutoff = &AbsPropDiffCutoff,; %end; %end; %if (&ProcessName = LogisticRegression) %then %do; %if (&VarSelect = Penalized) %then %do; %if &FR_ImpVars = Yes %then %do; %let PRS = &PRS Best &FR_NVars from Forest,; %end; %end; %end; %if ^%length(&PRS) %then %do; %let PRS = None; %end; %else %do; %let len = %eval(%length(&PRS)-1); %let PRS = %substr(%nrbquote(&PRS),1,&len); %end; %if &ProcessName = SurvivalPredictiveModeling %then %goto SurvivalJSL; * Save DepVar name to OrgDepVar; %let traitVar=&DepVar; * convert names to labels; 
%CheckLabel(&outdat,DepVar); %let traitLabel=&DepVar; %if %length(&LabelVar) %then %do; %CheckLabel(&outdat,LabelVar); %end; %CheckLabel(OutLib.&&&_OutDataName,TruthVar); %if %symexist(ColorVar) %then %do; %if %length(&ColorVar) %then %do; %CheckLabel(OutLib.&&&_OutDataName,ColorVar); %end; %end; %if %length(&LabelVar) %then %do; %CheckLabel(OutLib.&&&_OutDataName,LabelVar); %end; %global _mainjslfile_; %let _mainjslfile_ = &&&_JSLFile; * output JSL code; data _null_; file "&&&_JSLFile"; put "//!"; put 'Names Default to Here(1);'; put "Here:DataList = {};"; put "Here:TabButtonList = {};"; put "Here:TabCodeList = {};"; put "Here:TabAfterCodeList = {};"; put "Here:TabOpenList = {};"; %if (&ProcessName = PartitionTrees) and %upcase(&TreeMethod)=TREE and ^%symexist(CVing) %then %do; put "Here:PRS = ""&PRS"";"; put "Here:ASet = ""&ASet"";"; put "Here:_OutDataName = ""&&&_OutDataName"";"; put "Here:_OutDataVarsName = ""&&&_OutDataVarsName"";"; %let _OutPSName = &_psoutname._ps; put "Here:_OutPSName = ""&_OutPSName"";"; %if &trait_type = qualitative %then %do; %if &nclasses = 2 %then %do; put "Here:_outrocname = ""&_outrocname"";"; %end; %else %do; put "Here:_outrocname = "" "";"; %end; %if &_dtype_ = C %then %do; put "Here:event = ""&event"";"; %end; %else %do; put "Here:event = &event;"; %end; put "Here:nclasses = &nclasses;"; put "Here:prevalences = ""&Priors"";"; %end; %else %do; put "Here:_outrocname = "" "";"; put "Here:event = .;"; put "Here:nclasses = .;"; put "Here:prevalences = "" "";"; %end; put "Here:TrainPath=""&TrainPath"";"; put "Here:TestPath=""&TestPath"";"; put "Here:ValidPath=""&ValidPath"";"; put "Here:train_dsname=""&InData"";"; put "Here:valid_dsname=""&ValidInData"";"; put "Here:test_dsname=""&TestInData"";"; put "Here:outputpath=""&OutPath"";"; put "Here:where_clause="" "";"; put "Here:inmodel_dsname=""&inmodel_dsname"";"; put "Here:trait=""&&traitVar"";"; put "Here:traitlabel=""&&traitLabel"";"; put "Here:trait_type=""&trait_type"";"; put 
"Here:subtree="" "";"; put "Here:assess="" "";"; put "Here:model_output=""&model_output"";"; put "Here:nodestats_output=""&nodestats_output"";"; put "Here:importance_output=""&importance_output"";"; put "Here:similarity_output="" "";"; put "Here:rules_output=""&rules_output"";"; put "Here:cmprules_output=""&cmprules_output"";"; put "Here:summary_output="" "";"; put "Here:sequence_output=""&sequence_output"";"; put "Here:score_train_output=""&score_train_output"";"; put "Here:score_valid_output=""&score_valid_output"";"; put "Here:score_test_output=""&score_test_output"";"; put "Here:score_train_outfit="" "";"; put "Here:score_valid_outfit="" "";"; put "Here:score_test_outfit="" "";"; put "Here:priors=""&priors"";"; %if &missing= DISTRIBUTE %then %do; put "Here:missing=""DISTRIBUTE"";"; %end; %else %do; put "Here:missing="" "";"; %end; %if &nleaves > 1 %then %do; put "Here:valid_assess_column = &valid_assess_column;"; %end; * UPDATE MODEL CRITERIA IN MODEL RESULTS TAB; put "Here:GetModelCriteria = Expr("; put " If(IsEmpty(Here:ps_dt),"; put " Here:ps_dt=open(""&ClientOutPath.&_OutPSName..sas7bdat"",invisible);"; put " CurrentDataTable(ps_dt);"; put " summarize(Here:mdlsc_list=by(MDLSC));"; put " Here:nsc = nitems(mdlsc_list);"; put " );"; put ");"; /* put " if( !IsEmpty(tb5b), tb5b << Set Text(mdlsc_list[1]));"; put " if( nsc > 1,"; put " if( !IsEmpty(tb4b), tb4b << Set Text(mdlsc_list[2]));"; put " );"; */ * OPEN VARIABLE IMPORTANCE TABLE; put "Here:OpenImpDsExpr = Expr("; put " if(IsEmpty(Here:varimp_dt),"; put " Here:varimp_dt=open(""&ClientOutPath.&importance_output..sas7bdat"",invisible);"; put " );"; put ' Here:where_imp_expr = ":RELATIVE IMPORTANCE > 0";'; put ' if( score_valid_output != "",'; put ' where_imp_expr = where_imp_expr || " & :VALIDATION RELATIVE IMPORTANCE > 0";'; put " );"; put " CurrentDataTable(varimp_dt);"; put " if( IsEmpty(subset_imp_dt) != 1, "; put " Close(subset_imp_dt, NoSave);"; put " );"; put ' Here:subset_imp_expr = "subrows = 
varimp_dt< 1 %then %do; put " summarize(Here:vars_list=by(NAME));"; put " nvars=nitems(vars_list);"; put " Here:remains = 0;"; put " if( nvars > 10,"; put " Here:remains = nvars - 10;"; put " endlist = 10;"; put " ,"; put " endlist = nvars;"; put " );"; put " for(v=1, v<=endlist, v++,"; put " Here:var=vars_list[v];"; put " if( v==1,"; put " Here:selvars = var;"; put " ,"; put ' selvars = selvars||" "||var;'; put " );"; put " );"; put " if( remains > 0,"; put ' selvars = selvars ||" plus "||char(eval(remains))||" not shown, see Variable Importance Table";'; put " );"; %end; %else %do; put ' selvars = "None";'; %end; /* put " if( !IsEmpty(tb3b), tb3b << Set Text(selvars));"; */ put ");"; %end; * main results; put "Here:vb=Function({},{default local},"; %if (&ProcessName = PartitionTrees) and %upcase(&TreeMethod)=TREE and ^%symexist(CVing) %then %do; put "GetModelCriteria;"; put "GetFinalSelectedVars;"; %end; put " If(IsEmpty(Here:pred_data),"; put " Here:pred_data=open(""&ClientOutPath.&&&_OutDataName...sas7bdat"",invisible);"; %if %length(&LabelVar) %then %do; put " %str(column(%"&LabelVar%") << label(1);)"; %end; put " Here:pred_data << Select Where(1);"; put " Here:pred_data << Markers(8);"; put " Here:pred_data << Clear Select;"; %if (&_dtype_=C) or (&NumDepAsClass=Yes) %then %do; put " Here:pred_data<= 2) %then %do; put "Here:cs=Function({},{default local},"; put " If(IsEmpty(Here:pred_data),"; put " Here:pred_data=open(""&ClientOutPath.&&&_OutDataName...sas7bdat"",invisible);"; put " );"; put " CurrentDataTable(Here:pred_data);"; put ' HListBox('; %if (&ncan=2) %then %do; put ' VListBox(Bivariate(Y( :Can2), X( :Can1), /* Density Ellipse(0.95, {Line Color("Red")}), */ '; %if (&TestSpec = Yes) %then %do; put " By(:DataType),"; %end; put ' SendToReport(Dispatch({}, "Bivar Plot", FrameBox, {Marker Size(4), '; put ' Marker Drawing Mode(Outlined)}))))'; %end; %else %do; put " VListBox(Multivariate(Y( "; %do c = 1 %to %eval(&ncan-1); put " :Can&c , " ; %end; put " 
:Can&ncan),"; %if (&TestSpec = Yes) %then %do; put " By(:DataType),"; %end; put " Scatterplot Matrix(Density Ellipses(0), Show Correlations(0), Horizontal(1))"; put " ))"; %end; put " )"; put ");"; put " "; put "InsertInto(DataList,{{""Here:pred_data""}});"; put "InsertInto(TabButtonList, ""Canonical Scores"");"; put "InsertInto(TabCodeList,expr(Here:cs()));"; put "InsertInto(TabAfterCodeList, "" "");"; put "InsertInto(TabOpenList,1);"; %end; %end; %if (&ProcessName = LogisticRegression) and (&VarSelect = Penalized) and ^%symexist(CVing) %then %do; put "Here:vi=Function({},{default local},"; put " If(IsEmpty(Here:fit_data),"; put " Here:fit_data=open(""&ClientOutPath.&pdat._fit.sas7bdat"",invisible);"; put " Here:baserow=fit_data< 1 %then %do; put "include(jslpath||""Partition Trees Viewer.jsl"");"; %end; %end; %if (&ProcessName = PartialLeastSquares) %then %do; put " "; put "Here:vrs=Function({},{default local},"; put " If(IsEmpty(Here:pred_data),"; put " Here:pred_data=open(""&ClientOutPath.&&&_OutDataName...sas7bdat"",invisible);"; put " );"; put " CurrentDataTable(Here:pred_data);"; put ' HListBox('; %if (&NumberOfComponents=2) %then %do; put ' VListBox(Bivariate(Y( :pls2), X( :pls1),'; %if (&TestSpec = Yes) %then %do; put " By(:DataType),"; %end; put ' SendToReport(Dispatch({}, "Bivar Plot", FrameBox, {Marker Size(4), '; put ' Marker Drawing Mode(Outlined)}))))'; %end; %else %if (&NumberOfComponents>2) %then %do; put " , VListBox(Multivariate(Y( "; %do c = 1 %to %eval(&NumberOfComponents-1); put " :pls&c , " ; %end; put " :pls&NumberOfComponents),"; %if (&TestSpec = Yes) %then %do; put " By(:DataType),"; %end; put " Scatterplot Matrix(Density Ellipses(0), Show Correlations(0), Horizontal(1))"; put " ))"; %if (&Include3D = Yes) %then %do; put " , VListBox(Scatterplot 3D("; put " Y(:pls1,:pls2,:pls3),"; %if (&TestSpec = Yes) %then %do; put " By(:DataType),"; %end; put " Frame3D("; put " Set Marker Transparency( 0.6 ),"; put " Set Marker Quality( 1 ),"; put " Set 
Marker Scale( 2.5 )"; put " )"; put " ))"; %end; %end; put " )"; put ");"; put " "; put "InsertInto(DataList,{{""Here:pred_data""}});"; put "InsertInto(TabButtonList, ""Row Scores"");"; put "InsertInto(TabCodeList,expr(Here:vrs()));"; put "InsertInto(TabAfterCodeList, "" "");"; put "InsertInto(TabOpenList,1);"; %if (&PlotColScores=Yes) and (&NumberOfComponents>=2) %then %do; put " "; put "Here:vwp=Function({},{default local},"; put " If(IsEmpty(Here:vars_data),"; put " Here:vars_data=open(""&ClientOutPath.&&&_OutDataVarsName...sas7bdat"",invisible);"; put " Here:vars_data << Select Where(1);"; put " Here:vars_data << Markers(8);"; put " Here:vars_data << Colors(4);"; put " Here:vars_data << Clear Select;"; put ' column("name") << label(1);'; put " );"; put " Current Data Table(Here:vars_data);"; put ' HListBox('; %if (&NumberOfComponents=2) %then %do; put ' VListBox(Bivariate(Y( :pls2), X( :pls1),'; put ' SendToReport(Dispatch({}, "Bivar Plot", FrameBox, {Marker Size(4), '; put ' Marker Drawing Mode(Outlined)}))))'; %end; %else %do; put " Multivariate(Y( "; %do c = 1 %to %eval(&NumberOfComponents-1); put " :pls&c , " ; %end; put " :pls&NumberOfComponents),"; put " Scatterplot Matrix(Density Ellipses(0), Show Correlations(0), Horizontal(1))"; put " )"; %if (&Include3D = Yes) and (&NumberOfComponents>=3) %then %do; put " , Scatterplot 3D("; put " Y(:pls1,:pls2,:pls3),"; put " Frame3D("; put " Set Marker Transparency( 0.6 ),"; put " Set Marker Quality( 1 ),"; put " Set Marker Scale( 2.5 )"; put " )"; put " )"; %end; %end; put " )"; put ");"; put " "; put "InsertInto(DataList,{{""Here:vars_data""}});"; put "InsertInto(TabButtonList, ""Variable Weights"");"; put "InsertInto(TabCodeList,expr(Here:vwp()));"; put "InsertInto(TabAfterCodeList, "" "");"; put "InsertInto(TabOpenList,1);"; %end; %end; run; %goto End_JSL; %SurvivalJSL:; * output JSL code for Survival Modeling; * this part shall be revised if applying any accessment; %if %length(&ColorVar) %then %do; * extract 
variable values for &ColorVar for coloring points in one-way plot for median survival;
  /* cmap: one row per observation sorted by the colour variable; cmap2: one row per distinct value. */
  proc sort data=&SPM_OutData3(keep=&ColorVar &IDVar _DataType_) out=cmap; by &ColorVar; run;
  proc sort data=cmap out=cmap2 nodupkey; by &ColorVar; run;

  /* Publish each distinct colour value (_vColorVar n) with its colour index (_iColorVar n). */
  /* The index wraps after 36, and _nColor receives the number of distinct values.           */
  data _null_; set cmap2 end=_e_; retain _ColorIndex_ 0;
    name = "_vColorVar"||trim(left(_n_)); call symput(name,trim(left(&ColorVar)));
    _ColorIndex_ = _ColorIndex_ + 1;
    if _colorIndex_ > 36 then _ColorIndex_ = _ColorIndex_ - 36;
    name = "_iColorVar"||trim(left(_n_)); call symput(name,trim(left(_ColorIndex_)));
    if _e_ then call symput("_nColor",_n_);
  run;

  /* NOTE(review): the subsetting IF below runs before the first. test, so a group whose first */
  /* row has a blank _DataType_ does not advance the index - confirm this is intended.         */
  * Set color index for each IDVar to be applied in JSL;
  data cmap; set cmap; by &colorVar; retain _ColorIndex_ 0;
    if _DataType_ ne "";
    if first.&ColorVar then _ColorIndex_ = _ColorIndex_ + 1;
    if _colorIndex_ > 36 then _ColorIndex_ = _ColorIndex_ - 36;
  run;
%end;
%else %do;
  /* No colour variable: every kept row gets colour index 1. */
  data cmap; set &SPM_OutData3(keep=&ColorVar &IDVar _DataType_); if _DataType_ ne ""; _colorIndex_ = 1; run;
  %let _nColor = 1;
%end;

%if %length(&TestInData) %then %do;
  /* Split the map: Training rows stay in cmap, Test rows go to cmap_test. */
  data cmap cmap_test; set cmap;
    if _DataType_ = "Training" then output cmap;
    else if _DataType_ = "Test" then output cmap_test;
  run;

  /* Test rows: one _CVar_te / _IDVar_te / _cIDVar_te macro variable per row, plus the counts. */
  data _null_; set cmap_test end=_e_;
    %if %length(&ColorVar) %then %do;
      by &ColorVar; retain _nColor_ 0;
      if first.&ColorVar then _nColor_+1;
      name = "_CVar_te"||trim(left(_n_)); call symput(name,&ColorVar);
    %end;
    name = "_IDVar_te"||trim(left(_n_)); call symput(name,&IDVar);
    name = "_cIDVar_te"||trim(left(_n_)); call symput(name,_ColorIndex_);
    if _e_ then do;
      %if %length(&ColorVar) %then %do;
        call symput("_nColor_te",_nColor_);
      %end;
      call symput("_nTest",_n_);
    end;
  run;
  %if ^%length(&ColorVar) %then %let _nColor_te = 1;
%end;

/* Training rows: same layout with the _tr suffix. The colour value was previously stored   */
/* under _CVar_te, which overwrote the test-set values written by the step above; it now    */
/* uses _CVar_tr like the other training names in this step.                                */
data _null_; set cmap end=_e_;
  %if %length(&ColorVar) %then %do;
    by &ColorVar; retain _nColor_ 0;
    if first.&ColorVar then _nColor_+1;
    name = "_CVar_tr"||trim(left(_n_)); call symput(name,&ColorVar);
  %end;
  name = "_IDVar_tr"||trim(left(_n_)); call symput(name,&IDVar);
  name = "_cIDVar_tr"||trim(left(_n_));
call symput(name,_ColorIndex_); if _e_ then do; %if %length(&ColorVar) %then %do; call symput("_nColor_tr",_nColor_); %end; call symput("_nTrain",_n_); end; run; %if ^%length(&ColorVar) %then %let _nColor_tr = 1; %if %length(&TestInData) %then %do; %let _nSample1 = &_nTest; %let _DataTable1 = %scan(&SPM_OutData2,2,.).sas7bdat; %let _IDName = _IDVar_te; %let _cIDName = _cIDVar_te; %let _DataType1 = Test; %end; %else %do; %let _nSample1 = &_nTrain; %let _DataTable1 = %scan(&SPM_OutData,2,.).sas7bdat; %let _IDName = _IDVar_tr; %let _cIDName = _cIDVar_tr; %let _DataType1 = Training; %end; * check missing results; data _null_; set &SPM_OutData3(keep=_train_ &IDVar _DataType_); by descending _train_; retain _c_; if first._train_ then _c_=0; _c_+1; if last._train_ then do; if _train_ = 1 then call symput("nsResults1",_c_); else if _train_ = 0 then call symput("nsResults0",_c_); end; run; %let nmissingResults1 = %eval(&nAllObs_sTrain-&nsResults1); %if %length(&TestInData) %then %let nmissingResults0 = %eval(&nAllObs_sTest-&nsResults0); %else %let nmissingResults0 = 0; %global _mainjslfile_; %let _mainjslfile_ = %quote(&&&_JSLFile); %let IDVarLabel = &IDVar; %if %length(&IDVarLabel) %then %do; %CheckLabel(&SPM_OutData3,IDVarLabel); %end; %let ColorVarLabel = &ColorVar; %if %length(&ColorVarLabel) %then %do; %CheckLabel(&SPM_OutData3,ColorVarLabel); %end; data _null_; file "&&&_JSLFile"; put "//!"; put 'Names Default to Here(1);'; put "Here:DataList = {};"; put "Here:TabButtonList = {};"; put "Here:TabCodeList = {};"; put "Here:TabAfterCodeList = {};"; put "Here:TabOpenList = {};"; put " "; * Set 36 colors (may be refined); put '//color list to be applied for groupped coloring'; put 'Here:ColorList = {3,5,6,7,8,4,9,10,11,12,13,14,' / ' 51,53,54,55,56,52,57,58,59,60,61,62,' / ' 67,69,70,71,72,68,73,74,75,76,77,78};'; put " "; put 'Here:Survival=Function({}, {default local},'; put " If(IsEmpty(Here:vars_data),"; put " 
Here:vars_data=open(""&ClientOutPath.&&&_OutDataVarsName...sas7bdat"",invisible);"; put " );"; put ' current data table(Here:vars_data);'; put ' vListBox('; put ' LineUpBox(NCol(2),Spacing(3),'; put ' Here:tb1a = TextBox(" Predictor Reduction Settings: "),'; put ' Here:tb1b = TextBox(" ' "%nrbquote(&PRS)" '"),'; put ' Here:tb1b << SetWrap(1000);'; %if %length(&ASet) %then %do; put ' Here:tb2a = TextBox(" Analysis Settings: "),'; put " Here:tb2b = TextBox(""\[%nrbquote(&ASet)]\""),"; put ' Here:tb2b << SetWrap(1000);'; %end; put ' Here:tb3a = TextBox(" Final Selected Variables: "),'; put ' Here:tb3b = TextBox(" ' "&SelVarShow" '"),'; put ' Here:tb3b << SetWrap(1000);'; %if (&TestSpec = Yes) %then %do; put ' tb5a = TextBox(" Test Set Criteria: "),'; put ' tb5b = TextBox(" ' "Harrell's C Statistic = &Harrells_C2_2" '"),'; put ' tb5b << SetWrap(1000);'; %end; put ' Here:tb4a = TextBox(" Training Set Criteria: "),'; put ' Here:tb4b = TextBox(" ' "Harrell's C Statistic = &Harrells_C2" '"),'; put ' Here:tb4b << SetWrap(1000);'; put ' Here:tb5a = TextBox(" Reference Survival Time: "),'; put ' Here:tb5b = TextBox(" ' "&RefTime" '"),'; put ' Here:tb5b << SetWrap(1000);'; %if &nmissingResults1 %then %do; put ' Here:tb7a = TextBox(" Warning: "),'; %if ^&nmissingResults0 %then %do; %if &nmissingResults1 = 1 %then %do; put ' Here:tb7b = TextBox("' " 1 observation is missing" '"),'; %end; %else %do; put ' Here:tb7b = TextBox("' " %trim(&nmissingResults1) observations are missing" '"),'; %end; %end; %else %do; put ' Here:tb7b = TextBox("' " %trim(&nmissingResults1) and %trim(&nmissingResults0) observations ' 'in the training and test data sets, respectively, are missing" '"),'; %end; put ' Here:tb7b << SetWrap(1000);'; %end; put ' ),'; %if &nmissingResults1 %then %do; put ' Here:tb8=TextBox("Missing results are due either to missing or invalid values for the time, ' 'censoring, frequency, or explanatory variables or to invalid operations in generating the ' 'values for some of the 
explanatory variables."),'; put ' Here:tb8 << SetWrap(1000);'; put ' textbox(" "),'; %end; put ' hListBox('; ***** one plot for suvival function ********; put " If(IsEmpty(Here:Sur_data_1),"; put " Here:Sur_data_1=open(""&ClientOutPath.%scan(&SPM_OutData3,2,.).sas7bdat"",UseLabelsForVarNames(true),invisible);"; %if %length(&IDVarLabel) %then %do; put ' Column("' "&IDVarLabel" '") << Label(1);'; %end; put " );"; put " current data table(Here:Sur_data_1);"; put ' vListBox('; put ' Here:SurFunc1 = Parallel Plot('; put ' Scale Uniformly( 1 ),'; put ' Center at zero( 0 ),'; put ' Y( '; %do i = 1 %to &nBin; put ' :Name("' "%trim(%left(&&wide_lList&i))" '"), '; %end; put ' :Name("' "%trim(%left(&&wide_lList&i))" '")'; put ' ),'; %if ^%length(&TestInData) %then %do; put ' SendToReport('; put ' Dispatch('; put ' {},'; put ' "Parallel Plot",'; put ' OutlineBox,'; put " {Set Title( ""Survival Curves: DataType=&_DataType1"" )}"; put ' ),'; put ' Dispatch( {}, "Parallel Coord", FrameBox, {Frame Size( 450, 225 )} )'; put ' )'; %end; %else %do; put " By( :DataType ),"; put " SendToByGroup("; put ' {:DataType == "Test"},'; put ' SendToReport('; put ' Dispatch('; put ' {},'; put ' "Parallel Plot DataType=Test",'; put ' OutlineBox,'; put ' {Set Title( "Survival Plot: DataType=Test" )}'; put ' ),'; put ' Dispatch('; put ' {"Parallel Plot DataType=Test"},'; put ' "Parallel Coord",'; put ' FrameBox,'; put ' {Frame Size( 450, 220 )}'; put ' )'; put ' )'; put ' ),'; put " SendToByGroup("; put ' {:DataType == "Training"},'; put ' SendToReport('; put ' Dispatch('; put ' {},'; put ' "Parallel Plot DataType=Training",'; put ' OutlineBox,'; put ' {Set Title( "Survival Plot: DataType=Training" )}'; put ' ),'; put ' Dispatch('; put ' {"Parallel Plot DataType=Training"},'; put ' "Parallel Coord",'; put ' FrameBox,'; put ' {Frame Size( 450, 220 )}'; put ' )'; put ' )'; put ' )'; %end; put ' )'; *end of Parallel Plot; put ' ),'; * end of vListBox for Parallel Plot; ***** one plot of distribution 
of reference survival estimates *****; %if %length(&ColorVar) %then %do; * set color state for rows; put ' for each row('; put ' if(' ":&ColorVarLabel == " '"' "&_vColorVar1" '",'; put ' ColorOf( row state() ) = ColorList[ 1 ];'; %do i = 2 %to &_nColor; put ' ,'; put " :&ColorVarLabel == " '"' "&&_vColorVar&i" '",'; put " ColorOf( row state() ) = ColorList[ &&_iColorVar&i ];"; %end; put ' )'; put ' );'; * create One-Way plot for grouped reference survival estimates; put ' vListBox('; put ' Here:RefSur1 = Oneway('; put ' Y( :Reference Survival Estimate ),'; put " X( :&ColorVarLabel ),"; put ' With Best( 1 ),'; put ' Box Plots( 1 ),'; put ' Mean Diamonds( 1 ),'; put ' Comparison Circles( 1 ),'; put ' Points Jittered( 1 ),'; %if ^%length(&TestInData) %then %do; put ' SendToReport('; put ' Dispatch('; put ' {},'; put ' "Oneway Analysis of Reference Survival Estimate By ' "&ColorVarLabel" '",'; put ' OutlineBox,'; put " Set Title( ""Reference Survival Estimate at &DepVar=&RefTime By &ColorVar.: DataType=&_DataType1"" )"; put ' ),'; put ' Dispatch( {}, "Means Comparisons", OutlineBox, Close( 1 ) )'; put ' )'; %end; %else %do; put " By( :DataType ),"; put ' SendToByGroup('; put ' {:DataType == "Test"},'; put ' SendToReport('; put ' Dispatch('; put ' {},'; put ' "Oneway Analysis of Reference Survival Estimate By ' "&ColorVarLabel" ' DataType=Test",'; put ' OutlineBox,'; put " Set Title( ""Reference Survival Estimate at &DepVar=&RefTime By &ColorVar.: DataType=Test"" )"; put ' ),'; put ' Dispatch( {}, "Means Comparisons", OutlineBox, Close( 1 ) )'; put ' )'; put ' ),'; put ' SendToByGroup('; put ' {:DataType == "Training"},'; put ' SendToReport('; put ' Dispatch('; put ' {},'; put ' "Oneway Analysis of Reference Survival Estimate By ' "&ColorVarLabel" ' DataType=Training",'; put ' OutlineBox,'; put " Set Title( ""Reference Survival Estimate at &DepVar=&RefTime By &ColorVar.: DataType=Training"" )"; put ' ),'; put ' Dispatch( {}, "Means Comparisons", OutlineBox, Close( 1 ) 
)'; put ' )'; put ' ),'; %end; put ' );'; put ' ),'; %end; %else %do; * create Distribution for referal survival estimates; %if ^%length(&TestInData) %then %do; put ' Distribution( Continuous Distribution( Column( :Reference Survival Estimate ) ) );'; %end; %else %do; put ' Distribution( Continuous Distribution( Column( :Reference Survival Estimate ) ), By( :DataType ) );'; %end; %end; put ' );'; * end of vListBox for Reference Survival Estimate; put ' );'; * end of vListBox; put ');'; %if %length(&TestInData) %then %do; put "InsertInto(DataList,{{"":Here:vars_data"",""Here:Sur_data_1"",""Here:Sur_data_2"",""Here:RefSur_data_1"",""Here:RefSur_data_2""}});"; %end; %else %do; put "InsertInto(DataList,{{"":Here:vars_data"",""Here:Sur_data_1""}});"; %end; put "InsertInto(TabButtonList, ""Results"");"; put "InsertInto(TabCodeList,expr(Here:Survival()));"; put "InsertInto(TabAfterCodeList, "" "");"; put "InsertInto(TabOpenList,1);"; %if (&ProcessName = PartitionTrees) and %upcase(&TreeMethod)=TREE and ^%symexist(CVing) %then %do; data _null_; file "&&&_JSLFile" mod; put "include(jg:LSpath[&LS_Index]||""\JSLFiles\Partition Trees Viewer Selection.jsl"");"; run; %end; run; /* Old code apply overlay plot for drawing survival function data _null_; file "&&&_JSLFile"; put "//!"; put 'Names Default to Here(1);'; put "Here:DataList = {};"; put "Here:TabButtonList = {};"; put "Here:TabCodeList = {};"; put "Here:TabAfterCodeList = {};"; put "Here:TabOpenList = {};"; put " "; * Set 36 colors (may be refined); put '//color list to be applied for groupped coloring'; put 'Here:ColorList = {3,5,6,7,8,4,9,10,11,12,13,14,' / ' 51,53,54,55,56,52,57,58,59,60,61,62,' / ' 67,69,70,71,72,68,73,74,75,76,77,78};'; put " "; put 'Here:Survival=Function({}, {default local},'; put " If(IsEmpty(Here:vars_data),"; put " Here:vars_data=open(""&ClientOutPath.&&&_OutDataVarsName...sas7bdat"",invisible);"; put " );"; put ' current data table(Here:vars_data);'; put ' vListBox('; put ' 
LineUpBox(NCol(2),Spacing(3),'; put ' Here:tb1a = TextBox(" Predictor Reduction Settings: "),'; put ' Here:tb1b = TextBox(" ' "%nrbquote(&PRS)" '"),'; put ' Here:tb1b << SetWrap(1000);'; %if %length(&ASet) %then %do; put ' Here:tb2a = TextBox(" Analysis Settings: "),'; put " Here:tb2b = TextBox(""\[%nrbquote(&ASet)]\""),"; put ' Here:tb2b << SetWrap(1000);'; %end; put ' Here:tb3a = TextBox(" Final Selected Variables: "),'; put ' Here:tb3b = TextBox(" ' "&SelVarShow" '"),'; put ' Here:tb3b << SetWrap(1000);'; %if (&TestSpec = Yes) %then %do; put ' tb5a = TextBox(" Test Set Criteria: "),'; put ' tb5b = TextBox(" ' "Harrell's C Statistic = &Harrells_C2_2" '"),'; put ' tb5b << SetWrap(1000);'; %end; put ' Here:tb4a = TextBox(" Training Set Criteria: "),'; put ' Here:tb4b = TextBox(" ' "Harrell's C Statistic = &Harrells_C2" '"),'; put ' Here:tb4b << SetWrap(1000);'; %if &nmissingResults1 %then %do; put ' Here:tb7a = TextBox(" Warning: "),'; %if ^&nmissingResults0 %then %do; put ' Here:tb7b = TextBox("' "%trim(&nmissingResults1) observations in training dataset are not used" '"),'; %end; %else %do; put ' Here:tb7b = TextBox("' "%trim(&nmissingResults1) and %trim(&nmissingResults0) observations in ' 'the training and test sets, respectively, are not used" '"),'; %end; put ' Here:tb7b << SetWrap(1000);'; %end; put ' ),'; %if &nmissingResults1 %then %do; put ' Here:tb8=TextBox("Missing results are due either to missing or invalid values for the time, ' 'censoring, frequency or explanatory variables or to invalid operations in generating the ' 'values for some of the explanatory variables."),'; put ' Here:tb8 << SetWrap(1000);'; put ' textbox(" "),'; %end; put ' hListBox('; ***** one plot for suvival function ********; put " If(IsEmpty(Here:Sur_data_1),"; put " Here:Sur_data_1=open(""&ClientOutPath.&_DataTable1"",invisible);"; put " );"; put " current data table(Here:Sur_data_1);"; put ' Here:SurFunc1 = Overlay Plot('; put " X( :&DepVar ),"; put ' Y( '; %do i = 1 %to 
%eval(&_nSample1-1); put ' :Name("' "&&&_IDName.&i" '"), '; %end; put ' :Name("' "&&&_IDName.%trim(%left(&_nSample1))" '")'; put ' ),'; put ' Overlay Axis << {'; put ' Min( -0.02 ), Max( 1.02 ), Minor Ticks( 1 ),'; put ' Add Ref Line( 0.25, Dotted, "Black" ),'; put ' Add Ref Line( 0.5, Dotted, "Black" ),'; put ' Add Ref Line( 0.75, Dotted, "Black" )'; put ' },'; put ' Separate Axes( 1 ),'; put ' X Axis << {'; put ' Min( -1 ),'; put " Add Ref Line( &RefTime, Dotted, ""Black"" )"; put ' },'; put ' Step(1),'; put ' Show Points(0),'; %if %length(&ColorVar) %then %do; * handle grouped colors here!!!; * may need to refine this part for test dataset!!!; %do i = 1 %to &_nSample1; put ' :Name("' "&&&_IDName.&i" '")' "( Connect Color( ColorList[ %trim(%left(&&&_cIDName.&i)) ] ) ),"; %end; %end; put ' SendToReport('; put ' Dispatch('; put ' {},'; put ' "Overlay Plot",'; put ' OutlineBox,'; put ' Set Title( "Estimated Survival Function: DataType=' "&_DataType1" '" )'; put ' ),'; put ' Dispatch( {}, "", AxisBox( 2 ), Add Axis Label( "Survival Rate" ) ),'; put ' Dispatch( {}, "Overlay Plot", FrameBox, Frame Size( 400, 250 ) ),'; put ' Dispatch( {}, "", LegendBox, {Set Wrap( 5 )} )'; put ' )'; put ' ),'; *end of overlay plot; ***** one plot of distribution of reference survival estimates *****; put " If(IsEmpty(Here:RefSur_data),"; put " Here:RefSur_data=open(""&ClientOutPath.%scan(&SPM_OutData3,2,.).sas7bdat"",invisible);"; put " );"; %if %length(&TestInData) %then %do; put " If(IsEmpty(Here:RefSur_data_1),"; put ' Here:RefSur_data_1 = Here:RefSur_data<