/************************************************************************
 * Program: writePremodelingCARTcmd.sas
 *
 * Write a batch cmd file for CART to explore data with varying control
 * parameters such as splitting rules, priors, cost, minChild
 * and atom (make sure minChild <= atom/2).
 * Reference code: see stuart's writemarscmd.sas
 * Author: Nancy Cheng
 * Date:   01/04/2006
 * Mod:    08/10/2006
 ************************************************************************
 * A sample of CART cmd file is as below:
 *
 *   LOPTIONS MEANS = NO, PLOTS = NO, TIMING = NO, NOPRINT
 *   FORMAT = 4/UNDERFLOW
 *   USE "S:\dataset\fv_dc274.sas7bdat"
 *   LIMIT MINCHILD = 15, ATOM = 30, DEPTH = 2
 *   MODEL DFS0
 *   KEEP AGEMOS
 *   CATEGORY DFS0
 *   Misclassify Cost = 1 Classify 0 as 1
 *   Misclassify Cost = 1 Classify 1 as 0
 *   OUTPUT 'S:\FV\CART\OUTPUT\Premodeling\run1_mc15.dat'
 *   ERROR EXPLORATORY
 *   METHOD GINI POWER = 0.0000
 *   BUILD
 *   MODEL DFS0
 *   KEEP ANFAMINC
 *   CATEGORY DFS0
 *   OUTPUT 'S:\FV\CART\OUTPUT\run1_mc15.DAT'
 *   ERROR EXPLORATORY
 *   METHOD GINI POWER = 0.0000
 *   BUILD
 ************************************************************************/

footnote 'writePremodelingCARTcmd.sas';

/*----------------------------------------------------------------------
 * Macro writePremodelingCARTcmd: append CART commands to a cmd file,
 * one MODEL ... BUILD section per predictor in xlist.
 *
 * Parameters:
 *   firstCall - 1 if this is the first call for this cmd file in the SAS
 *               session (assigns the CARTCODE fileref, writes the
 *               LOPTIONS/FORMAT/USE header and resets the run number),
 *               0 otherwise (the existing CARTCODE fileref is reused)
 *   cmdfile   - external file used to store the commands, including the
 *               whole path; single quotes are added if missing
 *   usefile   - input data file; single quotes are added if missing
 *   yvar      - response variable
 *   xlist     - list of predictors (can be only one), separated by comma
 *               or space
 *   catVars   - categorical variables, separated by comma or space
 *   minChild  - minimum # of obs allowed in a child node; when omitted no
 *               MINCHILD option is written and 1 is used in the output
 *               file name
 *   atom      - minimum size below which a node will not be split; when
 *               omitted no ATOM option is written and 10 is used in the
 *               output file name. Note minChild <= atom/2, otherwise CART
 *               will set minChild=atom/2
 *   priors    - prior class probabilities (EQUAL, DATA, LEARN, TEST, MIX
 *               or SPECIFY 0=, 1= ); no PRIORS line is written when blank
 *   misCost01 - misclassification cost if class 0 is misclassified as 1
 *   misCost10 - misclassification cost if class 1 is misclassified as 0
 *   splitRule - splitting rule, GINI when blank
 *   power     - used with the splitting rule to tune CART away from
 *               end-cut splits, 0.0 when blank
 *   outDir    - folder for the output files from the CART runs
 *   weightVar - weight variable, if needed
 *
 * Side effects: creates/updates the global macro variables runNo and
 * memCount and appends to the file behind the CARTCODE fileref.
 *----------------------------------------------------------------------*/
options symbolgen mprint;

%macro writePremodelingCARTcmd (firstCall=, cmdfile=, usefile=, yvar=, xlist=, catVars=, minChild=, atom=, priors=,
                                misCost01=, misCost10=, splitRule=, power=, outDir=, weightVar=);
  /* Keep working variables out of the caller's and the global scope. */
  %local i j x catWord isCat l l1 l3 dir;

  %if &firstCall=1 %then %do;
    /* memCount records the number of files in the outDir directory;
       runNo is the running number used in the output file names. */
    %global runNo memCount;
    %let runNo=0;

    %if %qsubstr(&cmdfile, 1, 1) ne %str(%') %then %let cmdfile=%str(%')%str(&cmdfile)%str(%');
    filename cartcode &cmdfile;

    /* Header of the cmd file: options, number format and input data. */
    data _null_;
      file cartcode mod;
      line1='LOPTIONS MEANS = YES, PLOTS = NO, TIMING = NO, NOPRINT';
      put line1;
      line2='FORMAT = 4/UNDERFLOW';
      put line2;
      %if %qsubstr(&usefile, 1, 1) ne %str(%') %then %let usefile=%str(%')%str(&usefile)%str(%');
      %let l3=%str(USE &usefile);
      /* resolve() is used so the quoted file name is expanded at run time
         instead of being embedded in a SAS string literal. */
      line3=resolve('&l3');
      put line3;
    run;
  %end;

  /* LIMIT statement. %sysevalf(%superq(p)=, boolean) is a blank test that
     never evaluates the parameter value as an expression. */
  data _null_;
    file cartcode mod;
    %if %sysevalf(%superq(minChild)=, boolean) and %sysevalf(%superq(atom)=, boolean) %then %do;
      /* neither given: write no LIMIT line, remember defaults for the file name */
      %let minChild=1;
      %let atom=10;
      line='';
    %end;
    %else %if %sysevalf(%superq(minChild)=, boolean) %then %do;
      /* only atom given */
      line='LIMIT ATOM = '||trim("&atom")||', DEPTH = 2';
      %let minChild=1;
    %end;
    %else %if %sysevalf(%superq(atom)=, boolean) %then %do;
      /* only minChild given */
      line='LIMIT MINCHILD = '||trim("&minChild")||', DEPTH = 2';
      %let atom=10;
    %end;
    %else %do;
      /* both given */
      line='LIMIT MINCHILD = '||trim("&minChild")||', ATOM = '||trim("&atom")||', DEPTH = 2';
    %end;
    if line ne '' then do;
      put line;
    end;
  run;

  /* Number of files already in the output directory (previous CART runs). */
  %fileNumInDir(dir=&outDir);
  /* Start numbering after the existing files; otherwise keep the current runNo. */
  %if &runNo=0 %then %let runNo=%eval(&memCount+1);

  /* i is the position of the current predictor in xlist. */
  %let i=1;
  %do %while(%length(%qscan(&xlist, &i)) > 0);
    %let x=%scan(&xlist, &i);

    /* Whole-word, case-insensitive lookup of the predictor in catVars
       (a substring search would also match e.g. GH1 inside GH10). */
    %let isCat=0;
    %if not %sysevalf(%superq(catVars)=, boolean) %then %do;
      %let j=1;
      %let catWord=%qscan(&catVars, &j);
      %do %while(%length(&catWord) > 0);
        %if %qupcase(&catWord) = %qupcase(&x) %then %let isCat=1;
        %let j=%eval(&j+1);
        %let catWord=%qscan(&catVars, &j);
      %end;
    %end;

    /* MODEL / KEEP / CATEGORY / PRIORS */
    data _null_;
      file cartcode mod;
      line1='MODEL '||trim("&yvar");
      put line1;
      line2='KEEP '||trim("&x");
      put line2;
      %if &isCat = 1 %then %let l=%str(CATEGORY &yvar)%str(,)%str( &x);
      %else %let l=%str(CATEGORY &yvar);
      line3=resolve('&l');
      put line3;
      %if not %sysevalf(%superq(priors)=, boolean) %then %do;
        line4='PRIORS '||trim("&priors");
        put line4;
      %end;
    run;

    /* Misclassification costs are written for the first predictor of a call only. */
    %if &i=1 %then %do;
      data _null_;
        file cartcode mod;
        %if %sysevalf(%superq(misCost01)=, boolean) and %sysevalf(%superq(misCost10)=, boolean) %then %do;
          %let misCost01=1;
          %let misCost10=1;
          line1='';
          line2='';
        %end;
        %else %if %sysevalf(%superq(misCost01)=, boolean) %then %do;
          %let misCost01=1;
          line2='Misclassify Cost = '||trim("&misCost10")||' Classify 1 as 0';
          put line2;
        %end;
        %else %if %sysevalf(%superq(misCost10)=, boolean) %then %do;
          %let misCost10=1;
          line1='Misclassify Cost = '||trim("&misCost01")||' Classify 0 as 1';
          put line1;
        %end;
        %else %do;
          line1='Misclassify Cost = '||trim("&misCost01")||' Classify 0 as 1';
          put line1;
          line2='Misclassify Cost = '||trim("&misCost10")||' Classify 1 as 0';
          put line2;
        %end;
      run;
    %end;

    /* OUTPUT / ERROR / METHOD */
    data _null_;
      file cartcode mod;
      %let dir=%formatDir(&outDir);
      %let l1=%str(OUTPUT %'&dir)%str(run&runNo)%str(_mc&minChild)%str(_atom&atom)%str(_01mis&misCost01)%str(_10mis&misCost10)%str(.dat%');
      line1=resolve('&l1');
      put line1;
      line2='ERROR EXPLORATORY';
      put line2;
      %if %sysevalf(%superq(splitRule)=, boolean) %then %let splitRule=%str(GINI);
      %if %sysevalf(%superq(power)=, boolean) %then %let power=0.0;
      line3='METHOD '||trim("&splitRule")||' POWER = '||trim("&power");
      put line3;
    run;

    /* Optional WEIGHT statement */
    %if not %sysevalf(%superq(weightVar)=, boolean) %then %do;
      data _null_;
        file cartcode mod;
        line='WEIGHT '||trim("&weightVar");
        put line;
      run;
    %end;

    data _null_;
      file cartcode mod;
      line='BUILD';
      put line;
    run;

    %let i=%eval(&i+1);
    %let runNo=%eval(&runNo+1);
  %end;
%mend writePremodelingCARTcmd;

/*----------------------------------------------------------------------
 * Macro formatDir: function-style macro that returns a directory name
 * without enclosing single quotes and with a trailing backslash.
 *   dir - directory (file folder), optionally enclosed in single quotes
 * Returns nothing when the directory is empty.
 *----------------------------------------------------------------------*/
%macro formatDir(dir);
  %local len;
  %let len=%length(&dir);
  /* Remove the pair of single quotes if the value starts with one. */
  %if &len > 1 %then %do;
    %if %qsubstr(&dir, 1, 1) eq %str(%') %then %do;
      %let len=%eval(&len-2);
      %if &len > 0 %then %let dir=%qsubstr(&dir, 2, &len);
      %else %let dir=;
    %end;
  %end;
  /* Add \ to the end of dir if there is not one already. */
  %if &len > 0 %then %do;
    %if %qsubstr(&dir, &len, 1) ne %str(\) %then %str(&dir)%str(\);
    %else %str(&dir);
  %end;
%mend formatDir;

/*----------------------------------------------------------------------
 * Macro fileNumInDir: store the number of members of a directory in the
 * macro variable memCount (created as global if it does not exist yet).
 *   dir - directory, optionally enclosed in quotes
 * memCount is set to 0 and a warning is written to the log when the
 * directory is not given or cannot be opened.
 *----------------------------------------------------------------------*/
%macro fileNumInDir(dir=);
  %local fileref rc dirId;
  %if not %symexist(memCount) %then %do;
    %global memCount;
  %end;
  %let memCount=0;
  %let dirId=0;

  %if %sysevalf(%superq(dir)=, boolean) %then %do;
    %put WARNING: fileNumInDir: no directory given, memCount set to 0.;
    %return;
  %end;

  /* filename() needs the bare path, so drop enclosing quotes if present. */
  %let dir=%qsysfunc(dequote(%superq(dir)));

  /* fileref is blank here, so filename() generates a fileref name and
     stores it in the macro variable. */
  %let rc=%sysfunc(filename(fileref, &dir));
  %if &rc = 0 %then %do;
    /* dopen returns a directory id (>0) if the operation is successful. */
    %let dirId=%sysfunc(dopen(&fileref));
    %if &dirId > 0 %then %do;
      /* number of files in the directory */
      %let memCount=%sysfunc(dnum(&dirId));
      /* close the directory */
      %let rc=%sysfunc(dclose(&dirId));
    %end;
    %else %put WARNING: fileNumInDir: cannot open directory &dir, memCount set to 0.;
    /* release the generated fileref */
    %let rc=%sysfunc(filename(fileref));
  %end;
  %else %put WARNING: fileNumInDir: cannot assign a fileref to &dir, memCount set to 0.;
%mend fileNumInDir;

/************ test ************/
%let cmdfile=%str('S:\testMacro\Premodeling.cmd');
%let outDir=%str(S:\testMacro\Premodeling\output);
%let usefile=%str('S:\testMacro\fv_ctr115.sas7bdat');
%let xlist=%str(AGEMOS, ANFAMINC, COFEED01, DCTOTAL3, DDSP5000, ETHGP2, INSCAT, L10LAC1, L10MUT1, MOMEDU3);
%let catVars=%str(DFS0, INSCAT);
%writePremodelingCARTcmd(firstCall=1, cmdfile=&cmdfile, usefile=&usefile, yvar=DFS0, xlist=&xlist, catVars=&catVars,
                         minChild=12, atom=30, priors=EQUAL, misCost01=1, misCost10=1, splitRule=GINI, power=0.0,
                         outDir=&outDir, weightVar=);
%writePremodelingCARTcmd(firstCall=0, cmdfile=&cmdfile, usefile=&usefile, yvar=DFS0, xlist=&xlist, catVars=&catVars,
                         minChild=25, atom=30, priors=EQUAL, misCost01=2, misCost10=1, splitRule=GINI, power=0.0,
                         outDir=&outDir, weightVar=);

%let cmdfile=%str('S:\testMacro\Premodeling2.cmd');
%let outDir=%str(S:\testMacro\Premodeling\output2);
%let usefile=%str('S:\testMacro\fv_ctr115.sas7bdat');
/* 76 vars */
%let xlist=%str(AGEMOS, ANFAMINC, ANTIBI1Y_, ANTIBPRG_, BIRTHWT, BOTHPARS, BRELSE01, BRSELF01, BRSTFD01, BRTHORD, CHELSEN, CHSELFN, COFEED01_, DAYCAR01_, DAYOFF01_, DCTOTAL3, DDSP5000, DHCRT_PCT, DKCRT_PCT, ECCBOT01_, ETHGP2, EVRBOT01, FLOPASTE_, FLOSUPPL_, FPPM1, FQPASTE, FRSVIS01, GH1, GH2, HARMSW01, HDSTRT01_, INSCAT, IRONDEF, L10LAC1, L10MUT1, LEADEXP, MALE01, MOMAGE_, MOMEDU3, MOMEMP_, MOMOCC_, MOMPBEXP_, MOMWKHR, NCHILDRN_, NDAYSLPB_, NMEALDAY, NSNKAFT, NSNKDAY, NSNKEVE, NSNKMORN, NTEETH, PARDEC01, PDECAY01_, PDOPAIN, PLAST_VI, PTRTDEC_, SFGH01, STILLB01, SWTBDNOW, SWTBED12, USALAD, WHYAFRAI, WHYCHANC, WHYFAR, WHYNOINS, WHYNONEE, WHYPAIN, WHYTIME, WHYTOOEX, WHYYOUNG, WIC01_, XASNPOPCY, XBLKPOPCY, XHISPOPCY, XHL25KCY, XP0TO5CY);
/* NOTE(review): DAYOFF01, MOMEMP, MOMPBEXP and PDECAY01 have no trailing
   underscore here but do in xlist, so they never match - confirm the names. */
%let catVars=%str(DFS0 DAYOFF01 ETHGP2 INSCAT MOMEMP MOMOCC_ MOMPBEXP PDECAY01 PDOPAIN);
%writePremodelingCARTcmd(firstCall=1, cmdfile=&cmdfile, usefile=&usefile, yvar=DFS0, xlist=&xlist, catVars=&catVars,
                         minChild=12, atom=30, priors=EQUAL, misCost01=1, misCost10=1, splitRule=GINI, power=0.0,
                         outDir=&outDir, weightVar=);

%let cmdfile=%str('S:\testMacro\Premodeling3.cmd');
%let outDir=%str(S:\testMacro\Premodeling\output3);
%let usefile=%str('S:\testMacro\fv_ctr115.sas7bdat');
%let xlist=%str(AGEMOS, ANFAMINC, COFEED01, DCTOTAL3, DDSP5000, ETHGP2, INSCAT, L10LAC1, L10MUT1, MOMEDU3);
%let catVars=%str(DFS0, INSCAT);
%writePremodelingCARTcmd(firstCall=1, cmdfile=&cmdfile, usefile=&usefile, yvar=DFS0, xlist=&xlist, catVars=&catVars,
                         outDir=&outDir, weightVar=);
%writePremodelingCARTcmd(firstCall=0, yvar=DFS0, xlist=&xlist, catVars=&catVars, minChild=15, atom=30, priors=EQUAL,
                         misCost01=2, splitRule=GINI, power=0.0, outDir=&outDir);