Macro CheckAll for Check All That Apply Data

From sasCommunity
Jump to: navigation, search

SUGI22.236.1997: %CheckAll: a macro to produce a frequency of response data set from multiple-response data

Author: Ronald_J._Fehd

Notes:

Programs

Macro CheckAll

 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* MACRO: CHECKALL *
* NOTE: uses macro ARRAY *
* *
* USAGE: 1) %CHECKALL(series-name);*assumes a data set contains vars *
* 2) %CHECKALL(series-name,LIST=var1 var2 var3 .. varN); *
* 3) %CHECKALL(series-name,DATA=dataset); *
* 4) %CHECKALL(series-name,TRIMCHAR=-); *
* 5) %CHECKALL(series-name,PRINT=1); *
* 6) %CHECKALL(series-name,SORT=0); *
* 7) %CHECKALL(series-name,BY_VAR=var-name); *
* 8) %CHECKALL(series-name,BY_VAR=var-name,BY_VALUE='B'); *
* 9) %CHECKALL(series-name,TRUE='Y'); *
* *
* DESCRIPTION: *
* PROCessing of a series of multiple-response variables, *
* usually identified by instruction "Check-all-that-apply", *
* to produce a standardized output dataset *
* containing frequency of each variable *
* vars: Value Label Count Percent *
* to be used as input to graphics routines *
* *
* PROCESS: *
* 1: macro: prepare macro ARRAY of variables *
* 2: macro: if LIST=DATA then concatenate elements into LIST *
* 3: data: prepare subset of DATA *
* 4: macro loop: for each variable *
* 5: PROC: if wanted SORT output dataset *
* 6: data: save various variable widths *
* 7: data: save optimized dataset *
* 8: PROC: if wanted PRINT output dataset *
* *
* NOTES: *
* LIST=DATA: default is data set previously prepared *
* from PROC CONTENTS output data set *
* assumes all variables are labeled *
* assumes macro ARRAY is available *
* picks variables with value = &TRUE, *
* can be used with other than binary-valued data: (0,1) ('0','1') *
* will pick single value from multi-valued data: ('A','B','C'..) *
* *
* KEYWORDS: APPEND array %ARRAY call label dim() dimension FREQ left *
* multiple-response data put trim *
* *
* Author: Ronald Fehd *
* Centers for Disease Control PHPPO DLS *
* 4770 Buford Hwy NE MS-G25 fax: 404/488-7667 *
* Atlanta GA 30341-3724 voice: 404/488-4316 *
* e-mail: RJF2@phpdLs1.em.cdc.gov (P H P D eL S one) *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
%MACRO CHECKALL(/*----------------------------------------------------*/
SERIES /* name of series of check-all variables *
/* output dataset name */
,LIST=DATA /* list of variables, *
/* default is DATA previously prepared *
/* whose name is V&SERIES */
,LIBRARY=LIBRARY/* library name */
,DATA=&DATASET./* DATASET is global variable, else hardcode here *
/*****************************************! *
/* DO NOT USE &SERIES AS NAME OF THIS PARM *
/*****************************************! */
,LBL_LBL=&SERIES. Label/*label of variable Label */
,LBLCOUNT=Number of Laboratories Responding/* *
/* label of Freq-Count */
,LBLPCENT=Percentage of Laboratories Responding/* *
/* label of Freq-Percent */
,BY_VAR= /* var for subsetting */
,BY_VALUE='A'/* var for subsetting: value */
,CHARTYPE=Q00BARH/* Chart-Type in (Q00barh Q00barv) */
,PRINT=1 /* ?print output dataset? used while testing */
,TESTING=0 /* ?enable testing features? */
,SORT=1 /* ?output dataset to be sorted by descending Count? */
,TITLE=TEMP /* label of output dataset and title for graphics *
/* TITLE cannot contain commas */
,TRIMCHAR=: /* front-trim label to this char *
/* labels expected to be in form: *
/* "Q04B: Category - Specific" */
/*,TRUE= 1 /* variable counted with this value: numeric one */
,TRUE='1' /* variable counted with this value: character one */
/*,TRUE='Y' /* variable counted with this value: character 'Yes' */
);/*------------------------------------------------------------------*/
/* Summary of what this macro generates:                              */
/*   - WORK.CHECKALL: rows of the input data set where at least one   */
/*     variable of the series equals the TRUE value (and, when BY_VAR */
/*     is given, BY_VAR equals BY_VALUE)                              */
/*   - WORK.FREQ / WORK.SERIES: one PROC FREQ per variable, filtered  */
/*     to the TRUE value and stacked into SERIES                      */
/*   - <lib>.<SERIES>: final data set with Title, N_eq, Chartype,     */
/*     Value, Label, Count, Percent (plus Subset when BY_VAR is used) */
/* Side effects visible in this code: changes the MPRINT and NOTES    */
/* system options and does not restore them.                          */
/* Depends on macro ARRAY, which is not defined in this file.         */

/* Loop index is declared local before its first use so that the     */
/* %do loops below cannot overwrite a same-named macro variable in    */
/* the calling or global scope.                                       */
%local I;

%if &PRINT or &TESTING %then %do;
    options mprint notes;
    %end;
%else %do;
    options nomprint nonotes;
    %end;
 
%*1: macro: prepare ARRAY of variables;
%*data is subset from PROC CONTENTS: Var-Name is in variable Name;
%local dim_var;
%if "&LIST"="DATA" %then %do;
    /* NOTE(review): presumably the first call returns the number of */
    /* rows in the V-series data set and the second creates VAR1..   */
    /* VARn from column Name - confirm against macro ARRAY.          */
    %Let DIM_VAR = %ARRAY(dim,DATA=&LIBRARY..V&SERIES.);
    %Put note2: Dim_Var:&Dim_Var.;
    %do I = 1 %to &Dim_Var;
        %local Var&I.;
        %end;   
    %ARRAY(name=VAR,DATA=&LIBRARY..V&SERIES.,VAR=Name);
    %end;
%else %ARRAY(VAR,&LIST.);
 
%*2: macro: if LIST=DATA then concatenate elements into LIST;
/* Helper macro: emits VAR1 VAR2 .. VARn as one blank-separated list. */
%MACRO VAR_LIST;
%do I = 1 %to &DIM_VAR;
    &&VAR&I.
    %end;
%MEND;
%if "&LIST"="DATA" %then %let LIST = %VAR_LIST;
 
%*3: data: prepare subset of DATA: delete obs w/no responses;
DATA CHECKALL;
retain NmbrChkd NmbrResp 0;
do until(EndoFile);
   set &LIBRARY..&DATA (keep = &BY_VAR. &LIST.)
       end = EndoFile
   nobs = NmbrObs ;
   NmbrChkd = 0;
   array CheckAll {*} &LIST.;
   drop I;
   /* stop scanning the series as soon as one variable equals TRUE */
   do I = 1 to dim(CheckAll)
      until(NmbrChkd);
      NmbrChkd + (CheckAll{I} = &TRUE.);
      end;
   if NmbrChkd
      %if &BY_VAR ne %then %do;
          and &BY_VAR = &BY_VALUE.
          %end;
      then do;
      output;
      NmbrResp + 1;
      end;
   end;
/* pass total obs and number of responding obs to the macro layer */
call symput("NMBROBS" ,trim(left(put(NmbrObs ,8.))));
call symput("NMBRRESP",trim(left(put(NmbrResp,8.))));
stop;
run;
/* integer percentage of observations with at least one response */
%let PCNTRESP = %eval(100* &NMBRRESP /&NMBROBS);
%put note2: PCNTRESP = <&PCNTRESP.>;
*+;
/* in testing mode only the first two variables are processed */
%if &TESTING %then %let DIM_VAR = 2;
    *Q04B:10;
 
%*4: macro loop: for each variable: FREQ, standardize data, APPEND;
%do I = 1 %to &DIM_VAR;
    PROC FREQ data   = CHECKALL (keep = &&VAR&I. &BY_VAR.);
              tables   &&VAR&I. / noprint
              out    = FREQ;
              %if &BY_VAR ne %then %do;
              by       &BY_VAR.;
              %end;
/* keep only the row for the TRUE value; Value holds the variable    */
/* name and Label holds the variable label, front-trimmed through    */
/* the first occurrence of TRIMCHAR when that character is present   */
DATA FREQ;
drop &&VAR&I.;
length Value $ 8
Label $ 40;
retain Value "&&VAR&I.";
do until(EndoFile);
   set FREQ end = EndoFile;
       where &&VAR&I. = &TRUE;
   call label(&&VAR&I,Label);
   if index(Label,"&TRIMCHAR") then
      Label = left(substr(Label,index( Label,"&TrimChar")+1));
   output;
   end;
stop;
 
/* first variable creates SERIES, later ones are appended to it */
%if &I = 1 %then %do;
    DATA SERIES;
    do until(EndoFile);
       set FREQ end = EndoFile;
       output;
       end;
    stop;
    %end;
%else %do;
    PROC Append base = SERIES
                data = FREQ;
    %end;
%end;/* %do I=1:&DIM_VAR*/
 
%*5: PROC: if wanted SORT output dataset;
%if &SORT %then %do;
    /* data set is named explicitly instead of relying on _LAST_ */
    PROC Sort data = SERIES;
    by descending Count;
    %end;
*+;
%*6: data: save various variable widths;
%local WLABEL WCOUNT WPERCENT;
DATA _NULL_;
retain WLabel WCount WPercent 0 ;
do until(EndoFile);
   set SERIES (keep = Label Count Percent) end = EndoFile;
   WLabel   = max(WLabel  ,length( Label ));
   WCount   = max(WCount  ,length(trim(left(put(Count  ,8.0)))));
   WPercent = max(WPercent,length(trim(left(put(Percent,5.1)))));
   end;
call symput('WLABEL' , trim(left(put(WLabel ,2.))));
call symput('WCOUNT' , trim(left(put(WCount ,8.))));
call symput('WPERCENT' , trim(left(put(WPercent,5.))));
stop;
run;
/* output library: the LIBRARY parameter (default LIBRARY), or WORK  */
/* while testing                                                      */
%local LIB; %let LIB = &LIBRARY.;
%if &TESTING %then %do;
    %let LIB = WORK;
    %PUT WLABEL = <&WLABEL.> WCOUNT=<&WCOUNT.> WPERCENT=<&WPERCENT.>;
    %end;
 
%*7: data: save optimized dataset;
%local LEN LENRESP;
%let LEN = %length(&TITLE.);
/* N_eq holds the text N=<NMBRRESP>: two characters plus the digits */
%let LENRESP = %eval(2 + %length(&NMBRRESP.));
DATA &LIB..&SERIES. (label = "CHECKALL &SERIES." );
attrib
   %if &BY_VAR ne %then %do;
       Subset label = "subset: &BY_VAR = &BY_VALUE"
       %end;
Title    label  = "&SERIES. Title"
         length = $ &LEN. format = $char&LEN..
N_eq     label  = "N=&NMBRRESP data:&DATA Obs:&NMBROBS Resp:&PCNTRESP.%"
         length = $ &LENRESP. format = $char&LENRESP..
Chartype label  = "&CHARTYPE"
         length = $ 8 format = $char8.
Value    label  = "&SERIES. Variable"
         length = $ 8 format = $char8.
Label    label  = "&LBL_LBL"
         length = $ &WLABEL. format = $char&WLABEL..
Count    label  = "&LBLCOUNT." format = &WCOUNT..0
Percent  label  = "&LBLPCENT." format = &WPERCENT..1;
retain Title    "&Title."
       N_eq     "N=&NmbrResp."
       Chartype "&CharType";
do until(EndoFile);
   set SERIES
       %if &BY_VAR ne %then %do;
           (rename=(&By_Var = Subset))
           %end;
       end = EndoFile;
   Label = translate(Label,'!',"'");/*change <!> back to squote*/
   /* NOTE(review): TRANSLATE takes (source, to, from), so as written */
   /* this replaces each single quote in Label with <!>, which is the */
   /* reverse of what the comment above states - confirm the intent.  */
   ***NmbrResp = 100*Count/Percent;
   output;
   /* Title, N_eq and Chartype carry real values on the first row   */
   /* only; every later row gets a period as placeholder            */
   Title    = '.';
   N_eq     = '.';
   Chartype = '.';
   end;
stop;
 
%*8: proc: if wanted PRINT output dataset;
%if &Print %then %do;
    PROC Print data = &LIB..&SERIES. double label noobs;
         title3 "&SERIES.: &TITLE";
         %if &BY_VAR ne %then %do;
             by Subset ;
             id Subset ;
             %end;
    PROC Contents data = &LIB..&SERIES.;
    %end;
%ENDOMAC: run;
%MEND CheckAll;

Test Program

 /* test data: enable by ending this line with slash '/' ------------**/
/* Demonstration driver for macro CHECKALL: builds a small survey     */
/* data set, derives one V-series data set of variable names per     */
/* question prefix, then calls the macro four times.                  */
options details mprint nocenter;
libname LIBRARY 'c:\saswinpd\sasuser';*default*;
 
%let DATASET = SURVEY1;*in-data: global macro-variable used by CHECKALL;
 
*Step 1: label the variables;
/* N031-N033 re-read columns 2-4 as numerics, so the same three      */
/* responses exist both as character (Q02x) and numeric (N03x).      */
DATA LIBRARY.SURVEY1;
label Q01 = 'Fruit'
      Q02A = 'Apple'      Q02B = 'Banana'   Q02C = 'Cherry'
      N031 = 'One'        N032 = 'Two'      N033 = 'Three'
      X08C = 'Commercial' X08I = 'In House' X08M = 'Manufacturer';
input @ 1 Q01 $char1.
      @ 2 Q02A $char1. @ 3 Q02B $char1. @ 4 Q02C $char1.
      @ 2 N031 1.      @ 3 N032 1.      @ 4 N033 1.
      @ 5 X08C $char1. @ 6 X08I $char1. @ 7 X08M $char1.;
cards;
A100.X.
A110..X
A101X..
A001XXX
A100...
;
*Step 2: save CONTENTS of data set;
 
PROC CONTENTS data = LIBRARY.SURVEY1 noprint
              out  = LIBRARY.CONTENTS(keep = Name);
run;
 
*Step 3: make data sets with series of multiple-response variables;
 
data LIBRARY.VQ02 LIBRARY.VN03 LIBRARY.VX08;
set LIBRARY.CONTENTS;
if substr(Name,1,3) = 'Q02' then output LIBRARY.VQ02;
if substr(Name,1,3) = 'N03' then output LIBRARY.VN03;
if substr(Name,1,3) = 'X08' then output LIBRARY.VX08;
/* explicit step boundary: the V-series data sets must exist before  */
/* the macro starts executing, because its first statements are a    */
/* global OPTIONS statement and macro-level calls that do not by     */
/* themselves force this DATA step to run                            */
run;
 
* end SOP set-up ...................................................**;
%CHECKALL(Q02);
%CHECKALL(N03,TRUE=1);
%CHECKALL(X08,TRUE='X');
%CHECKALL(Q02,BY_VAR=Q01);
/*.................................................... END Test Data */

References

see also:

--Ronald_J._Fehd macro.maven == the radical programmer 06:46, 12 June 2012 (EDT)