Life Imitates Art: ODS Output Data Sets that Look like Listing Output (MWSUG Paper BB-12-2013)

From sasCommunity
Jump to: navigation, search

MWSUG Paper BB-12-2013

Life Imitates Art: ODS Output Data Sets that Look like Listing Output

Dylan Ellis, Mathematica Policy Research, Washington DC

Abstract

Confusingly, the output data set from a summary procedure looks nothing like the display in our output destination.

Have you ever wished for an output data set that looks like the cross-tabulation you see in the listing window? Have you ever run a series of one-way frequencies on a list of variables and wished for a more compact tabular display? This presentation will show how we can leverage string functions and the automatic _TYPE_ variable from PROC TABULATE to reshape the output data set into a more intelligible and practical table visualization. Readers should be familiar with the basics of how to specify a categorical frequency table using Proc Freq or Proc Tabulate.

Online resources


Addenda: Macro routines

Here are the two macro routines as described in the presentation.

This macro will create one-way tabulations of variables on a common scale (e.g. survey items).

 
%macro tabulate_common(data, out, classlist, listname=classvar, valformat=, statistic=N, var=);
 
 
*if datasetname is two parts, pull libname off separately;
%if %index(&data., .) 
%then %do; 
	  %let libname=%upcase(%scan(&data., 1)); 
	  %let memname=%upcase(%scan(&data.,-1)); 
%end;
%else %do; 
	  %let libname=WORK; 
	  %let memname=%upcase(&data.); 
%end;
 
*look up types of vars in classlist and confirm all are the same;
Proc SQL;
Create View classvars As
Select type,   count(distinct type)   as ntypes,
       length, count(distinct length) as nlengths
  From dictionary.columns
 Where LIBNAME="&libname." and MEMNAME="&memname."
   and findw("&classlist",strip(name), ' ', 'i')
;
Quit;
 
Data _null_;
 Set classvars end=lastvar;
 
if ntypes>1 or nlengths>1 then do;
  put "All classification variables are not of same length and type"; 
  abort cancel;
end;
else if lastvar then do;
  call symput('type',type);
  call symput('length',length);
end;
 
Run;
 
 
%let statistic =%upcase(&statistic);
 
proc tabulate missing  
data=&data. 
 out=_uv_allfreq1 (drop=_PAGE_ _TABLE_)
order=data; 
class  &classlist.;
%if "&statistic"="N" %then %do;
tables (&classlist.)*N/printmiss;
%end;
%if "&statistic"="PCT" %then %do;
tables (&classlist.), ALL*COLPCTN/printmiss;
%end;
%if "&statistic"^="N" and "&statistic"^="PCT" %then %do;
var &var.;
tables (&classlist.)*&var.*&statistic./printmiss;
%end;
run;
 
*copy arbitrary class variable rather than look up type;
%let tmplvar = %scan(&classlist., 1);
 
*if datasetname is two parts, pull libname off separately;
%if %index(&data., .) 
%then %do; 
	  %let libname=%upcase(%scan(&data., 1)); 
	  %let memname=%upcase(%scan(&data.,-1)); 
%end;
%else %do; 
	  %let libname=WORK; 
	  %let memname=%upcase(&data.); 
%end;
 
*add check of type against all, abort if differ;
 
Proc SQL noprint;
Select type into :classtype
From dictionary.columns
Where libname="&libname."
  and memname="&memname."
  and    name="&tmplvar."
;
Quit;
 
data _uv_allfreq2;
 set _uv_allfreq1;
 
%if "&statistic"="N" %then %do;
keep &listname. common_range N;
label &listname. = "&listname. (Frequencies)";
%end;
%if "&statistic"="PCT" %then %do;
keep &listname. common_range PctN_0:;
label &listname. = "&listname. (Percentage)";
%end;
%if "&statistic"^="N" and "&statistic"^="PCT" %then %do;
keep &listname. common_range &var._&statistic.;
label &listname. = "&listname. (&var._&statistic.)";
%end;
 
length &listname. $32; *store original variable names;
%if "&type."="char" %then %do; length common_range $ &length.; %end;
%if "&type."="num"  %then %do; length common_range   &length.; %end;
 
array classvars &classlist.;
 
&listname.   = vname(classvars[findc(_type_,'1')]);
common_range = classvars[findc(_type_,'1')];
 
%if %length(&valformat.)
%then %do; 
format common_range &valformat.; 
%end;
 
run;
 
proc transpose
data=_uv_allfreq2
 out=&out. (drop=_NAME_);
by &listname. notsorted;
id common_range;
%if "&statistic"="N" 
%then %do;
var N;
%end;
%if "&statistic"="PCT" %then %do;
var PctN_0:;
%end;
%if "&statistic"^="N" and "&statistic"^="PCT" %then %do;
var &var._&statistic.;
%end;
run;
 
 
proc datasets lib=work nolist; delete _uv_allfreq1 _uv_allfreq2; quit;
 
 
%mend tabulate_common;

Here is an example call:

 
Proc Format;
value yesno
1="yes"
0="no"
;
Run;
 
%tabulate_common(data=sashelp.bweight, out=test1, classlist=black married boy smoke);
%tabulate_common(data=sashelp.bweight, out=test2, classlist=black married boy smoke, listname=indicator, valformat=yesno., statistic=PCT);
%tabulate_common(data=sashelp.bweight, out=test3, classlist=black married boy smoke, listname=indicator, statistic=MEAN, var=weight);




This macro will create tabulations of variables at a single value over a series of by groups (e.g. a surveillance report of indicators where indicator = 1).

 
%macro tabulate_classby(data, out, bygroup, classlist, listvalue=1, statistic=N, var=);
 
*if datasetname is two parts, pull libname off separately;
%if %index(&data., .) 
%then %do; 
	  %let libname=%upcase(%scan(&data., 1)); 
	  %let memname=%upcase(%scan(&data.,-1)); 
%end;
%else %do; 
	  %let libname=WORK; 
	  %let memname=%upcase(&data.); 
%end;
 
*look up types of vars in classlist and confirm all are the same;
Proc SQL;
Create View classvars As
Select type,   count(distinct type)   as ntypes,
       length, count(distinct length) as nlengths
  From dictionary.columns
 Where LIBNAME="&libname." and MEMNAME="&memname."
   and findw("&classlist",strip(name), ' ', 'i')
;
Quit;
 
Data _null_;
 Set classvars end=lastvar;
 
if ntypes>1 or nlengths>1 then do;
  put "All classification variables are not of same length and type"; 
  abort cancel;
end;
else if lastvar then do;
  call symput('type',type);
  call symput('length',length);
end;
 
Run;
 
 
proc tabulate missing  
data=&data. 
 out=_uv_allfreq1 (drop=_PAGE_ _TABLE_)
order=data; 
class  &classlist. &bygroup.;
%if "&statistic"="N" 
%then %do;
tables (ALL &classlist.)*(%sysfunc(translate(&bygroup.,'*',' ')))/printmiss;
%end;
%else %do;
var &var.;
tables (ALL &classlist.)*(%sysfunc(translate(&bygroup.,'*',' ')))*&var.*&statistic./printmiss;
%end;
run;
 
%let classvars = %sysfunc(countw(&classlist.));
 
data _uv_allfreq2 (where=(common_range=&listvalue. or listvar="Total"));
 set _uv_allfreq1;
 
%if "&statistic"="N" 
%then %do;
keep &bygroup. listvar common_range N;
%end;
%else %do;
keep &bygroup. listvar common_range &var._&statistic.;
%end;
 
length listvar $32; *store original variable names;
%if "&type."="char" %then %do; length common_range $ &length.; %end;
%if "&type."="num"  %then %do; length common_range   &length.; %end;
 
array classvars &classlist.;
 
if findc(_type_,'1') <= &classvars. then do;
   listvar      = vname(classvars[findc(_type_,'1', -&classvars.)]);
   common_range = classvars[findc(_type_,'1', -&classvars.)];
end;
else do;
   listvar      = "Total";
end;
 
run;
 
Proc Sort Data = _uv_allfreq2;
       By &bygroup. listvar; 
Run;
 
proc transpose
data=_uv_allfreq2
 out=&out. (drop=_NAME_);
by &bygroup.;
id listvar;
%if "&statistic"="N" 
%then %do;
var N;
%end;
%else %do;
var &var._&statistic.;
%end;
run;
 
proc datasets lib=work nolist; delete _uv_allfreq1 _uv_allfreq2; quit;
 
%mend tabulate_classby;

And an example call:

 
%tabulate_classby(data=sashelp.bweight, out=test1, bygroup=ed, classlist=black married boy smoke,  listvalue=1, statistic=N);
 
data bweight2;
set sashelp.bweight;
 
low_bweight = (weight < 2500);
high_m_gain = (m_wtgain > 35);
heavy_smoke = (smoke=1)*(cigsper > 20); 
no_prenatal = (visit = 0);
 
run;
 
%tabulate_classby(data=bweight2, out=test2, bygroup=ed black married, classlist=low_bweight high_m_gain heavy_smoke no_prenatal,  listvalue=1, statistic=mean, var=weight);

Further reading