Macro Condes

From sasCommunity
Jump to: navigation, search

This is the code for the condes macro function.

/******************************************************************************

condes macro: computes a line by line summary of each variable on a SAS dataset.


Parameters:

dataset - name of dataset, including libname if other than work.
pdf      - a pdf version of the condes is generated (default Y)
html     - an html version of the condes is generated (default N)

Output:
output - a text file with name of dataset and .condes extention
       - a pdf file with name of dataset and .pdf extension
       - a html file with name of dataset and .html extension

Example: %condes(nhs)
         %condes(libnhs.nhs)
         %condes(libnhs.nhs,html=y)
         %condes(libnhs.nhs,html=y,pdf=n)

******************************************************************************/

%macro condes(dataset,pdf=y,html=n) ;

option nocenter nodate nonumber;

%let pdf = %upcase(&pdf); %let html = %upcase(&html);

%if %index(&dataset,.) = 0 %then %let dataset_name = %scan(&dataset,1);
                           %else %let dataset_name = %scan(&dataset,2);

proc contents data=&dataset noprint out=contents (keep=name type length varnum label format formatL);
proc sort; by name;

* Find number of character and numeric variables;
data _null_ ;
    retain c n 0;
    set contents end=done; 
    if type = 1 then do;
        n + 1;
    end;
    else if type = 2 then do;
        c + 1;
        call symputx('c' || left(put(c,5.)),name) ;
    end;
    if done eq 1 then do;
     call symputx('n',left(put(n,5.))) ;
     call symputx('c',left(put(c,5.))) ;
    end;
run;

%if &n > 0 %then %do; 
proc means data=&dataset noprint ;
    output out=count n= ;
    output out=mean mean= ;
    output out=std std= ;
    output out=min min= ;
    output out=max max= ;
run;

data stats;
    set count mean std min max;
    drop _freq_ _type_;
run;

proc transpose data=stats out=stats name=name;
run;

data stats;
    length name $32;
    set stats (rename = (col1=n));
run;
proc sort; by name;
%end;

* Get statistics for character variables;

%if &c > 0 %then %do ;
data charstats;
    set &dataset (keep = _char_)  end=done;

retain _min1-_min&c 'FFFFFFFFFFFFFFFF'X ;
retain _max1-_max&c '0000000000000000'X ;
retain _num1-_num&c 0 ;

length name $32.;

%do i = 1 %to &c ;
      if &&c&i ne ' ' then do ;
          _num&i = _num&i + 1 ;
      if &&c&i gt _max&i then _max&i = &&c&i ;
      if &&c&i lt _min&i then _min&i = &&c&i ;
      end ;
%end ;

* Output one row per character variable with accompanying stats;
if done eq 1 then do ;
    %do i = 1 %to &c ;
        name = "&&c&i" ;
        if _num&i eq 0 then max = '        ' ; else max = _max&i ;
        if _num&i eq 0 then min = '        ' ; else min = _min&i ;
        n = _num&i ;
        output ;
    %end ;
end ;
keep name n max min ;
run; 
%end;

%local charstats;
%if &c > 0 %then %let charstats = charstats;
%local stats;
%if &n > 0 %then %let stats = stats;

* Put stats for numeric and char variables together and add meta data;
* All statistics will be character data eventually;
data stats;
    retain maxvw 8 maxdw 12 ;
    merge &stats &charstats contents end=done; by name;

* Apply date format to date variables - values will be character;

if type ne 2 then do;
    if format = 'MMDDYY' then do;
        mean = put(round(col2),mmddyy8.);
        min  = put(col4,mmddyy8.);
        max = put(col5,mmddyy8.);
    end;
else if format = 'DDMMYY' then do;
    mean = put(round(col2),ddmmyy8.);
    min  = put(col4,ddmmyy8.);
    max = put(col5,ddmmyy8.);
end;
else if format = 'DATE' then do;
    mean = put(round(col2),date7.);
    min  = put(col4,date7.);
    max = put(col5,date7.);
end;
else do;
* Apply best format to rest of numeric variables - values will be character;
  mean = put(col2,best8.);
  min = put(col4,best8.);
  max = put(col5,best8.);
end;
* Apply best format to SD all numeric variables;

std = put(col3,best8.);
end;

* Compute skipvar for inserting blank lines every 5th row in report;
skipvar = int((varnum-1)/5);
length format_full $10.;


if formatL = 0 and format ne ' ' then format_full = cats(format,'.'); else
if formatL > 0 then format_full = cats(format,put(formatL,2.),'.'); 

length type_var $5. ;
if type = 1 then type_var = cats('N',put(length,3.));
if type = 2 then type_var = cats('C',put(length,3.));
if formatL > 0 then format_full = cats(format,put(formatL,2.),'.'); 

* Compute maximum length of variables names and labels so report is condenced;
maxvw = max(maxvw,length(name));
maxdw = max(maxdw,length(label));

if done = 1 then do;
    maxvw = max(maxvw,12);
    maxdw = max(maxdw,20);
    call symputx('maxvw',put(maxvw+1,2.));
    call symputx('maxdw',put(maxdw,3.));
end;
keep name length type_var label format_full skipvar formatL varnum n mean std min max ;
run;


* Sort so variables are in original order;
proc sort; by varnum;

* Rename some variables so proc report does not get confused;
proc datasets;
    modify stats;
    rename n = xn;
    rename mean = xmean;
    rename std = xstd;
    rename min = xmin;
    rename max = xmax;
run;

* define template style for pdf and html file;
option orientation=landscape;
proc template;
    define style styles.condes;
    parent = styles.journal;
    class fonts / 'docFont' = ("Courier",8pt);
    class output/ padding = 5;
end;
run;

* Figure out linesize for text report - cant be more than 256;
* Truncate label if needed to keep under 256;
%let linesize = %eval(81 + &maxvw + &maxdw) ;

%if &linesize > 256 %then %do;
%let linesize = 256;
%let maxdw = %eval(256-81-&maxvw);
%end;

option linesize =  &linesize pagesize=64;
ods listing close;
ods listing file = "&dataset_name..condes"  ;
%if &pdf = Y %then %do; 
    ods pdf  file = "&dataset_name..pdf" style=condes;
%end;
%if &html = Y %then %do; 
    ods html file = "&dataset_name..html";
%end;

proc report data=stats nowindows headline style(header) = [just=left] formchar(2)='-';
    column skipvar varnum name type_var format_full label xn xmean xstd xmin xmax;
    define skipvar / noprint order 'Seq';
    define varnum / 'Seq';
    define name / 'Name';
    define type_var/ 'T';
    define format_full / 'Format';
    %if &maxdw = 20 %then %do;
        define label / 'Variable Label' style(column) = [cellwidth=1.5in] ; 
    %end;
    %else %do;
        define label / 'Variable Label'  ; 
    %end;

    define xn / 'N';
    define xmean / 'Mean';
    define xstd / 'Std Dev';
    define xmin / 'Minimun';
    define xmax / 'Maximum';
    compute after skipvar;
    line ' ';
    endcomp;
    format name $&maxvw.. label $&maxdw..;
    format varnum 4.;
    title "Condescriptive of: &dataset_name";
run;
%if &pdf = Y %then %do;
    ods pdf close;
%end;
%if &html = Y %then %do;
    ods html close;
%end;
ods listing close;
ods listing;

proc datasets;
    delete stats;
run;
%if &c > 0 %then %do;
proc datasets;
    delete charstats;
run;
%end;

%mend;