As the first step in the decommissioning of sasCommunity.org the site has been converted to read-only mode.


Here are some tips for How to share your SAS knowledge with your professional network.


Macros for Data Review

From sasCommunity
Jump to: navigation, search

2013-Jan-11: see another example in Macro_CallText

SUGI23.p7.1998: DEMOXRPT: macros for writing Exception Reports: perform range and logic checks on a data set; write file of exceptions to edit and use for updates

Author: Ronald_J._Fehd

ABSTRACT

Data review can be viewed as a two-step process: 1. Compare: review differences between two data sets. 2. Exception Report: review data consistency with range and logic checks.

A data set may be updated with a file containing sets of these three statements -- ID, assignment and closure:

*  if ID = 1 then do;
* <variable name> = <value>;
*  end;

This paper reviews common problems in writing exception reports for range and logic checking. The product is a file which contains update commands in an easily editable form. Summary information is written at the end of the update file.

The output file is in this form:

if ID = 1 then do;
* error: Var_A gt 24;
* VAR_A  = 28;
end;

The message indicates why the value has been printed.

Macros

;/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
PROGRAM: DEMOXRPT: Exception Report demo, with macros ID and SHOW
 
DESCRIPTION: data _NULL_ report writer reads data set
             writes file of exceptions with brief summary
             writes summary report(s) of exceptions
             file to be used as %included file to update data set
             see also: %COMPARWS
 
MACROS:  ID      : prints: 1: ID line once, 2: exception message
         SHOW    : prints variable or array-reference
         CHKARRAY: check each value in array
         CHKFRMT : check one variable against its format: is in range
         CHKHIGH : check one variable is less than high
         CHKVRIFY: check one variable contains only specified chars
         NOBSVARS: returns NOBS and NVARS of data set
         VARTYPE : returns type of variable in ('C','N')
 
PROCESS: 0. make test data from cards, including formats
         1. set program parameters
         2. macro definitions
         3. data step: report writer
         3.1. initialize vars
         3.2. if testing, change values to invalid
           ***programmer-written section --------------------- begin
         3.3. list of exceptions
           ***programmer-written section ....................... end
         3.4  data step closure, brief summary
         4. summary report(s)
 
NOTES: ID and SHOW are methods for printing variables
       variable type is made available to these methods
       thru mac-function VARTYPE
       or   mac-vars created by CHKARRAY
 
KEYWORDS: "data _NULL_ report writer" "exception report"
          "object methods" "data checking" "data review"
 
AUTHOR: Ronald J. Fehd  1998
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*%*0. TEST DATA  disable after first run *--------------------------**/
*recommend MISSING and INVALID be set in AUTOEXEC.SAS;
*used by CHKFRMT;
%let MISSING=MISSING;
%let INVALID=INVALID;
 
proc FORMAT;%*note three sets of values (valid, blank/missing, invalid);
 value Nfour   100-400  ='OK'  . ="&MISSING." other="&INVALID.";
 value $Cthree '01','34'='OK' '.'="&MISSING." other="&INVALID.";
data DEMOXRPT;
infile cards;
 input @1 CID  $char2.       @1 NID       2.
       @3 CID2 $char1.       @3 NID2      1.
       @4 C1   $char1.       @5 N1        1.
       @6 C21  $char1.       @7 C22  $char1.       @8 C23  $char1.
       @6 N21       1.       @7 N22       1.       @8 N23       1.
      @30 C3   $char2.      @30 N3        2.
      @30 C4   $char3.      @30 N4        3.                     ;
cards;
011A1010101000               019
021A1010110101               129
031A1910101010               341
032A1010101010               341
;/*45678901234567890123456789012345678901234567890
    .    1    .    2    .    3    .    4    .    5*********************/
 
%*1. set program parameters;
 
%let TESTING  = 1;%*set output file to PRINT, see macro TESTDATA*;
%let TESTING  = 0;
 
%let ID1      = CID;%*id is character*;
%let ID1      = NID;%*id is numeric  *;
%let ID2      =;%*secondary id is blank*;
%let ID2      = CID2;
%let ID2      = NID2;
%let ID3      =;%*tertiary  id is blank*;
*LET ID3      = CID3;
*LET ID3      = NID3;
 
%let DATA_SET =    WORK.DEMOXRPT;%*testdata provided*, used by VARTYPE;
*LET DATA_SET = LIBRARY.XRPTDATA;
 
TITLE1 'DEMOXRPT: Exception Report: Data out of range or inconsistent';
TITLE2 "report from &DATA_SET.";
%let TITLEN = 2;%*used for numbering titles in summary report(s)*;
 
%let PRNTFILE = "C:\SAS\ZXRPT.SAS";
*LET PRNTFILE = PRINT;%*TESTING=1 sets PRNTFILE to PRINT in TESTDATA;
 
options linesize = 95   pagesize = 54;*SAS ms 10 portrait duplex:long;
options /*NO*/mprint nocenter pageno=1;
 
%*2. macro definitions;
 
%macro ID(MSG);%* print id line, msg, returns 7 statements -----------*;
if not ID then do;
   if LynzLeft le 4 then put _page_;
                         put "*;if &ID1 = " QID1 +B1 &ID1 +B1 QID1
   %if &ID2 ne %THEN         " and &ID2 = " QID2 +B1 &ID2 +B1 QID2 ;
   %*F &ID3 ne %THEN         " and &ID3 = " QID3 +B1 &ID3 +B1 QID3 ;
                             " then do;";
                         XrptIds + 1; ID = 1;
   *if not ID*;
   end;
put "* &MSG.;";
%mend ID;
 
%macro SHOW(VAR);%* print var or array-ref, returns 7 statements -----*
VAR: variable name  : TYPE is supplied by function %VARTYPE
                      var-name is direct reference
     array reference: array-name{I}
                      TYPE is supplied by mac-vars from MAKARRAY
                      var-name comes from vname ......................*;
%local C_BRACKT;%*curly-bracket: array-reference*;
%let C_BRACKT = %index(&VAR,{);
%if &C_BRACKT %then %do;
    call vname(&VAR,Name);
    %end;
%else %DO;
    Name = "&VAR.";
    %end;
 
if
   %if &C_BRACKT %THEN "&&&%substr(&VAR,1,&C_BRACKT-1)";
   %else                %VARTYPE(&VAR);
   = 'C' then do;
   Q = "'";
   ValueNum =  .   ;
   ValueChr = &VAR.;
   end;
else do;
   Q = '';
   ValueChr = '.'  ;
   ValueNum = &VAR.;
   end;
put @1 "*" @3 Name @12 "= " Q +B1 &VAR. +B1 Q  ";";
/*put @1 "*" @3 Name @12 "= " Q +B1 '.'       Q  ";*set to missing?;";*/
XrptCels + 1;
Xrpts + 1;
output XRPT;
%if &TESTING %THEN %PUT SHOW:VAR=<&VAR.>;
%mend SHOW;
 
%macro CHKARRAY(ARAYNAME,C_N,ELEMENTS
,VALUE=%NRSTR('0','1',' ','.'));%* returns 3+7+7+2=19 statements------*;
%local &ARAYNAME;%*local mac-var available to SHOW;
%let   &ARAYNAME = &C_N.;
array &ARAYNAME {*} %if "&C_N" = "C" %THEN $; &ELEMENTS;
 
do I = 1 to dim(&ARAYNAME.);
   if not(&ARAYNAME.{I} in (&VALUE.))
      then do;
      %ID(array &ARAYNAME not in &VALUE);
      %SHOW(&ARAYNAME.{I});
      end;
   %*do I=1:dim*;
   end;
%if &TESTING %THEN %PUT TYPE(&ARAYNAME)=&&&ARAYNAME;
%mend CHKARRAY;
 
%macro CHKFRMT(VAR,FORMAT);%* returns 1+7+7+1=16 statements ----------*
INVALID is set in either AUTOEXEC or proc FORMAT program, see test data;
if put(&VAR.,&FORMAT.) eq "&INVALID."
   then do;
   %ID(ChkFrmt: &VAR. not in &FORMAT);
   %SHOW(&VAR.);
   end;
%mend CHKFRMT;
 
%macro CHKHIGH(VAR,TYPE,HI);%* returns 1+7+7+1=16 statements ---------*;
if
   %if       "&TYPE" = "C" %THEN (&VAR ne "") and (&VAR. ge "&HI");
   %else %if "&TYPE" = "N" %THEN (&VAR ne . ) and (&VAR. ge  &HI );
   then do;
   %ID(ChkHigh: &VAR. ge &HI);
   %SHOW(&VAR.);
   end;
%mend CHKHIGH;
 
%macro CHKVRIFY(VAR,RANGE);%* returns 1+7+7+1=16 statements ----------*;
if verify(&VAR.,"&RANGE.") then do;
   %ID(ChkVrify: other than <&RANGE.>);
   %SHOW(&VAR.);
   end;
%mend CHKVRIFY;
 
%MACRO NOBSVARS(NAMENOBS,NAMENVAR,DATA=.);%*SAS Macro Ln ref 1e pg 242;
%if &DATA = . %THEN %let DSN = &SYSLAST.;%*resolve mac-vars in name;
%else               %let DSN = &DATA.;
%let DSID = %sysfunc(open(&DSN));
%if &DSID %then %do;
    %let &NAMENOBS = %sysfunc(attrn(&DSID,NOBS));
    %let &NAMENVAR = %sysfunc(attrn(&DSID,NVARS));
    %let RC        = %sysfunc(close(&DSID.));
    %end;
%else %put Open for data set &DATA. failed - %sysfunc(sysmsg());
%put data=&DATA. &NAMENOBS=<&&&NAMENOBS> &NAMENVAR=<&&&NAMENVAR>;
%mend NOBSVARS;
 
%*global *used in EndoFile summary;
%let DATANOBS=;
%let DATAVARS=;
 
%NOBSVARS(DATANOBS,DATAVARS,DATA=&DATA_SET.);
 
%MACRO VARTYPE(VARNAME);/*returns "C" or "N" -------------------------*/
%str(vartype(open(%upcase("&DATA_SET"),"I"),
     varnum (open(%upcase("&DATA_SET"),"I"),"&VARNAME")))
%mend VARTYPE;
 
 
%*3.1. initialize report writer vars;
DATA XRPT(keep = &ID1 &ID2 &ID3 Name ValueChr ValueNum N);
 B1=-1;%*pointer control*;
 format _all_;%*turn off all formats;
 length Name $ 8 ValueChr $ 6 ValueNum 8 Xrpts 4 ID 3;
 retain XrptIds    /*incr by ID,   used in EndoFile summary  */
        XrptCels 0 /*incr by SHOW, used in EndoFile summary  */
        N        1 /*no change,    used in summary report(s) */
        QID1 QID2 QID3 '';
  if %VARTYPE(&ID1) = 'C' then QID1 = "'";%*used by ID;
  if %VARTYPE(&ID2) = 'C' then QID2 = "'";%*disable if unused;
%*if %VARTYPE(&ID3) = 'C' then QID3 = "'";%*disable if unused;
 
 file &PRNTFILE  Linesleft = Lynzleft;
 do until(EndoFile);%*------------------------------------------------*;
  set &DATA_SET end = EndoFile;
  ID    = 0; %*reset by ID *;
  Xrpts = 0; %*incr by SHOW*;
 
%*3.2. if testing, change values to invalid;
%macro TESTDATA;%*----------------------------------------------------*;
%if &TESTING %then %do;
    file PRINT Linesleft = Lynzleft;
    *where &ID1 le 2;
    *C1 = 'Z';
    *C22 = 'Z';
    %end;
%mend TESTDATA;
 
%TESTDATA;
 
%*programmer-written section************************************* begin;
 
%*3.3. list of exceptions;
%CHKVRIFY(C1,AB);
%CHKARRAY(C2,C,C2:);
%CHKARRAY(N2,N,N2:,VALUE=%NRSTR(0,1));
%CHKFRMT (C3,Cthree.);
%CHKFRMT (N4,Nfour.);
%CHKHIGH (C4,C,300);
%CHKHIGH (N4,N,300);
%CHKVRIFY(C4,012345678);
/****************************************/
 
%*programmer-written section*************************************** end;
 
%*3.4  data step closure, brief summary;
 if ID then  put "*;end;*" Xrpts= &ID1= &ID2= /*&ID3=*/ ";";
%*............................................ do until(EndoFile)*;
end;
 
%let DATACELS = %eval(&DATANOBS * &DATAVARS);
PcntNobs = 100 * XrptIds  / &DATANOBS;
PcntCels = 100 * XrptCels / &DATACELS;
format PcntNobs PcntCels 7.3;
retain          Z1 11             Z2 21            Z3 31;
put "**smry :" @Z1 "Ids"         @Z2 "Vars"       @Z3 "Cells;";
put "** data:" @Z1 "&DATANOBS"   @Z2 "&DATAVARS"  @Z3 "&DATACELS   ;";
put "** xrpt:" @Z1   XrptIds                      @Z3   XrptCels " ;";
put "** pcnt:" @Z1   PcntNobs "%"                 @Z3   PcntCels "%;";
stop;
run;
 
%macro PRNTSMRY();
%local SMRYIDS SMRYNAME SMRYVARS;%*----------------*;
%*5. print desired summary report(s);
 
%let SMRYIDS =1;%*?print FREQ of IDS?;
%let SMRYNAME=1;%*?print detail  of variables, by name?;
%let SMRYVARS=1;%*?print FREQ of variables with exceptions?;
 
%if "&PRNTFILE" ne "PRINT" %then %do;
    options pageno=1;%*--------------;
 
    %if &SMRYIDS %then %do;
        proc FREQ data = XRPT;
        %*tables &ID1              /list;
          tables &ID1 * &ID2       /list;
        %*tables &ID1 * &ID2 * &ID3/list;
        TITLE%eval(&TITLEN.+1) "summary: IDs listed";
        %end;
 
    %if &SMRYVARS %then %do;
        proc FREQ data = XRPT;
        tables Name         ;
        TITLE%eval(&TITLEN.+1) "summary: Variables listed";
        %end;
 
    %if &SMRYNAME %then %do;
        proc SORT data = XRPT;
        by  Name ValueChr ValueNum &ID1. &ID2. &ID3.;
 
        proc PRINT data = XRPT;sum N;
        by Name; id Name;
        TITLE%eval(&TITLEN.+1) "detail: by Var-Name";
        %end;
    %* *IF "&PRNTFILE" ne "PRINT";
    %end;
%if not &TESTING %then %do;
    %*print exception report written to file*;
    data _NULL_;
    options pageno=1;
     TITLE%eval(&TITLEN.+1);
     file PRINT;
     do until(EndoFile);
        %local LRECL;
        %let LRECL = 72;
        infile &PRNTFILE end = EndoFile pad lrecl = &LRECL.;
        input @1 Line $char&LRECL..;
        put   @1 Line $char&LRECL..;
        %*do until(EndoFile)*;
        end;
    stop;
   %end;
run;
%mend PRNTSMRY;
%PRNTSMRY;

References

--Ronald_J._Fehd macro.maven == the radical programmer 05:13, 26 June 2012 (EDT)