Skip to content

Instantly share code, notes, and snippets.

@shiyuangu
Last active November 10, 2017 18:27
Show Gist options
  • Save shiyuangu/ec31b0428a8c51ce008d8f5859385174 to your computer and use it in GitHub Desktop.
Save shiyuangu/ec31b0428a8c51ce008d8f5859385174 to your computer and use it in GitHub Desktop.
SAS Notes

exploratory analysis

find a column in a dataset

/* Report, in the log, the position of variable col_name in data set
   outpath.test, or say that it does not exist. */
data _null_;
    dsid = open('outpath.test');
    if dsid = 0 then do;
        /* open() returns 0 on failure; sysmsg() holds the reason. */
        msg = sysmsg();
        put 'Could not open data set: ' msg;
    end;
    else do;
        /* varnum() returns 0 when the variable is absent, otherwise its
           1-based position in the data set. */
        check = varnum(dsid, 'col_name');
        if check = 0 then put 'Variable does not exist';
        /* +(-1) backs the pointer up one column so the period follows the
           number without an intervening blank. */
        else put 'Variable is located in column ' check +(-1) '.';
        /* Release the handle obtained from open(). */
        rc = close(dsid);
    end;
run;

query schema table

/* Find the number of variables (nvar) and observations (nobs) of selected
   data sets. DICTIONARY.TABLES stores libname and memname in upper case,
   so the literals compared against them must be upper case as well;
   lowercase literals match nothing. memname is selected too so each
   output row can be attributed to its data set. */
proc sql;
    select memname, nvar, nobs
    from dictionary.tables
    where libname = 'MYLIBNAME'
      and memname in ('DATASET1', 'DATASET2');
quit;

Summary stat

/* Weighted summary statistics for the variables listed in &varlist.
   N counts only the non-missing values of each variable. */
proc means data=dev
           n nmiss min mean median max;
    var &varlist;
    weight weight1;
    /* NOTE(review): OUTPUT names no statistics, so the contents of
       dev_profile are whatever PROC MEANS writes by default - confirm
       this matches the statistic list requested above. */
    output out=dev_profile;
run;

Remove missing values

/* Complete cases with respect to the numeric variables of A:
   drop every observation that has at least one missing numeric value.
   (nmiss works on numeric arguments; character arguments would be
   converted to numeric.) */
data CompleteCases;
    set A;
    if nmiss(of _NUMERIC_) > 0 then delete;
run;
/* Complete cases with respect to ALL variables (numeric and character):
   drop every observation in which any variable is missing.
   cmiss accepts both types without char/num conversion. */
data CompleteCases;
    set Sashelp.Heart;
    if cmiss(of _ALL_) ne 0 then delete;
run;

Data Manipulation

Keep appending rows to a dataset

/* Accumulate rows into outlib.rv, one batch (data set tmp) per iteration.

   PROC APPEND creates the base data set on the first call if it does not
   exist, so no artificial seed row is needed and nothing has to be deleted
   afterwards. (Seeding with an empty row and running "if _n_=1 then delete"
   inside the loop removes a real data row on every iteration after the
   first.) PROC APPEND also avoids rewriting the whole table each pass.

   NOTE: %do %until(...) is a placeholder condition and, like any %DO loop,
   is meant to live inside a macro definition. */

/* Start clean: drop any leftover result from a previous run.
   NOWARN suppresses the error processing when rv does not exist yet. */
proc datasets library=outlib nolist nowarn;
    delete rv;
quit;

%do %until(...);
    /*generate a tmp dataset with the extra rows*/

    /* FORCE lets the append proceed when tmp's variables differ from the
       base; variables of tmp that are not in the base are dropped, so the
       first batch defines the column set. */
    proc append base=outlib.rv data=tmp force;
    run;
%end;

split dataset according to data type

/* Split the variable metadata of inlib.indata by data type.
   PROC CONTENTS writes one row per variable to outlib.contout; the TYPE
   column is then used to route each row to one of two output data sets. */
proc contents data=inlib.indata
              out=outlib.contout;
run;

data outlib.out1
     outlib.out2;
    set outlib.contout;
    select (type);
        when (1) output outlib.out1;   /* numeric variables (dates and times included) */
        when (2) output outlib.out2;   /* character variables */
        otherwise;                     /* any other value: written to neither */
    end;
run;

write some data to file

/* Append a macro assignment of the form
       %let var_list=
       <value of var from row 1>
       <value of var from row 2>
       ...
       ;
   to &path/output.sas, one value of VAR per observation of indata. */
data _null_;
    file "&path/output.sas" mod;          /* mod: append to the file instead of overwriting it */
    set indata end=eof;
    /* Single quotes keep the macro processor from acting on %let here;
       the text is written to the file literally. */
    if _n_ = 1 then put '%let var_list=';
    put var;
    /* Close the %let statement after the last observation. */
    if eof then put ';';
run;

Advanced Macro

How to loop over all variables:

%MACRO compare_all_vars(df1=,df2=,vars_skip=);
    /* Loop over every numeric variable of &df1 (excluding &vars_skip) and
       call %do_somthing once per variable name.

       Parameters:
         df1       - data set whose variables are enumerated
         df2       - not used in this skeleton
         vars_skip - blank-separated variables to leave out (may be empty)

       Side effect: creates/overwrites the data set CONTOUT. */

    /* Keep working macro variables out of the caller's scope, and make
       sure varnames exists even when the query below returns no rows. */
    %local varnames i newvar;
    %let varnames=;

    /* Emit the DROP= option only when there is something to drop;
       NOPRINT because only the OUT= data set is needed. */
    proc contents data=&df1.
        %if %length(&vars_skip.) > 0 %then %do; (drop=&vars_skip.) %end;
        out=contout noprint;
    run;

    /* No INOBS= cap here: it would silently truncate the variable list. */
    proc sql noprint;
        select name into :varnames separated by " "
        from contout
        where type=1;  /*type 1 indicate numerics variables*/
    quit;

    %PUT &varnames.;

    /* Nothing to iterate over: stop instead of calling the worker macro
       with an empty name. */
    %if %length(&varnames.) = 0 %then %return;

    %do i=1 %to %sysfunc(countw(&varnames., %str( )));
        %let newvar = %scan(&varnames., &i., %str( ));
        /* Pass the resolved variable name, not the literal text "newvar". */
        %do_somthing(&newvar.);
    %end;

%MEND;

generate a list of macro vars based on data

/* Build a numbered list of macro variables VAR1..VARn from the NAME column
   of indata, plus NVARS holding the count n, then loop over the list. */

/* Default so the loop below runs zero times when no row qualifies. */
%let NVARS=0;
DATA _NULL_;
       SET indata(where=(name ne "&name." and val ne "&val.")) end=_last_row;
       /* Think of key-value pairs: VARi = value of NAME in the i-th row read.
          CALL SYMPUTX strips leading/trailing blanks from the value, and
          CATS builds the macro variable name without a fixed-width format. */
       CALL SYMPUTX(cats('VAR', _N_), name);
       /* On the last row _N_ equals the number of rows read, i.e. the total. */
       if _last_row then CALL SYMPUTX('NVARS', _N_);
RUN;
/* This is how to use the list of macro variables.
   NOTE: %DO is only valid inside a macro definition; place this loop in one.
   NOTE(review): outdata is read by the SET statement below, so it
   presumably has to exist before the first iteration - confirm. */
%DO I=1 %TO &NVARS;
DATA tmp;
length varname $32;
set &prefix..&&VAR&I; /* &&VAR&I resolves to the value of VARi */
run;
data outdata;
set outdata tmp;
run;
%end;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment