SAS tips: October 2007

Saturday, October 27, 2007

format

/* formats.sas7bcat is saved under h:\temp */


libname ex 'h:\temp';

/* Severity format, stored in the permanent format catalog of library EX. */
proc format library=ex;
  value sev 1='mild'       /* label was misspelled 'mile' */
            2='moderate'
            3='severe';
run;

/* To use the permanent format */
libname ff 'h:\temp';
options fmtsearch=(ff);

SAS/IML - things worth remembering

# sasdataset -> matrix
/* Inside PROC IML: open the data set, load it into matrix M, and print it. */
use sas_data;
read all into M; /* read all variables into a matrix M */
print M;

# matrix -> sasdataset
/* Write a 3x3 matrix out as the data set sas_data. */
proc iml;
M={1 2 3, 4 5 6, 7 8 9};

create sas_data from M;   /* define the data set from the columns of M */
append from M;            /* write the rows of M as observations */
close sas_data;

quit;

# pi
pi=constant('PI');

# rank of a matrix
/* ginv(a)*a is the projection onto the row space of a, so its trace equals
   rank(a); round() removes floating-point noise from the trace. */
rank=round(trace(ginv(a)*a));

proc glmpower


/* Build the exemplary data set: one observation per variety x exposure cell.
   The two data lines hold one row per variety, with the three exposure
   values read left to right by the held (@@) input. */
data exemplary;
  do variety=1 to 2;
    do exposure=1 to 3;
      input height @@; output;
    end;
  end;
datalines; /* expected population means */
14 16 21 
10 15 16
run;

/* Power for the two-way model and three contrasts, using the cell means in
   EXEMPLARY, an error SD of 5 and 60 subjects in total (power=. is solved for). */
proc glmpower data=exemplary;
  class variety exposure;
  model height = variety | exposure;
  contrast "variety"    variety 1 -1;
  contrast "exp 1 vs 3" exposure 1 0 -1;
  contrast "inter"      variety*exposure 1 1 1, -1 -1 -1;
  power
    stddev = 5
    ntotal = 60
    power  = .;
  plot x=n min=30 max=90;
run;

proc power - sample size & power

/* one sample t-test */

/* Solve for power: the missing value (power=.) marks the unknown. */
proc power;
  onesamplemeans
    alpha  = 0.05
    sides  = 2
    nullm  = 20
    mean   = 22
    stddev = 4
    ntotal = 44
    power  = .;
run;

/* Solve for the total sample size: ntotal=. marks the unknown. */
proc power;
  onesamplemeans
    alpha  = 0.01
    sides  = u   /* U: upper one-sided, L: lower one-sided */
    nullm  = 20
    mean   = 22
    stddev = 4
    ntotal = .
    power  = 0.8;
run;

/* paired t-test: power for 20, 30 and 40 pairs */
proc power;
  pairedmeans test=diff
    alpha    = .01
    sides    = 2
    meandiff = 3
    stddev   = 3.5
    corr     = .2
    npairs   = 20 30 40   /* option name is NPAIRS (was misspelled npaires) */
    power    = .;
run;

/* independent t-test: total sample size for 80% power over a grid of
   mean differences and standard deviations */
proc power;
  twosamplemeans
    meandiff     = 3 to 4 by .5   /* the same as 3 3.5 4 */
    stddev       = 8 to 9 by .5
    groupweights = (1 1)
    power        = 0.8
    ntotal       = .;
  plot y=power min=0.5 max=0.99;
run;

/* a different way: give the group means and allocation weights directly */
proc power;
  twosamplemeans
    groupmeans   = (13 14) (13 14.5) (13 15)   /* same as 13|14 14.5 15 */
    stddev       = 1.2 1.7
    groupweights = 1|1 2 3                     /* same as (1 1) (1 2) (1 3) */
    power        = 0.8
    ntotal       = .;
run;

* Power vs Effect Size;
/* The comment statement above must end with a semicolon; otherwise it
   swallows the PROC POWER statement that follows. */
proc power;
  twosamplemeans test=diff
    meandiff  = 0 to 2.5 by 0.5
    stddev    = .5657 1.0 1.4318
    power     = .
    npergroup = 10;
  plot x=effect interpol=join;
run;

/* Multiple regression: power for testing 1 of 7 predictors */
proc power;
  multreg
    model           = random
    nfullpredictors = 7
    ntestpredictors = 1
    partialcorr     = 0.35
    ntotal          = 100
    power           = .;
  plot x=n min=50 max=150;
run;

/* One-way ANOVA: overall F test for four group means */
proc power;
  onewayanova test=overall
    alpha      = .05
    groupmeans = (5 7 3 11)
    stddev     = 4 5 6
    npergroup  = 10 15
    power      = .;
run;

/* Normal approximation for testing a single proportion */
proc power;
  onesamplefreq test=z method=normal   /* test=adjz with continuity correction */
    sides          = 1                 /* one-sided */
    alpha          = .05
    nullproportion = 0.3
    proportion     = .2
    ntotal         = .
    power          = .8;
run;

/* Fisher's exact test for two proportions */
proc power;
  twosamplefreq test=fisher
    proportiondiff = 0.10 to 0.15 by 0.01
    refproportion  = .2
    npergroup      = 150
    power          = .;
run;

/* Likelihood-ratio chi-square test for two proportions
   (use test=pchi for the Pearson chi-square test instead) */
proc power;
  twosamplefreq test=lrchi
    proportiondiff = 0.10 to 0.15 by 0.01
    refproportion  = .2
    npergroup      = 150
    power          = .;
run;

/* Correlation: Fisher z test with 6 partialled-out variables */
proc power;
  onecorr dist=fisherz
    npvars   = 6
    corr     = .35
    nullcorr = .2
    sides    = 1
    ntotal   = 100
    power    = .;
run;

/* Log-rank comparison of two survival curves with exponential hazards */
proc power;
  twosamplesurvival test=logrank
    gexphs              = 0.3567 | 0.5978 .6931
    grouplossexphazards = (0.3567 0.3567)
    accrualtime         = 1
    followuptime        = 1
    groupweights        = (1 2)
    power               = .
    ntotal              = 225;
run;

/* TOST: equivalence test on the ratio of two means, solving for n per group */
proc power;
  twosamplemeans test=equiv_ratio
    lower     = .8
    upper     = 1.25
    meanratio = 1 1.2
    cv        = .1 .2 .3
    npergroup = .
    power     = .8 .9;
run;

Simple ODS

# Trace output
ods trace on / label;

proc mixed;
...
run;

ods trace off;

# use it
/* Suppress all displayed output while the table is captured. */
ods select none;
/* Route the output object named LSMeans into the data set lsm. */
ods output LSMeans=lsm;
proc mixed;
...
run;
/* Restore normal display of every output object. */
ods select all;

Thursday, October 25, 2007

first. & last.

/* Number the members of each family 1, 2, ... in the order they are read. */
data pedigree_id;
  set family;
  by id;
  /* Reset at the first row of each id group; the sum statement below then
     counts up and keeps ped_id retained across observations. */
  if first.id then ped_id=0;
  ped_id+1;
run;

/* using proc sql */
/* _n_ is a row counter from monotonic(); ped_id is that counter minus the
   smallest counter within the same id group, plus 1.
   NOTE(review): monotonic() is an undocumented function, and the numbering
   presumably relies on the rows being processed in their stored order --
   verify the result against the data step version before relying on it. */
proc sql;
create table pedigree_id as
select family.*, monotonic() as _n_, calculated _n_-min(calculated _n_)+1 as ped_id
from family
group by id;
quit;

Small Tips

# print first 10 observations only
proc print data=aa (obs=10);

# apply two or more data set options at once
proc print data=aa (keep=sex grade where=(sex='m'));

# substr
substr(var, position, length)

# get rid of duplications
proc sort data=aa nodupkey;
by id;

# merge summary statistics
/* Attach the summary values to every detail observation. */
data combined;
/* Read summary only on the first iteration; variables that come from a SET
   statement are retained, so its values stay available on every later row. */
if _n_=1 then set summary;
set detail;
run;

# coalesce & coalescec : first non-missing
/* coalesce returns the first non-missing numeric argument (42 here);
   coalescec does the same for character arguments ('Goodbye' here). */
x = coalesce( ., 42, 52);
x = coalescec ('', 'Goodbye', 'Hello');

# get rid of characters after '.' or '('
/* Truncate crfpage at the first '.' or '('. */
loc=indexc(crfpage,'.','(');
/* SUBSTRN accepts a zero length, so a value that starts with a delimiter
   (loc=1) becomes blank; SUBSTR would flag length 0 as an invalid argument. */
if loc>0 then crfpage=substrn(crfpage,1,loc-1);


# combine strings 
/* Rebuild each category list as single characters separated by ", ". */
data comb_txt;
  input id $4. +1 categry $7.;
  aa = compress(categry," ,");   /* strip blanks and commas */
  n=length(aa);
  cat=substr(aa,1,1);
  do i=2 to n;
    cat=trim(left(cat))||", "||substr(aa,i,1);
  end;
  cards;
0149 2, 5
0148 7,
0150 8, 5
0151 3, 4, 6
0152
;
run;
/* The lone semicolon ends the in-stream data; without it, whatever text
   follows the last data line would be read as more input records. */

proc freq

## proc freq order = data;

## No column totals, row totals and percent
tables var1*var2 / nocol norow nopercent;

## Odds Ratio and Relative Risk
tables var1*var2 / CMH;

## McNemar's test and Kappa
tables var1*var2 / AGREE;

## obtain cell counts
proc freq data= data_name;
tables var1*var2*var3 / sparse out= out_name;

Data Step

/* database file */
filename class 'c:\data\class.dbf';
proc dbf db4=class out=class;
run;

/* MS access file */
libname file 'c:\data\filename.mdb';
proc print data=file.table;
run;

/* proc import */
/* DLM indicates a delimited file with the default delimiter of a blank */

/* delimited file using '!' as the delimiter; names come from the first row */
proc import datafile='pathname'
            out=mylib.data
            dbms=dlm
            replace;
  getnames=yes;
  delimiter='!';

/* csv file */
proc import datafile='pathname'
            out=mylib.data
            dbms=csv
            replace;
  getnames=yes;

/* proc export (csv) */
proc export data=sasuser.houses outfile="/myfiles/class.csv" dbms=csv;

/* input */
input (x1-x35)($1.);
input name $10. +3 x1 7. x2 6. x3 6.;
input (name x1-x3)($10. +3 7. 2*6.);
input (name x1-x3)($10. +3 7. 2*6.) (y1-y5)(2*6. 3. 2*8.);

/* character -> numeric */
input(num, 8.0);

/* numeric -> character */
left(char);
put(char, 1.)

/* input 2*3 table */

do i='No','Yes';

do i=1,2,3,4;

/* Read a 2x3 table from one data line: i indexes rows, j indexes columns,
   and the held input (@@) takes the six values in row-major order. */
data twobythree;
  do i = 1 to 2;
    do j = 1 to 3;
      input x @@;
      output;
    end;
  end;
datalines;
11 12 13 21 22 23
run;

/* Each data line holds an irrigation label followed by 8 fruit weights;
   write one observation per weight (8 observations per line). */
data methods;
  /* read the label and hold the line for the weights that follow */
  input irrig $ @@;
  do bloc=1 to 8;
    input fruitwt @@;
    /* natural log of the weight; one observation is written per block */
    logfwt=log(fruitwt); output;
  end;
  datalines;
trickle 450 469 249 125 280 352 221 251
basin 358 512 281 58 352 293 283 186
spray 331 402 183 70 258 281 219 46
sprnkler 317 423 379 63 289 239 269 357
flood 245 380 263 62 336 282 171 98
run;

/* generate 2 datasets */
/* Split class into two data sets: sex='M' goes to males, all else to females. */
data males females;
  set class;
  select (sex);
    when ('M') output males;
    otherwise  output females;
  end;
run;


/* Missing */
/* Recode the sentinel value 999 to missing in every numeric variable of aa. */
data class;
  set aa;
  /* implicitly subscripted array over all numeric variables read so far */
  array x _numeric_;
  /* DO OVER visits each element of the implicit array in turn */
  do over x;
    if x=999 then x=.; /* missing */
  end;
run;

/* random number */
/* Ten uniform random numbers; the loop counter n is dropped from the output. */
data random(drop=n);
  do n = 1 to 10;
    /* 0 means the seed number depends on current time */
    x = uniform(0);
    output;
  end;
run;

/* random number generator */
rannor(seed);
ranuni(seed);
uniform(seed);
ranbin(seed,n,p);
rancau(seed);
ranexp(seed);
rangam(seed, alpha);
ranpoi(seed, lambda);
rantbl(seed, p1, p2...,pn);

SAS tips