***********************************************************************; * This program computes the Pearson correlation using a * simple empirical bootstrap through repeated sampling with * replacement. The program was written by Michael Brannick. ***********************************************************************; data d1; input X Y; *********************************** * the program wants the two * variables side-by-side, so that * each person(observation) * is a row. ***********************************; array items X Y; do over items; if items eq . then delete; end; *********************************** * the above statements delete any * missing observations ***********************************; cards; 10 12 12 22 1 2 12 8 5 6 10 10 4 4 5 5 6 7 6 8 8 7 9 5 8 9 8 10 8 11 11 8 10 11 10 13 8 12 7 11 8 5 *proc print; proc iml; *********************************************************************; use d1 var{X Y}; **********************************************************************; * the dataset name that you want to analyze follows the word 'use'. * The name of the variables in the dataset are named after 'var'. **********************************************************************; read all var{X} into X; read all var{Y} into Y; Dat = X||Y; N = nrow(Dat); *print dat; ********************************************* * find observed correlation *********************************************; corr1 = corr(Dat); print 'Sample size is' N; print 'observed correlation is' corr1; *********************************************************************** *** Set up bootstrap estimates ***********************************************************************; iter = 10000; *the number of samples for the empirical sampling distribution; samdat = j(N,2,-9); out1 = j(iter,1,-9); * placeholder for estimates from each sample; do boot = 1 to iter; * main loop to compute number of estimates; ******************************************************************; do sam = 1 to N; * pick a sample with replacement; row = uniform(0)*N; row = round(row); if row < 1 then row = 1; if row > N then row =N; samdat[sam,] = Dat[row,]; end; * samdat should fill with data; *print g1dat g2dat; *******************************************************************; * Compute mean difference *******************************************************************; samcorr = corr(samdat); ******************************************************************; out1[boot,] = samcorr[2,1]; * Save estimate for the Nth sample; *print diff; end; *******************************************************************; * For a two-tailed test based on 10000 iterations, the extremes are * 2500 and 9750. *******************************************************************; * Find bootstrap confindence interval; rankvar = rank(out1[,1]); boot_low = out1[loc(rankvar=250),1]; boot_up = out1[loc(rankvar=9750),1]; print 'empirical (bootstrap) confidence interval' boot_low boot_up; print 'result is significant if empirical confidence interval DOES NOT contain zero'; *Output the results; Create out1 from out1; append from out1; data d2; set out1; proc univariate plot normal; quit; run;