************************************************** * A monte carlo program to sample studies from * existing meta analyses taken from JAP, PPsych * and Academy of Mgmt Journal. **************************************************; * grab data on meta analyses from Excel **************************************************; PROC IMPORT OUT= WORK.meta DATAFILE= "\\file1.cas.usf.edu\home\mbrannic\Desktop\all_metas_11.17.xls" DBMS=EXCEL REPLACE; SHEET="Sheet1$"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES; RUN; *output average n skew and kurtosis by study; * output datset is nbar; proc means data= work.meta mean kurtosis skew nway; class MA; var N; output out=nbar mean=m_nbar kurtosis=k_nbar skew=s_nbar; * get the mega distribution of the statistics from each M-A; *proc univariate data=nbar plot; *var m_nbar k_nbar s_nbar; *get medians; proc means data=nbar median nway; var m_nbar k_nbar s_nbar; *we want to group the meta-analyses into four groups based on average sample size and on skew of sample size; *median splits; Data conditions; set nbar; if m_nbar le 168.5689655 then nbardi=0; if m_nbar gt 168.5689655 then nbardi=1; *proc print; *look at the design; *proc freq data=conditions; *tables skewdi*nbardi; *Run; *make groups; data conditionsrevised; set conditions; if nbardi=0 then group=1; if nbardi=1 then group=2; keep ma group; *proc print; *descriptives by group; *proc means data=conditionsrevised; *class group; *var m_nbar k_nbar s_nbar; *we want each meta-analysis represented by a row of sample sizes; *dropping variables and transposing; data dropped; set work.meta; keep MA n; proc sort data=dropped; by MA; proc transpose data=dropped out=drop2; by MA; run; *this has produced a matrix with MA number and then the number of sample sizes * contained in the study, one study per column; * print; *merge all info; data all; merge drop2 conditionsrevised; by MA; *proc print data=all; Proc sort data=all; by group; data final; set all; keep col1-col97 group; *proc print; data group1; set final; if group=1; *proc print; data group2; set final; if group=2; *proc print; run; * read data into a matrix for each group; proc iml; use group1 var _ALL_; read all var _ALL_ into x1; numbmetas1= nrow(x1); numbcol1= ncol(x1)-1; use group2 var _ALL_; read all var _ALL_ into x2; numbmetas2= nrow(x2); numbcol2= ncol(x2)-1; *print x1 x2 x3 x4; *print numbmetas1 numbcol1 numbmetas2 numbcol2 numbmetas3 numbcol3 numbmetas4 numbcol4; * set up bootstrap estimates; r= {1 .0, .0 1}; s = r; ***********************************; *set population values for rho and SDrho; * rho values .10, .22 and .44; * SDrho values .02, .11, .18; Prho=.10; PSDrho=.02; **************************************************; maxrho = .96; flag1 =0; *counter for number of times maxrho is exceeded; rhocount = 0; *counter for total number of rho values sampled; maxr = .99; flag2 =0; *counter for number of times maxr is exceeded; rcount = 0; *counter for total number of r values sampled; ***********************************; *Input the number of iterations or simulated meta-analyses, i; i=5000; print Prho PSDrho maxrho maxr i; out10 = j(i,14,-9); *placeholder for results of a meta-analysis; Y = j (i,28,-9); * Y is the placeholder for all the outputs from all the groups ************************************************************************************** * 1. first, we have to pick the group we want to sample from. * 2. then, we have to pick the study from the group. * 3. then we have to find the number of samples in the study. * 4. then we have to sample that number of samples of the given sizes for that study **************************************************************************************; * 1. Pick the group; **************************************************************************************; p=2; do group = 1 to p;* p is the number of iterations and group is its counter; * group is the main loop for different groups; if group = 1 then x = x1; if group = 2 then x = x2; *********************************** **************************************; * start to randomly pick meta from the chosen group; **************************************; do l = 1 to i; * i is the number of iterations and l is its counter; * l is the main loop; *print x; **********************************************************************************; * 2. Choose the study from the group; **********************************************************************************; rowcount = nrow(x); * find the number of rows in X; *print rowcount; study = round(uniform(0)*rowcount+.5); *find a random number from 1 to the number of studies; if study < 1 then study = 1; if study > rowcount then study = rowcount; *limit the values of study just in case; *print group study; ***********************************************************************************; * 3. find the number of samples in the study ***********************************************************************************; samples = x[study,]; *print samples; Ns = 0; do a = 1 to ncol(x) until (samples[,a]<4); Ns = a-1; if Ns = 98 then Ns = 97; end; holder1 = j(Ns,2,-9); *placeholder for the correlations for this meta-analysis; *********************************************************************************** * Ns is the number of studies in the chosen meta analysis ***********************************************************************************; * 4. Sample the number of studies of the given sample sizes from the population * of interest; ***********************************************************************************; sizes = samples[,1:Ns]; *pick the columns with nonzero sample sizes; sizes = sizes`; *transpose it to be a column vector; *print sizes; **************************************************************************************** * Now we can begin the main loop, by sampling data for a meta-analysis ****************************************************************************************; *Find data for one study; *****************************************************; **Set the sample size to be the first column in the sample1 matrix *****************************************************; k = Ns; *print k; do h=1 to k; N = sizes[h,1]; dat1=normal(j(N,2,0)); *print N dat1; ***********************************; * Sample rho from a distribution; * Note the distribution was set up above; **********************************; s[2,1]=Prho+normal(0)*PSDrho; *set the population correlation to be the mean plus a random deviate (RE case); if s[2,1] > maxrho then flag1 = flag1+1; if s[2,1] < (-1*maxrho) then flag1 = flag1+1; if s[2,1] > maxrho then s[2,1]= maxrho; if s[2,1] < (-1*maxrho) then s[2,1]=(-1*maxrho); *set the population mean to a smaller value if too large; rhocount=rhocount+1; s[1,2] = s[2,1]; *********************************** * compute the sample correlation * ***********************************; dat = dat1*half(s); *print s dat; corr1=corr(dat); *print corr1; if corr1[2,1] > maxr then flag2 = flag2+1; if corr1[2,1] > maxr then corr1[2,1]=maxr; rcount = rcount+1; holder1[h,1]=corr1[2,1]; holder1[h,2]=N; *COMPUTE Hotelling's transformation; *out1[h,4]=(corr1[2,1]-((1/(2*(N-3)))*(corr1[2,1]*(1-(corr1[2,1]##2))))); *COMPUTE Fischer's r to z transformation; *out1[h,5]=.5*log((1+out1[h,4])/(1-out1[h,4])); end; *end loop to create data for one meta analysis; *print holder1; ****************************************************************************** * Holder1 has the data for 1 meta analysis ******************************************************************************; ******************************************************************************* * Compute meta analyses *******************************************************************************; *********************************** **Schmidt and Hunter Bare Bones***; ***********************************; out1=holder1; N = out1[,2]; sumn=N[+,]; obsr = out1[,1]; Nr= obsr`*N; SHaver=Nr/sumn; varr1= obsr - SHaver; varr2=N`* varr1##2; varr=varr2/sumn; samperr = (1-SHaver**2)**2/((sumn/k)-1); SHresr=varr-samperr; if SHresr < 0 then SHresr = 0; SHsdrho=SHresr**.5; SHrho=SHaver; out10[l,1]=SHrho; out10[l,2]=SHsdrho; *print N obsr SHresr SHsdrho SHrho; **************************************** Unit weight model ****************************************; unitvec=j(k,1,1); unave = (obsr`*unitvec)/k; var1 = obsr-unave; var2 = unitvec`*var1##2; varun = var2/k; sampru = (1-unave**2)**2/((sumn/k)-1); unresr = varun - sampru; if unresr < 0 then unresr = 0; unsdrho = unresr**.5; out10[l,9] = unave; out10[l,10] = unsdrho; *print N unitvec obsr SHaver unave varr1 var1 varr2 var2; *print SHrho SHsdrho unave unsdrho varr varun samperr sampru; ******************************************************************* *RE Model (Hedges & Vevea), with weight as "(N-1)/((1-r^2)^2)" * *Computations; * *******************************************************************; ***** Find initial inverse variance weights (sampling var of r) ******************; w= j(k,2,-9); do count = 1 to k; w[count,1] = out1[count,2]-1; w[count,2] = (1-out1[count,1]#out1[count,1])#(1-out1[count,1]#out1[count,1]); end; *print w; w = w[,1]/w[,2]; *print w; *Compute initial weighted average**************************************************; M_HVr= (obsr`*w)/sum(w); *print M_HVr; out10[l,3]= M_HVr; *****************************************************************************; *Find REVC *****************************************************************************; *Find Q; q1= obsr - M_HVr; q=w`* q1##2; *Compute sum of w and w-squared; sumw=w[+]; w2=w#w; *Compute c; sumw2=w2[+]; c1=sumw2/sumw; c=sumw-c1; *Compute Estimated variance in rho = REVC; if q >= (k-1) then TauHVr=(q-(k-1))/c; if q < (k-1) then TauHVr=0; TauHVr= TauHVr**.5; *print TauHVr; *if c <=0 then do; *print w q c c1 TauHVr; *end; ************************************************************************** * Compute revised weighted average *************************************************************************; v1 = 1/w; *print v1; revw = 1/(v1 + j(k,1,TauHVr)); rM_HVr = (obsr`*revw)/sum(revw); *print rM_HVr; out10[l,4]=TauHVr; out10[l,5]= rM_HVr; *print TauHVr rM_HVr; * if rwave > .9 then do; *print M_HVr; **************************************************************************** * HV model (same as above) with Empirical Bayes Estimates as replacement estimates * for r (smoothed weights?). This will pull r values toward the mean * depending on their local variance and the REVC. ****************************************************************************; TotN = sum(N); Vm1 = ((1-rM_HVr#rM_HVr)#(1-rM_HVr#rM_HVr))/(TotN-1); *SEM; Vm2 = TauHVr; Wm = 1/(Vm1+Vm2); *weight for the mean is based on squared SE of mean plus Vrho; Wtm = Wm#rM_HVr; * compute weighted mean; sums = Wtm+(obsr#w); eb = sums#1/(Wm+w); * compute empirical Bayes estimates; webs= j(k,2,-9); do count = 1 to k; webs[count,1] = out1[count,2]-1; webs[count,2] = (1-eb[count,1]#eb[count,1])#(1-eb[count,1]#eb[count,1]); end; web =webs[,1]/webs[,2]; M_eb= (obsr`*web)/sum(web); *compute mean based on EB estimates; d1=eb-M_eb; d1 = sum(web#(d1#d1)); Veb = d1/sum(web); *compute weighted variance of EB estimates; if Veb < 0 then Veb = 0; * I added this statement; *print totN Vm1 Vm2 Wm w Wtm sums rM_HVr TauHVr M_eb Veb; *print N obsr eb web M_eb Veb; out10[l,11] = M_eb; out10[l,12] = Veb; **************************************************************************** * Combined model with estimated mean by HVr approach and estimated REVC via SH approach * Hence, we may take advantage of merits from both approaches; ****************************************************************************; HV_SHrho= rM_HVr; variance1= obsr - HV_SHrho; variance2=N`* variance1##2; variance=variance2/TotN; samplvar = (1-HV_SHrho**2)**2/((TotN/k)-1); HV_SHresr=variance-samplvar; if HV_SHresr < 0 then HV_SHresr = 0; HV_SHsdrho=HV_SHresr**.5; out10[l,13]=HV_SHrho; out10[l,14]=HV_SHresr; *print TotN samplvar HV_SHresr HV_SHrho; *there were a lot of zero for HV_SHresr for some reason; ************************************************************************* *RE Model (Hedges & Vevea), with weight as "N-3" and r converted into z * *Computations; * *************************************************************************; ***** Find initial weights (sampling var of r) ******************; wz= j(k,1,-9); do count = 1 to k; wz[count,1] = out1[count,2]-3; end; *print wz; *****convert r to z **************************************; z=j(k,1,-9); do count = 1 to k; z[count,1] = .5*log((1+out1[count,1])/(1-out1[count,1])); end; obsr= z[,1]; *******Compute initial weighted average************************************; M_HVz= (obsr`*wz)/sum(wz); *print M_HVz; **********************************************************************; *Find REVC ******************************************; *Find Q; q2= obsr - M_HVz; q3=wz`* q2##2; *Compute sum of w and w-squared; sumwz=wz[+]; wz2=wz#wz; *Compute c; sumwz2=wz2[+]; c2=sumwz2/sumwz; c3=sumwz-c2; *Compute Estimated variance in rho = REVC; if q3 >= (k-1) then TauHVz=(q3-(k-1))/c3; if q3 < (k-1) then TauHVz=0; TauHVz= TauHVz**.5; *print TauHVz; *if c3 <=0 then do; *print wz q3 c3 c2 TauHVz; *end; ************************************************************************** * Compute revised weighted average *************************************************************************; v2 = 1/wz; *print v1; revwz = 1/(v2 + j(k,1,TauHVz)); rM_HVz = (obsr`*revwz)/sum(revwz); *print rM_HVz; *Convert z-unit back to r unit; M_HVz = (EXP(M_HVz*2)-1)/(EXP(M_HVz*2)+1); rM_HVz = (EXP(rM_HVz *2)-1)/(EXP(rM_HVz*2)+1); out10[l,6]=M_HVz; out10[l,7]=TauHVz; out10[l,8]= rM_HVz; *print M_HVz TauHVz rM_HVz; * if rM_HVz > .9 then do; *print M_HVz TauHVz rM_HVz; end; * end main loop; ********************************************************* * Find average and RMS across iterations *********************************************************; popmean = j(i, 1, Prho); poptau = j(i, 1, PSDrho); *schmidt hunter; rmsrSHM = (out10[,1]-popmean)#(out10[,1]-popmean); rmsrSHM = sqrt(sum(rmsrSHM)/i); rmsrSHT = (out10[,2]-poptau)#(out10[,2]-poptau); rmsrSHT = sqrt(sum(rmsrSHT)/i); wtmeanSH = sum(out10[,1])/i; wtTauSH = sum(out10[,2])/i; abserrSHM =abs(out10[,1]-popmean); abserrSHM = sum(abserrSHM)/i; abserrSHT = abs(out10[,2]-poptau); abserrSHT = sum (abserrSHT)/i; *hedges r; rmsrHVrM = (out10[,3]-popmean)#(out10[,3]-popmean); rmsrHVrM = sqrt(sum(rmsrHVrM)/i); rmsrHVrT = (out10[,4]-poptau)#(out10[,4]-poptau); rmsrHVrT = sqrt(sum(rmsrHVrT)/i); wtmeanHVr = sum(out10[,3])/i; wtTauHVr = sum(out10[,4])/i; abserrHVrM =abs(out10[,3]-popmean); abserrHVrM = sum(abserrHVrM)/i; abserrHVrT = abs(out10[,4]-poptau); abserrHVrT = sum (abserrHVrT)/i; *hedges z; rmsrHVzM = (out10[,6]-popmean)#(out10[,6]-popmean); rmsrHVzM = sqrt(sum(rmsrHVzM)/i); rmsrHVzT = (out10[,7]-poptau)#(out10[,7]-poptau); rmsrHVzT = sqrt(sum(rmsrHVzT)/i); wtmeanHVz = sum(out10[,6])/i; wtTauHVz = sum(out10[,7])/i; abserrHVzM =abs(out10[,6]-popmean); abserrHVzM = sum(abserrHVzM)/i; abserrHVzT = abs(out10[,7]-poptau); abserrHVzT = sum (abserrHVzT)/i; *unit; rmsrunM = (out10[,9]-popmean)#(out10[,9]-popmean); rmsrunM = sqrt(sum(rmsrunM)/i); rmsrunT = (out10[,10]-poptau)#(out10[,10]-poptau); rmsrunT = sqrt(sum(rmsrunT)/i); wtmeanun = sum(out10[,9])/i; wtTauun = sum(out10[,10])/i; abserrunM =abs(out10[,9]-popmean); abserrunM = sum(abserrunM)/i; abserrunT = abs(out10[,10]-poptau); abserrunT = sum (abserrunT)/i; *empirical Bayes; rmsrebM = (out10[,11]-popmean)#(out10[,11]-popmean); rmsrebM = sqrt(sum(rmsrebM)/i); rmsrebT = (out10[,12]-poptau#poptau)#(out10[,12]-poptau#poptau); rmsrebT = sqrt(sum(rmsrebT)/i); wtmeaneb = sum(out10[,11])/i; wtTau2eb = sum(out10[,12])/i; * combined HVr and SH (REVC); rmsrH_SM = (out10[,13]-popmean)#(out10[,13]-popmean); rmsrH_SM = sqrt(sum(rmsrH_SM)/i); rmsrH_ST = (out10[,14]-poptau#poptau)#(out10[,14]-poptau#poptau); rmsrH_ST = sqrt(sum(rmsrH_ST)/i); wtmeanH_S = sum(out10[,13])/i; wtTau2H_S = sum(out10[,14])/i; *****************************************; print wtmeanSH wtmeanHVr wtmeanHVz wtmeanun wtmeaneb wtmeanH_S wtTauSH wtTauHVr wtTauHVz wtTauun wtTau2eb wtTau2H_S rmsrSHM rmsrHVrM rmsrHVzM rmsrunM rmsrebM rmsrH_SM rmsrSHT rmsrHVrT rmsrHVzT rmsrunT rmsrebT rmsrH_ST abserrunM abserrSHM abserrHVrM abserrHVzM abserrunT abserrSHT abserrHVrT abserrHVzT; *print out10; print rhocount flag1 rcount flag2; Y[,((group-1)*14+1)] = out10[,1]; Y[,((group-1)*14+2)] = out10[,2]; Y[,((group-1)*14+3)] = out10[,3]; Y[,((group-1)*14+4)] = out10[,4]; Y[,((group-1)*14+5)] = out10[,5]; Y[,((group-1)*14+6)] = out10[,6]; Y[,((group-1)*14+7)] = out10[,7]; Y[,((group-1)*14+8)] = out10[,8]; Y[,((group-1)*14+9)] = out10[,9]; Y[,((group-1)*14+10)] = out10[,10]; Y[,((group-1)*14+11)] = out10[,11]; Y[,((group-1)*14+12)] = out10[,12]; Y[,((group-1)*14+13)] = out10[,13]; Y[,((group-1)*14+14)] = out10[,14]; end; * end the loop for the 4 groups; *print Y; create final from Y; append from Y; *print final; quit; data last; set final; SHrho_ln = col1; SHresr_ln = col2; MHVr_ln = col3; TauHVr_ln = col4; rMHVr_ln= col5; MHVz_ln = col6; TauHVz_ln = col7; rMHVz_ln = col8; Uwrho_ln = col9; Uwtau_ln = col10; Ebrho_ln = col11; Ebtau_ln = col12; H_Srho_ln = col13; H_Stau_ln = col14; SHrho_hn = col15; SHresr_hn = col16; MHVr_hn = col17; TauHVr_hn = col18; rMHVr_hn= col19; MHVz_hn = col20; TauHVz_hn = col21; rMHVz_hn = col22; Uwrho_hn = col23; Uwtau_hn = col24; Ebrho_hn = col25; Ebtau_hn = col26; H_Srho_hn = col27; H_Stau_hn = col28; Keep Uwrho_ln Uwrho_hn SHrho_ln SHrho_hn rMHVr_ln rMHVr_hn MHVr_ln MHVr_hn MHVz_ln MHVz_hn rMHVz_ln rMHVz_hn Ebrho_ln Ebrho_hn H_Srho_ln H_Srho_hn Uwtau_ln Uwtau_hn SHresr_ln SHresr_hn TauHVr_ln TauHVr_hn TauHVz_ln TauHVz_hn Ebtau_ln Ebtau_hn H_Stau_ln H_Stau_hn; file 'H:\research\M-A simulation paper\Monte Carlo\Publication\HVr and HVz original weight\StatMosnew2(.10,.02).dat'; put (Uwrho_ln Uwrho_hn SHrho_ln SHrho_hn rMHVr_ln rMHVr_hn MHVr_ln MHVr_hn MHVz_ln MHVz_hn rMHVz_ln rMHVz_hn Ebrho_ln Ebrho_hn H_Srho_ln H_Srho_hn Uwtau_ln Uwtau_hn SHresr_ln SHresr_hn TauHVr_ln TauHVr_hn TauHVz_ln TauHVz_hn Ebtau_ln Ebtau_hn H_Stau_ln H_Stau_hn) (7.3); run; proc univariate data=last plot; var Uwrho_ln Uwrho_hn SHrho_ln SHrho_hn rMHVr_ln rMHVr_hn MHVr_ln MHVr_hn MHVz_ln MHVz_hn rMHVz_ln rMHVz_hn Ebrho_ln Ebrho_hn H_Srho_ln H_Srho_hn Uwtau_ln Uwtau_hn SHresr_ln SHresr_hn TauHVr_ln TauHVr_hn TauHVz_ln TauHVz_hn Ebtasu_ln Ebtau_hn H_Stau_ln H_Stau_hn; proc means data=last; var Uwrho_ln Uwrho_hn SHrho_ln SHrho_hn rMHVr_ln rMHVr_hn MHVr_ln MHVr_hn MHVz_ln MHVz_hn rMHVz_ln rMHVz_hn Ebrho_ln Ebrho_hn H_Srho_ln H_Srho_hn Uwtau_ln Uwtau_hn SHresr_ln SHresr_hn TauHVr_ln TauHVr_hn TauHVz_ln TauHVz_hn Ebtau_ln Ebtau_hn H_Stau_ln H_Stau_hn; Run; *proc univariate data=last plot; *proc means data=last; *var SHrho_g1 SHresr_g1 rM_HVr_g1 TauHVr_g1 rM_HVz_g1 TauHVz_g1 SHrho_g2 SHresr_g2 rM_HVr_g2 TauHVr_g2 rM_HVz_g2 TauHVz_g2 SHrho_g3 SHresr_g3 rM_HVr_g3 TauHVr_g3 rM_HVz_g3 TauHVz_g3 SHrho_g4 SHresr_g4 rM_HVr_g4 TauHVr_g4 rM_HVz_g4 TauHVz_g4; *proc plot data=last; *plot col2*col4 col7*col9 col12*col14 col17*col19; *data l1; *set last; *type = 1; *y = col1; *keep type y; *data l2; *set last; *type = 2; *y = col5; *keep type y; *data all; *set l1 l2; *proc shewhart; *boxchart y*type; *run;