2014年7月10日 星期四

常用的資料整理程式

/*依 ID,Merge 兩個資料檔 ,再依某變項 (cmdecod),保留唯一的值*/

/*方法一*/
DATA t ; merge cm(in=a )  treat ; by subjid  ;  if a ;
run;
proc sort data=T    nodupkey out=t3;
by subjid cmdecod;
run;

/*方法二*/
DATA t ; merge cm(in=a )  treat ; by subjid  ;  if a ;
run;
data t2  ; set t ; by subjid cmdecod;
if first.cmdecod;
run;

/*方法三*/
proc sql
   noprint;
   create table cmtosum as
      select unique (c.cmdecod) as cmdecod, c.subjid, t.trtcd
         from cm as c, treat as t
         where c.subjid = t.subjid
         order by subjid, cmdecod;
quit;


/*依組別(藥名),把資料檔由直向拉成橫向*/

/*方法一*/
Proc sort    data = counts;       by cmdecod trtcd; run;
Data cc2 ; set counts; by cmdecod trtcd;
/*記住3個變項值,不然數據長成一斜線*/
Retain n1-n3;
/*新組別時,先清空數據*/
Array nn {3} n1-n3;
   if first.cmdecod then do i =1 to 3;
nn{i}=.;
   end;
/*指定數值,亦可用Array的方式 nn{trtcd} ,但本例 trtcd有missing,固改用 if then 的方式*/
  if trtcd=. then n1=frequency;
  if trtcd=0 then n2=frequency;
  if trtcd=1 then n3=frequency;
/*輸出每組的最後一筆*/
if last.cmdecod;
run;

/*方法二*/
Proc sort    data = counts;  by cmdecod trtcd; run;
/*利用 Merge ,分組別進行*/
Data cm;
   merge counts(where = (trtcd = 1) rename = (frequency = count1))
         counts(where = (trtcd = 0) rename = (frequency = count2))
         counts(where = (trtcd = .) rename = (frequency = count3))
         end = eof;
      by cmdecod;
run;

/*方法三*/
/*較不建議,若是 ID 的變項值為 missing ,資料會被刪除*/
Proc transpose data=counts prefix=nn out=cc ;
by cmdecod ; id trtcd;
var Frequency;
run;




部分程式引用,Ref:  Jack Shostak, SAS Programming in the Pharmaceutical Industry.

沒有留言: