Data defines the model by dint of genetic programming, producing the best decile table.


Spreading and Summing Multiple (Monthly) Obserations into a Single Observation
~ Monthly Quantitative Fields are NOT Summed ~
Bruce Ratner, Ph.D.

data raw;
input @1 id @3 a $2. @6 Sales 3.0 @9 SaleMonth mmddyy10.;
cards;
1 a1 22 1/12/2001
1 a1 22 12/12/2001
1 a1 22 7/12/2002
1 a1 22 1/12/2001
2 a2 33 1/12/2008
3 a3 44 11/12/2001

title'raw';proc print;run;

data static;
set raw;
title'static';proc print;run;

proc sort data=static ;by id ;run;
data onerec_s ;
set raw ; by id;
if first.id then output;
run;
title'onerec_s';proc print;run;
 
proc summary data=raw ;by id;
var Sales ;
output out=aggreg sum=Sales ; 
title'aggreg';proc print;run;
 
proc sort data=aggreg (drop=_type_ rename=( _freq_=rec_ct));by id;
title'aggreg';proc print;run;

data final;
merge onerec_s aggreg;
by id;
title'final';proc print;run;

data final;
set final;
format SALES dollar14.2;
mon = month(SaleMonth);
year = year(SaleMonth);
run;

proc sort;by ID year mon;run;
%let dsn = final;

data _NULL_;
set &dsn end=EOF;
retain maxyr 0000 minyr 9999;
maxyr = max(maxyr,year);
minyr = min(minyr,year);
if EOF then do;
rangemon = 12*(sum(maxyr,-minyr) + 1);
call symput('allmon',trim(left(put(rangemon,6.))));
call symput('first',trim(left(put(minyr,6.))));
end;

data final_spread ;
retain ID begyr mon1-mon&allmon ;
set &dsn;
by ID;
array mons (&allmon) mon1-mon&allmon;
if first.ID then do;
do i= 1 to &allmon;
mons(i) = .;
end;
begyr = year;
end;
diff = year - &first;
calcmon = diff*12 + mon;
mons(calcmon) = sales;
if last.ID then do;
output;
end;
drop mon year SaleMonth i diff calcmon sales;
run;

proc contents data=final_spread;run;

For more information about this article, call Bruce Ratner at 516.791.3544; or e-mail at br@dmstat1.com.
Sign-up for a free GenIQ webcast: Click here.