head 1.1; access; symbols; locks wmc:1.1; strict; comment @# @; 1.1 date 2004.03.02.20.31.21; author wmc; state Exp; branches; next ; desc @@ 1.1 log @Initial revision @ text @function lund,y1,t0,ssef,sser,a1=a1,a2=a2,mu1=mu1,mu2=mu2,ar=ar,mur=mur,c=c,mdi=mdi,t1=t1,t2=t2,tr=tr,y=y,sig=sig,noisy=noisy ; Purpose: Compute F_c, the statistic for jump-in-series ; Category: statistics ; Reference: Lund and Reeves, J Climate, p2547, 2002 ; Status: tested-but-use-at-your-own-risk ; ; Inputs: y1 - vector of data. Missing data permitted: default 999 ; t0 - vector of time. Defaults to findgen(n). ; The routine is happy if the time is irregular. I'm not sure whether or not ; this affects the statistics. ; mdi=mdi - alternative missing data value (or, if string, conditional, in the form: ; "y1 ne mdi" or "y1 lt mdi" or whatever) ; Returns: - F_c of Lund ; Outputs: ssef - full model SSE values (vector) ; sser - reduced model values (scalar) ; c - point of highest F_c. Note (this will only be obvious with short series) ; that the fit is (0...c)(c+1...n-1), ie the break is between c and c+1, ; not at c itself ; a1,mu1,t1 - fit for up-to-c (ie, y=a1*t1+mu1) ; a2,mu2,t2 - fit for past-c (ie, y=a2*t2+mu2) ; ar,mur,tr - fit for whole data (ie, y=ar*tr+mu4) (nb: tr=t0, if no missing data) ; sig=sig - whether max(F_c) is signficant (1) at 95% (ses lund_sig) or not (0) ; Examples: ; To simply see if there is a breakpoint, just print the value of "sig" returned ; junk=lund(data,sig=sig) & print,sig ; To plot the Fc function ; plot,lund(data) ; To tell it that 999 is missing data ; plot,lund(data,mdi=999) ; For more examples, see lund_plot ; ; See-also: lund_plot; lund_sig ; ; I think it works now... ; message,/cont,'Caution: work-in-progress' n=n_elements(y1) if (keyword_set(noisy)) then print,'Original series has ',n,' points' if (n_elements(mdi) eq 0) then mdi=999 if (n_elements(t0) eq 0) then t0=findgen(n) ; Define the space to hold the "full model" (ie with breakpoint) sum-of-squares-of-error ssef=fltarr(n) ; Handle missing data if (datatype(mdi) eq 7) then $ junk=execute('ii=where('+mdi+',n)') $ else $ ii=where(y1 ne mdi,n) t=t0(ii)+0.0 & tr=t y=y1(ii) if (keyword_set(noisy)) then begin print,'Mdi filtered series has ',n,' points' print,' range: ',makerange(y) print,' time range: ',makerange(t) endif ; Compute parameters for LS fit to whole series. Slight complication for possible irregular time. ; p2549 (eq 2.8). Note we use total(t^2) not n(n+1)(n-1)/12 because t may not be integer. ym=avg(y) tm=avg(t) ar= (total((y-ym)*(t-tm))) / total((t-tm)^2) mur=total(y-ar*t) / n ; eq 2.7 sum-of-squares-of-error for whole series sser=total((y-mur-ar*t)^2) ; Now loop over the series (avoiding 2 points at each end, because the LS fit is ; perfect with only 2 points! for i=2,n-3 do begin j=indgen(i+1) k=indgen(n-i-1)+i+1 ; eq 2.2 m1=avg(y(j)) m2=avg(y(k)) t1m=avg(t(j)) t2m=avg(t(k)) a1=total( (t(j)-t1m)*(y(j)-m1) ) / total( (t(j)-t1m)^2 ) a2=total( (t(k)-t2m)*(y(k)-m2) ) / total( (t(k)-t2m)^2 ) ; eq 2.3 mu1=m1-a1*t1m mu2=m2-a2*t2m ; eq 2.6 ssef(i)=total((y(j)-mu1-a1*t(j))^2) $ +total((y(k)-mu2-a2*t(k))^2) endfor ; eq 2.5. Compute F_c, remembering we have thrown away 2 points at each end j=indgen(n-4)+2 fc = [0,0,((sser-ssef(j))/2.0) / (ssef(j)/(n-4.0)),0,0] ; Now find the max of fc; deduce that this is the break; re-calc slopes for this point ; Note that later on we'll decide if this is sig or not. junk=max(fc,i) c=i j=indgen(i+1) k=indgen(n-i-1)+i+1 m1=avg(y(j)) m2=avg(y(k)) t1m=avg(t(j)) t2m=avg(t(k)) t1=t(j) t2=t(k) a1=total( (t(j)-t1m)*(y(j)-m1) ) / total( (t(j)-t1m)^2 ) a2=total( (t(k)-t2m)*(y(k)-m2) ) / total( (t(k)-t2m)^2 ) mu1=m1-a1*t1m mu2=m2-a2*t2m ; Guess significance sig=lund_sig(n) if (sig lt max(fc)) then sig=1 else sig=0 return,fc end @