Commit 6f9b5988 authored by Carl Schreck's avatar Carl Schreck
Browse files

Automated Nightly Commit - Thu Aug 27 00:00:10 EDT 2015

parent 4cc3d044
%% wind_model.m %%
% A multinomial logistic regression model to predict wind over Germany
% Predictors are leading time-extended PCs of spatial EOFs of filtered OLR
%
% Code by Larry Gloeckler
% Updated: 2013-12-02
clear all; close all; clc;
season_str = ['DJF';'JFM';'FMA';'MAM';'AMJ';'MJJ';'JJA';'JAS';'ASO';'SON';'OND';'NDJ'];
seasons = [12,1,2;1,2,3;2,3,4;3,4,5;4,5,6;5,6,7;6,7,8;7,8,9;8,9,10;9,10,11;10,11,12;11,12,1];
%-------------------------------------------------------------------------%
% Load files containing PCs of EEOFs
% filename = '';
% load(filename);
%-------------------------------------------------------------------------%
% Load wind data (1 Jan 1979 - 31 Dec 2013)
WindData = load('/pr12/midori/Dropbox/WindProject/Data/daily/wind/german_wind_v2.csv');
windSpeed = WindData(:,5);
cutter = datenum('1979-01-01')-datenum('01-jan-1962');
windSpeed(1:cutter,:) = [];
%-------------------------------------------------------------------------%
% Create index vectors (if necessary) to ensure that all data is properly
% aligned
climoDates = datevec(datenum(1979,1,1):datenum(2013,12,31));
trainIdx = find(climoDates(:,1) > 1978 & climoDates(:,1) < 2014);
%-------------------------------------------------------------------------%
% Some Parameters
sm = 3; % Smoother -> 2*sm+1 indicates N-day running mean
maxLag = 45; % 2*maxLag+1 indicates N-day season
forecast = 30; % Number of forecast days (not including day 0)
%-------------------------------------------------------------------------%
% DOY Series
dateSeries = datestr(datenum(1979,1,1):datenum(1978,12,31) + length(trainIdx));
yearSeries = str2num(dateSeries(:,8:11));
doySeries = zeros(length(dateSeries),1);
seas = findseason('01-jan-1979','31-dec-2015',[12 1 2]);
% Find the day of the year
for i = 1:length(doySeries);
doySeries(i) = datenum(dateSeries(i,:)) - datenum(strcat('31-Dec-',...
num2str(yearSeries(i)-1)));
end
[yy, mm, dd] = datevec(datenum('01-jan-1979'):datenum('31-mar-2015'));
yyseas = yy(seas);
% Predefine matrices for storage
windFcst = zeros(length(seas),forecast+1,3);
lowConf = zeros(length(seas),forecast+1,3);
highConf = zeros(length(seas),forecast+1,3);
verification = zeros(length(seas),forecast+1);
%-------------------------------------------------------------------------%
% Compute wind speed anomalies (optional)
% *If anomalies aren't computed, change all instances of windSpeedAnom to
% windSpeed*
windSpeedAnom = createanomalies(windSpeed,4,1,1,0);
% Smooth wind data (centered 7-day running mean, or however long you desire)
windSm = zeros(size(windSpeedAnom));
for i = 1:length(windSpeedAnom)
if i < sm+1
windSm(i,:,:) = nanmean(windSpeedAnom(i:2*sm+1,:,:),1);
elseif i > length(windSpeedAnom)-sm
windSm(i,:,:) = nanmean(windSpeedAnom(i-2*sm:i,:,:),1);
else
windSm(i,:,:) = nanmean(windSpeedAnom(i-sm:i+sm,:,:),1);
end
end
% Reassign smoothed wind
windSpeedAnom = windSm;
predictors_retain = zeros(length(seas),31,64);
coeffs = zeros(length(seas),31,66);
for hindcast = 1:length(seas)
disp('SON | olr + H500 + H50 + U850')
disp([num2str(hindcast/length(seas) * 100),' % Complete \n'])
date = datestr(dateSeries(seas(hindcast),:));
yearNow = str2num(date(end-3:end));
% Index the present DOY
doyNow = (datenum(date)) - datenum(strcat('31-Dec-',num2str(yearNow-1)));
[~,mmNow,~] = datevec(datenum(date));
if hindcast == 1
mmprev = mmNow;
end
if hindcast ~= 1 && mmprev ~= mmNow
%% Preprocess Data %%
% Load EEOF files or files containing information about variance
hgt500EOFs = matfile(['EOFs/hgt500/',season_str(mmNow,:),'.mat']);
hgt500PC = hgt500EOFs.PCs;
hgt50EOFs = matfile(['EOFs/hgt50/',season_str(mmNow,:),'.mat']);
hgt50PC = hgt50EOFs.PCs(:,1:4);
u850EOFs = matfile(['EOFs/u850/',season_str(mmNow,:),'.mat']);
u850PC = u850EOFs.PCs;
olrEOFs = matfile(['EOFs/olr/',season_str(mmNow,:),'.mat']);
olrPC = olrEOFs.PCs;
end
trainyear = find(yy ~= yy(seas(forecast)));
doyseriestrain = doySeries(trainyear);
%-------------------------------------------------------------------------%
% Set a training vector centered on the current doy and all matching doys
% back to the beginning of the climatology
for w = -maxLag:maxLag
if doyNow+w < 1
doyInd(w+maxLag+1,1) = doyNow+w+366;
elseif doyNow+w > 366
doyInd(w+maxLag+1,1) = doyNow+w-366;
else
doyInd(w+maxLag+1,1) = doyNow+w;
end
end
% Create a training vector
for n = 1:length(doyInd)
if n == 1
train = find(doyseriestrain == doyInd(n));
else
train = [train; find(doyseriestrain == doyInd(n))];
end
end
% Sort the training vector
train = sort(train,'ascend');
% Cut the training vector
cut = find(train + forecast > length(doyseriestrain));
train(cut) = [];
%-------------------------------------------------------------------------%
% Find the boundaries for high wind, low wind, and everything in between
% Currently set at upper/lower tercile boundaries, but can be changed to
% anything
highWind = prctile(windSpeedAnom(seas),66.67);
lowWind = prctile(windSpeedAnom(seas),33.33);
%-------------------------------------------------------------------------%
% Build in a 30-day lag into the predictand matrix, indexed by the training
% vector
windArray = zeros(length(train),31);
% Currently run from current day (day 0) to 30 days from current day (day
% 30), but this can be changed
% Keep in mind that *31* forecasts (including day 0) are currently being
% generated
for i = 1:length(train)
for t = 0:forecast
windArray(i,t+1) = windSpeedAnom(trainyear(train(i))+t);
end
end
for i = 0:forecast
verification(hindcast,i+1) = double(ordinal(windSpeedAnom(seas(hindcast)+i),...
{'1','2','3'},[],[min(windSpeedAnom(seas)), ...
lowWind,highWind,max(windSpeedAnom(seas))]));
end
%-------------------------------------------------------------------------%
% Option to standardize predictors by day-of-year standardized anomalies
%-------------------------------------------------------------------------%
% Find most recent predictor values
%% Generate forecast %%
% Loop over lead windows and run data through the logistic regression model
for t = 0:forecast
% fprintf('Forecast day %1.0f\n', t);
% Build a predictor matrix
X = [hgt500PC(trainyear(train),:),hgt50PC(trainyear(train),:),...
u850PC(trainyear(train),:),olrPC(trainyear(train),:)];
Xnow = [hgt500PC(seas(hindcast),:),hgt50PC(seas(hindcast),:),...
u850PC(seas(hindcast),:),olrPC(seas(hindcast),:)];
I = find(~isnan(X(:,64)));
X = X(I,:);
% Create an ordinal response variable (one that assumes a natural ordering
% among the response categories) using Matlab's ordinal function
y = double(ordinal(windArray(I,t+1),{'1','2','3'},[],[min(windSpeedAnom(seas)), ...
lowWind,highWind,max(windSpeedAnom(seas))]));
% Fit an ordinal response model for the ordinal response variable temps
[B, ~, stats] = mnrfit(X, y, 'model', 'ordinal');
% Use p value filtering to identify statistically significant
% predictors to train the model
pCut = stats.p(3:end);
pSig = find(pCut < 0.05);
niter = 1;
while length(pSig) < length(pCut)
if niter == 1
I = pSig;
end
% fprintf('Iteration %1.0f\n', niter);
% Redefine the predictors
X = X(:,pSig);
Xnow = Xnow(:,pSig);
% Retrain the ordinal model on the newly identified statistically
% significant predictors
[B, ~, stats] = mnrfit(X, y, 'model', 'ordinal');
% Repeat p value filtering
pCut = stats.p(3:end);
pSig = find(pCut < 0.05);
I = I(pSig);
niter = niter+1;
end
% Output probability estimates and confidence bound estimates for each
% of the multinomial categories
[prob, dlow, dhi] = mnrval(B, Xnow, stats, 'model', 'ordinal');
windFcst(hindcast,t+1,:) = prob;
lowConf(hindcast,t+1,:) = dlow;
highConf(hindcast,t+1,:) = dhi;
predictors_retain(hindcast,t+1,I) = 1;
coeffs(hindcast,t+1,[1;2;I+2]) = B;
end
end
%% Save data %%
save('hindcast_output_SON.mat','windFcst','lowConf','highConf','seas',...
'verification','predictors_retain','coeffs');
% (Optional) Write to CSV file
% csvwrite();
\ No newline at end of file
%% wind_model.m %%
% A multinomial logistic regression model to predict wind over Germany
% Predictors are leading time-extended PCs of spatial EOFs of filtered OLR
%
% Original Code by Larry Gloeckler
% Code modified by Nicholas Schiraldi
%
% Updated: 2014-02-10
% Written to predict the WindPower timeseries in realtime
clear all; close all; clc;
%% Specifiy dates
startDate = '1-jan-1979';
endDate = '1-sep-2012';
fileout = 'DJF_79_12.mat';
dateList = datenum(startDate):datenum(endDate);
[yy,mm,dd]=datevec(dateList);
idx = find(mm == 12 | mm == 1 | mm == 2 | mm == 11);
dateList = dateList(idx);
clear yy mm dd
season_str = ['DJF';'JFM';'FMA';'MAM';'AMJ';'MJJ';'JJA';'JAS';'ASO';'SON';'OND';'NDJ'];
seasons = [12,1,2;1,2,3;2,3,4;3,4,5;4,5,6;5,6,7;6,7,8;7,8,9;8,9,10;9,10,11;10,11,12;11,12,1];
%-------------------------------------------------------------------------%
% Load wind data (1 Jan 1979 - 31 Dec 2013)
% WindData = load('/pr11/nschiral/scripts/Germany_Wind.mat');
% windSpeed = WindData(:,4); clear WindData
% cutter = datenum('1979-01-01')-datenum('01-jan-1962');
% windSpeed(1:cutter,:) = [];
% windSpeed(13298:end) = [];
% I = find(isnan(windSpeed));
% windSpeed(I) = nanmean(windSpeed(I-1:I+1));
WindData = load('/pr12/midori/Dropbox/WindProject/Data/daily/wind/german_wind_v2.csv');
windSpeed = WindData(:,5);
cutter = datenum('1979-01-01')-datenum('01-jan-1962');
windSpeed(1:cutter,:) = [];
windSpeed(13298:end) = [];
I = find(isnan(windSpeed));
windSpeed(I) = nanmean(windSpeed(I-1:I+1));
%-------------------------------------------------------------------------%
% Create index vectors (if necessary) to ensure that all data is properly
% aligned
climoDates = datevec(datenum(1979,1,1):datenum(2014,12,31));
trainIdx = find(climoDates(:,1) > 1978 & climoDates(:,1) < 2015);
%-------------------------------------------------------------------------%
% Some Parameters
sm = 3; % Smoother -> 2*sm+1 indicates N-day running mean
maxLag = 45; % 2*maxLag+1 indicates N-day season
forecast = 30; % Number of forecast days (not including day 0)
%-------------------------------------------------------------------------%
% DOY Series
[yy, mm, dd] = datevec(datenum('01-jan-1979'):datenum('31-dec-2014'));
doySeries = (datenum(yy,mm,dd) - datenum(yy-1,12,31))';
% Predefine matrices for storage
windFcst = zeros(length(dateList),forecast+1,3);
lowConf = zeros(length(dateList),forecast+1,3);
highConf = zeros(length(dateList),forecast+1,3);
verification = zeros(length(dateList),forecast+1);
predictors_retain = zeros(length(dateList),31,64);
coeffs = zeros(length(dateList),31,66);
upperBound = zeros(length(dateList),forecast+1);
lowerBound = zeros(length(dateList),forecast+1);
rawVer = zeros(length(dateList),forecast+1);
PCamp = nan(size(predictors_retain));
groupContr = nan(length(predictors_retain),31,4);
predictContr = nan(length(predictors_retain),31,4);
%-------------------------------------------------------------------------%
% Compute wind speed anomalies (optional)
% *If anomalies aren't computed, change all instances of windSpeedAnom to
% windSpeed*
windSpeedAnom = createanomalies(windSpeed,4,1,1,0);
% Smooth wind data (centered 7-day running mean, or however long you desire)
windSm = zeros(size(windSpeedAnom));
windSmRaw = zeros(size(windSpeedAnom));
for i = 1:length(windSpeedAnom)
if i < sm+1
windSm(i,:,:) = nanmean(windSpeedAnom(i:2*sm+1,:,:),1);
windSmRaw(i,:,:) = nanmean(windSpeed(i:2*sm+1,:,:),1);
elseif i > length(windSpeedAnom)-sm
windSm(i,:,:) = nanmean(windSpeedAnom(i-2*sm:i,:,:),1);
windSmRaw(i,:,:) = nanmean(windSpeed(i-2*sm:i,:,:),1);
else
windSm(i,:,:) = nanmean(windSpeedAnom(i-sm:i+sm,:,:),1);
windSmRaw(i,:,:) = nanmean(windSpeed(i-sm:i+sm,:,:),1);
end
end
% Reassign smoothed wind
windSpeedAnom = windSm;
for hindcast = 1:length(dateList)
PCStartDate = '01-jan-1979';
initDate = dateList(hindcast);
[yyFcst,mmFcst,ddFcst] = datevec(initDate);
doyNow = datenum(yyFcst,mmFcst,ddFcst) - datenum(yyFcst-1,12,31);
currIdx = initDate - datenum(PCStartDate) + 1;
disp([num2str(hindcast/length(dateList) * 100),' % Complete \n'])
currMM = mmFcst;
if hindcast ~= 1
[~,prevMM,~] = datevec(initDate-1);
end
% Load EEOF files or files containing information about variance
if hindcast ~= 1 && currMM ~= prevMM
disp('Loading EOFS')
hgt500EOFs = matfile(['EOFs/hgt500/',season_str(currMM,:),'.mat']);
hgt500PC = hgt500EOFs.PCs;
hgt50EOFs = matfile(['EOFs/hgt50/',season_str(currMM,:),'.mat']);
hgt50PC = hgt50EOFs.PCs(:,1:4);
u850EOFs = matfile(['EOFs/u850/',season_str(currMM,:),'.mat']);
u850PC = u850EOFs.PCs;
olrEOFs = matfile(['EOFs/olr/',season_str(currMM,:),'.mat']);
olrPC = olrEOFs.PCs;
disp('Finished')
elseif hindcast == 1
disp('Loading EOFS')
hgt500EOFs = matfile(['EOFs/hgt500/',season_str(currMM,:),'.mat']);
hgt500PC = hgt500EOFs.PCs;
hgt50EOFs = matfile(['EOFs/hgt50/',season_str(currMM,:),'.mat']);
hgt50PC = hgt50EOFs.PCs(:,1:4);
u850EOFs = matfile(['EOFs/u850/',season_str(currMM,:),'.mat']);
u850PC = u850EOFs.PCs;
olrEOFs = matfile(['EOFs/olr/',season_str(currMM,:),'.mat']);
olrPC = olrEOFs.PCs;
disp('Finished')
end
seas = findseason('01-jan-1979','31-dec-2014',seasons(mmFcst,:));
trainyear = find(yy ~= yyFcst);
doyseriestrain = doySeries(trainyear);
%-------------------------------------------------------------------------%
% Set a training vector centered on the current doy and all matching doys
% back to the beginning of the climatology
for w = -maxLag:maxLag
if doyNow+w < 1
doyInd(w+maxLag+1,1) = doyNow+w+366;
elseif doyNow+w > 366
doyInd(w+maxLag+1,1) = doyNow+w-366;
else
doyInd(w+maxLag+1,1) = doyNow+w;
end
end
% Create a training vector
for n = 1:length(doyInd)
if n == 1
train = find(doyseriestrain == doyInd(n));
else
train = [train; find(doyseriestrain == doyInd(n))];
end
end
% Sort the training vector
train = sort(train,'ascend');
train = trainyear(train);
% Cut the training vector
cut = find(train + forecast > length(doySeries));
train(cut) = [];
%-------------------------------------------------------------------------%
% Find the boundaries for high wind, low wind, and everything in between
% Currently set at upper/lower tercile boundaries, but can be changed to
% anything
highWind = prctile(windSpeedAnom(seas),66.67);
lowWind = prctile(windSpeedAnom(seas),33.33);
upperBound(hindcast,:) = repmat(highWind,[1, 31]);
lowerBound(hindcast,:) = repmat(lowWind,[1, 31]);
%-------------------------------------------------------------------------%
% Build in a 30-day lag into the predictand matrix, indexed by the training
% vector
windArray = zeros(length(train),31);
% Currently run from current day (day 0) to 30 days from current day (day
% 30), but this can be changed
% Keep in mind that *31* forecasts (including day 0) are currently being
% generated
for i = 1:length(train)
for t = 0:forecast
windArray(i,t+1) = windSpeedAnom(train(i)+t);
end
end
%% Store Verification
for t = 0:forecast
verification(hindcast,t+1) = double(ordinal(windSpeedAnom(currIdx+t),...
{'1','2','3'},[],[min(windSpeedAnom(seas)), ...
lowWind,highWind,max(windSpeedAnom(seas))]));
rawVer(hindcast,t+1) = windSpeedAnom(currIdx+t);
end
%% Generate forecast %%
% Loop over lead windows and run data through the logistic regression model
for t = 0:forecast
% Build a predictor matrix
X = [hgt500PC(train,1:20),hgt50PC(train,1:4),u850PC(train,1:20),....
olrPC(train,1:20)];
Xnow = [hgt500PC(currIdx,1:20),hgt50PC(currIdx,1:4),...
u850PC(currIdx,1:20),olrPC(currIdx,1:20)];
% Create an ordinal response variable (one that assumes a natural ordering
% among the response categories) using Matlab's ordinal function
I = find(~isnan(X(:,64)));
X = X(I,:);
y = double(ordinal(windArray(I,t+1),{'1','2','3'},[],[min(windSpeedAnom(:)), ...
lowWind,highWind,max(windSpeedAnom(:))]));
% Fit an ordinal response model for the ordinal response variable temps
[B, ~, stats] = mnrfit(X, y, 'model', 'ordinal');
% Use p value filtering to identify statistically significant
% predictors to train the model
I = (1:64)';
pCut = stats.p(3:end);
pSig = find(pCut < 0.05);
I = I(pSig);
niter = 1;
while length(pSig) < length(pCut)
% fprintf('Iteration %1.0f\n', niter);
% if niter == 1
% I = pSig;
% end
% Redefine the predictors
X = X(:,pSig);
Xnow = Xnow(:,pSig);
% Retrain the ordinal model on the newly identified statistically
% significant predictors
[B, ~, stats] = mnrfit(X, y, 'model', 'ordinal');
% Repeat p value filtering
pCut = stats.p(3:end);
pSig = find(pCut < 0.05);
I = I(pSig);
niter = niter+1;
end
[prob, dlow, dhi] = mnrval(B, Xnow, stats, 'model', 'ordinal');
windFcst(hindcast,t+1,:) = prob;
lowConf(hindcast,t+1,:) = dlow;
highConf(hindcast,t+1,:) = dhi;
predictors_retain(hindcast,t+1,I) = 1;
coeffs(hindcast,t+1,[1;2;I+2]) = B;
rawVer(hindcast,t+1) = windSpeedAnom(currIdx+t);
% store PC amplitudes
Xnow = [hgt500PC(currIdx,1:20),hgt50PC(currIdx,1:4),...
u850PC(currIdx,1:20),olrPC(currIdx,1:20)];
PCamp(hindcast,t+1,I) = Xnow(I);
predictContr(hindcast,t+1,I) = exp(Xnow(I).*B(3:end)');
groupContr(hindcast,t+1,1) = prod(predictContr(hindcast,t+1,I(I<=20)));
groupContr(hindcast,t+1,2) = prod(predictContr(hindcast,t+1,I(I>=21 & I <=24)));
groupContr(hindcast,t+1,3) = prod(predictContr(hindcast,t+1,I(I>=25 & I <=44)));
groupContr(hindcast,t+1,4) = prod(predictContr(hindcast,t+1,I(I>=45 & I <=64)));
end
end
save(['hindcasts/',fileout],'windFcst','lowConf','highConf','dateList',...
'verification','predictors_retain','coeffs','PCamp','groupContr',...
'predictContr','upperBound','lowerBound','rawVer');
sendEarthRiskEmail('nschiraldi@gmail.com','DJF Run Complete','analyze',[])
......@@ -25,10 +25,10 @@ begin ; main
; These are some parameters that could be useful to have up top
if( .not.isvar("bandName") ) then
bandName = "high"
bandName = "mid"
end if
if( .not.isvar("varName") ) then
varName = "mwe"
varName = "wind"
end if
plotType = "png"
......
......@@ -32,20 +32,20 @@ begin ; main
minLat = -30
maxLat = 30
if( .not.isvar("bandName") ) then
bandName = "low"
bandName = "mid"
end if
print( bandName )
if( bandName.eq."low" ) then
filtFileName = "olr.40-low.nc"
filtFileName = "olr.100-low.nc"
nAveLong = -999
nAveShort = 20
nAveShort = 40
else if( bandName.eq."mid" ) then
filtFileName = "olr.40-100.nc"
nAveLong = 20
filtFileName = "olr.20-100.nc"
nAveLong = 40
nAveShort = 9
else if( bandName.eq."high" ) then
filtFileName = "olr.40-high.nc"
nAveLong = 20
filtFileName = "olr.100-high.nc"
nAveLong = 40
nAveShort = -999
else if( bandName.eq."raw" ) then
filtFileName = "olr.anom.nc"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment