classdef NNTrainedProduct < RetrievalDatabaseProduct % base class for any ANN-trained product % % NNTrainedProduct properties: % % targets - fields to be used as training targets % % NNTrainedProduct Methods: % % make_and_train_network - % loadnet - % storenet - % evaluate_test_data - % evaluate_and_summarise_testdata - % % See also: RetrievalDatabaseProduct, IceMusicNNIWP, CollocatedNNIWP, % compare_ice_retrievals, compare_collocated_iwp_products, % InstrumentVisualiser % Changes in SPARE-ICE: % % version 0.1: fixed bug in storage P_CLOUD: should be float, not int % version 0.2: fixed bug in lacking filtering for various flagged values % version 0.3: added surface elevation % version 0.4: set surface elevation to 0 if z<0 % version 0.5: include some measure of std.dev of z % version 0.5b: fixed re-introduced bug lacking filtering, new network % version 0.6: use less info for patternnet than for fitnet % version 0.7: add corrected meta-info % version 0.8: properties %selection net trdat % options for plotting scatter-plot opts = struct('trans', @(x)log10(x), ... 'invtrans', @(x)10.^(x), ... 'axprops', struct(... 'xscale', 'log', ... 'yscale', 'log', ... 'xgrid', 'on', ... 'xminorgrid', 'off', ... 'ygrid', 'on', ... 'yminorgrid', 'off'), ... 'bins', linspace(-1, 4, 50), ... 'medprops', struct('LineWidth', 2, 'LineStyle', '-', 'Color', [0 0 0]), ... 'diagonal', struct('LineWidth', 1, 'LineStyle', ':', 'Color', [0 0 0])) % chanstr; % freqstr; % hackish way; if an earlier version of the product had what's % needed, don't need to recollocate altfilename; altbasedir; % different versions with different network paths. % may be set to cell array of strings. networks; % to homog or not to homog, that is the question homog; end properties (Abstract) % How many entries to use at most in the training? If you have % more data, the rest will go into the testing data. The data in % maxsize will be divided between training and validation. maxsize % Structure with one field for each target. % Must have: % - transform transformation to apply, i.e. log10 % - invtransform inverse therefor, i.e. @(x)10.^x % - lims limits to apply before doing anything % May have: % - classify Also train a classification net with this as % the cloudy/cloudfree cutoff. % - bins Default bins over which to do statistics. % - relevant Minimum x-value to consider in bestnet. % - optimise_field What field (as returned by get_performance) % to optimise for in 'bestnet'. % % targets.IWP.transform = @log10 % targets.IWP.invtransform = @(x)10.^(x) % targets.IWP.lims = [realmin realbax] % targets.IWP.classify = 10; targets % Structure with one field for each input. % % inputs end methods function self = NNTrainedProduct(varargin) self = self@RetrievalDatabaseProduct(varargin{:}); self.setmembers(); self.setcols(); % self.freqstr = sprintf('%s ', self.freqs{self.chans}); % self.chanstr = sprintf('%d', self.chans); if isempty(self.homog) self.homog = false; end end function make_and_train_network(self, varargin) % make_and_train_network Create and train NN with self.data % % FORMAT % % nniwp.make_and_train_network([noise_level[, count]]) % % IN % % noise_level double Gaussian noise, σ. If % given, constant for all inputs. % If not given or [], take from % self.inputs.(field).noise. % % count integer Number of networks to train. % Defaults to 1. Can be set % higher to choose e.g. 'best of % 5'. [noise_level, count] = optargs(varargin, {[], 1}); if isempty(self.data) error(['atmlab:' mfilename ':nodata'], ... 'No data found. 
Did you run .getdata(...)?');
            end

            %% prepare data, including noise (perhaps)

            logtext(atmlab('OUT'), 'Preparing data\n');
            self.limit_data();

            dat = self.data; %(self.selection, :);
            if self.homog
                dat = self.homog_inputs(dat, self.localcols);
            end
            [x_all, x_all_cols] = self.select_and_prepare_data(dat, self.localcols, self.inputs);
            [y, y_cols] = self.select_and_prepare_data(dat, self.localcols, self.targets);
            try
                y_class = self.get_classification(dat);
                classify = true;
            catch ME
                switch ME.identifier
                    case {['atmlab:' mfilename ':noclassify']}
                        classify = false;
                    otherwise
                        ME.rethrow();
                end
            end

            % get rid of data ALREADY nan before flagging anything
            valid_overall = ~any(isnan(x_all), 1);

            % limit further to finite data
            % test using imag~=0 because isreal will return a scalar when
            % given an array
            x_all(imag(x_all)~=0) = NaN;
            y(imag(y)~=0) = NaN;
            %fin = all(isfinite(x_all), 1) & all(isfinite(y), 1);
            % (select_for_regression already requires finite y)
            fin = all(isfinite(x_all), 1) & ...
                self.select_for_regression(y, y_cols);
            if ~all(fin)
                logtext(atmlab('OUT'), 'Further reducing to %d for finiteness\n', sum(fin));
            end
            %x = x(:, fin);
            %y = y(:, fin);

            % Seed the random number generator and remember the seed, so
            % the exact same noise can be regenerated later, should I want
            % to.  Note that rng() wants a nonnegative integer seed, so
            % now() by itself would not do.
            rng('shuffle');
            s = rng();
            seed = s.Seed;
            rng(seed);
            if isempty(noise_level)
                % apply per field
                noise = bsxfun(@times, randn(size(x_all)), self.get_noise_vector(x_all_cols).');
            elseif isscalar(noise_level)
                noise = randn(size(x_all)) * noise_level;
            else
                noise = bsxfun(@times, randn(size(x_all)), noise_level.');
            end
            % make sure I keep track of noiseless data, store this along
            % with seed
            x_orig = x_all;
            x_all = x_all + noise;

            % might not use same inputs for both nets:
            % any field in self.inputs that has self.inputs.(field).net =
            % 'fit' will only be used for fitting, and any field that has
            % 'pat' will only be used for pattern-matching
            [x_fit, x_fit_cols] = self.subselect_inputs(x_all, x_all_cols, self.inputs, 'fit');
            [x_orig_fit, ~] = self.subselect_inputs(x_orig, x_all_cols, self.inputs, 'fit');
            if classify
                [x_pat, x_pat_cols] = self.subselect_inputs(x_all, x_all_cols, self.inputs, 'pat');
                [x_orig_pat, ~] = self.subselect_inputs(x_orig, x_all_cols, self.inputs, 'pat');
            end

            logtext(atmlab('OUT'), 'Getting %d network(s)\n', count);
            % loaded = false;
            for net_count = 0:count-1

                %% Bail out if no need for any network

                f = self.calculate_network_file_name(noise_level, net_count);
                if exist(f, 'file') && self.overwrite==0
                    % if loaded
                    %     continue
                    % else
                    logtext(atmlab('OUT'), 'Already exists, loading from file %s\n', f);
                    self.loadnets(f);
                    % retrain if the stored net is still in the old
                    % single-network format, or if classification is wanted
                    % but no pattern net was stored
                    if isa(self.net, 'network') || (classify && ~isfield(self.net, 'pat'))
                        logtext(atmlab('OUT'), ...
                            'On second thought, I lack a pattern net, retraining!\n');
                    % elseif length(self.net.pat.divideParam.trainInd) + length(self.net.pat.divideParam.valInd) + length(self.net.pat.divideParam.testInd) < length(self.data)
                    %     logtext(atmlab('OUT'), ...
% 'On second thought, we have different data now, retraining"\n'); else loaded = true; continue; end % end end %% parallelise if matlabpool('size') == 0 matlabpool('open', 6); end %% Set up neural networks % Create a Fitting and a Pattern Network hiddenLayerSize = 10; %min([size(x, 1) 10]); logtext(atmlab('OUT'), 'Creating networks to store at %s, hidden layer %d nodes\n', f, hiddenLayerSize); fitter = fitnet(hiddenLayerSize); % Choose Input and Output Pre/Post-Processing Functions % For a list of all processing functions type: help nnprocess %self.net.inputs{1}.processFcns = {'removeconstantrows','mapminmax'}; %self.net.outputs{2}.processFcns = {'removeconstantrows','mapminmax'}; fitter.inputs{1}.processFcns = {'removeconstantrows','mapstd'}; fitter.outputs{2}.processFcns = {'removeconstantrows','mapstd'}; % Setup Division of Data for Training, Validation, Testing % For a list of all data division functions type: help nndivide fitter.divideFcn = 'divideind'; fitter.divideMode = 'sample'; % Divide up every sample % make my own division divall = zeros(1, size(y, 2)); rr = rand(1, size(y, 2)); trfrac = min([(self.maxsize * (2/3)) / size(y, 2), 0.5]); divall(rr <= trfrac) = 1; % train divall(rr > trfrac & rr <= trfrac*1.5) = 2; % val divall(rr > trfrac * 1.5) = 3; % test (not used in training) % but for the fitter, exclude nonfinite values fitter.divideParam.trainInd = find(divall==1 & fin); fitter.divideParam.valInd = find(divall==2 & fin); fitter.divideParam.testInd = find(divall==3 & fin); % fitter.divideParam.trainRatio = 50/100; if classify patter = patternnet(hiddenLayerSize); patter.divideFcn = 'divideind'; patter.divideMode = 'sample'; % for the patter, I should include cloud-free cases patter.divideParam.trainInd = find(divall==1 & valid_overall); patter.divideParam.valInd = find(divall==2 & valid_overall); patter.divideParam.testInd = find(divall==3 & valid_overall); patter.trainParam.show = 10; end % For help on training function 'trainlm' type: help trainlm % For a list of all training functions type: help nntrain % See http://www.mathworks.se/help/nnet/ug/speed-and-memory-comparison-for-training-multilayer-networks.html fitter.trainFcn = 'trainlm'; % Levenberg-Marquardt % choose trainscg, lower on memory. % self.net.trainFcn = 'trainscg'; % self.net.trainFcn = 'trainbr'; % Bayesian regularisation % self.net.trainFcn = 'trainbfg'; % BFGS quasi-Newton backpropagation fitter.trainParam.show = 10; fitter.efficiency.memoryReduction = 50; % Choose a Performance Function % For a list of all performance functions type: help nnperformance fitter.performFcn = 'mse'; % Mean squared error fitter.userdata.inputs = self.inputs; fitter.userdata.targets = self.targets; % Train the Networks logtext(atmlab('OUT'), 'Training fitting network (%d inputs, %d hidden, %d target; %d/%d/%d training elements)\n', ... size(x_fit, 1), fitter.layers{1}.size, ... size(y, 1), length(fitter.divideParam.trainInd), ... length(fitter.divideParam.valInd), ... length(fitter.divideParam.testInd)); [fitter, fit_trdat] = train(fitter, double(x_fit), double(y), ... 'useParallel', 'yes', ... 'showResources', 'yes', ... 'reduction', 100); if classify logtext(atmlab('OUT'), 'Training cloud-filter pattern network (%d inputs, %d hidden, %d target; %d/%d/%d training elements)\n', ... size(x_pat, 1), patter.layers{1}.size, ... size(y_class, 1), length(patter.divideParam.trainInd), ... length(patter.divideParam.valInd), ... 
length(patter.divideParam.testInd)); % for some reason, training a patternnet really doesn't % like to see NaNs even in part of the data that aren't % used, but set aside for testing or even not used at all. % As a workaround, put zeroes here instead, they aren't % used anyway...! new_y_class = zeros(size(y_class), 'double'); new_y_class(:, patter.divideParam.trainInd) = y_class(:, patter.divideParam.trainInd); new_y_class(:, patter.divideParam.valInd) = y_class(:, patter.divideParam.valInd); new_x_pat = zeros(size(x_pat), 'double'); new_x_pat(:, patter.divideParam.trainInd) = x_pat(:, patter.divideParam.trainInd); new_x_pat(:, patter.divideParam.valInd) = x_pat(:, patter.divideParam.valInd); [patter, pat_trdat] = train(patter, new_x_pat, new_y_class, ... 'useParallel', 'yes', ... 'showResources', 'yes', ... 'reduction', 60); end %[self.net, self.trdat] = train(self.net, x, y, 'useParallel', 'no', 'showResources', 'yes'); % storing full traindata, valdata, testdata with network. % The trainInd, valInd, testInd are only useful if I'm 100% % sure that I get the same data, and with making % subselections or getting more collocations, this is not % the case fitter.userdata.alldata = [x_orig_fit; y]; % fitter.userdata.traindata = [x_orig_fit(:, fit_trdat.trainInd); y(:, fit_trdat.trainInd)]; % fitter.userdata.valdata = [x_orig_fit(:, fit_trdat.valInd); y(:, fit_trdat.valInd)]; % fitter.userdata.testdata = [x_orig_fit(:, fit_trdat.testInd); y(:, fit_trdat.testInd)]; fitter.userdata.x_cols = x_fit_cols; fitter.userdata.y_cols = ... structfun(@(x)x+size(x_orig_fit, 1), ... y_cols, 'UniformOutput', false); if classify patter.userdata.traindata = ... [x_orig_pat(:, pat_trdat.trainInd); ... y_class(:, pat_trdat.trainInd)]; patter.userdata.valdata = ... [x_orig_pat(:, pat_trdat.valInd); ... y_class(:, pat_trdat.valInd)]; patter.userdata.testdata = ... [x_orig_pat(:, pat_trdat.testInd); ... y_class(:, pat_trdat.testInd)]; patter.userdata.cols = x_pat_cols; patter.userdata.randseed = seed; patter.userdata.noiselevel = noise_level; end fitter.userdata.randseed = seed; fitter.userdata.noiselevel = noise_level; logtext(atmlab('OUT'), 'Storing network\n'); logtext(atmlab('OUT'), 'Output: %s\n', f); fitter.userdata.stored = f; fitter.userdata.no = net_count; fitter.userdata.note = self.name; self.net = struct(); self.net.fit = fitter; self.trdat.fit = fit_trdat; if classify patter.userdata.stored = f; patter.userdata.no = net_count; patter.userdata.note = fitter.userdata.note; self.net.pat = patter; self.trdat.pat = pat_trdat; end self.storenets(f); end end function [y_class, y_dat] = get_classification(self, dat) % from data subselection, get classification targets % % must have exactly one classification target % % two outputs: % y_class [2 x N] matrix, cloud/cloudfree % y_data [1 x N] vector, corresponding measurement self.localcols; tgs = fieldnames(self.targets); y_class = zeros(2, size(dat, 1)); done = false; for i = 1:length(tgs) tg = tgs{i}; if isfield(self.targets.(tg), 'classify') && self.targets.(tg).classify if done error(['atmlab:' mfilename ':twofields'], ... 'Two fields with classify?!'); end y_class(1, :) = dat(:, self.localcols.(tg))<=self.targets.(tg).classify; y_class(2, :) = dat(:, self.localcols.(tg))>self.targets.(tg).classify; y_dat = dat(:, self.localcols.(tg)); % FIXME: why do I get any where both are false? done = true; end end if ~done error(['atmlab:' mfilename ':noclassify'], ... 
'No fields with classify?!'); end end function storenets(self, path) % store artificial neural network and training data to 'path' nt = self.net; %#ok tr = self.trdat; %#ok save(path, 'nt', 'tr'); end function loadnets(self, varargin) % Loads previously-trained neural-net % % FORMAT % % nn.loadnets(p) % % IN % % p either full path or string corresponding to one of the % members of self.networks, which translates to a full % path. Can also be omitted completely, will then try % to load the latest version based on self.version. if isempty(self.version) narginchk(2, 2); p = varargin{1}; else narginchk(1, 2); p = optargs(varargin, {['version_', strrep(self.version, '.', '_')]}); end if isfield(self.networks, p) path = self.networks.(p); else path = p; end logtext(atmlab('OUT'), 'Loading network and training info from %s\n', ... path); self.net = loadvar(path, 'nt'); self.trdat = loadvar(path, 'tr'); ud = self.net.fit.userdata; if isempty(fieldnames(self.inputs)) self.inputs = ud.inputs; self.setmembers(); self.setcols(); else if ~isequal(self.inputs, ud.inputs) error(['atmlab:' mfilename ':inconsistent'], ... 'Inconsistent input matrices!'); end end end function [best, perfs] = bestnet(self, noise_level) % when multiple nets were trained, select the best one logtext(atmlab('OUT'), 'Selecting best-performing network\n'); % consider all targets targs = fieldnames(self.targets); % infinite loop to count how many networks we have total = 0; while true f = self.calculate_network_file_name(noise_level, total); if ~exist(f, 'file') break end total = total + 1; end perfs = cell(total, length(targs)); %perf = zeros(total, length(targs)); perf_fit = zeros(total, length(targs)); classify = zeros(length(targs), 1); for i = 0:total-1 f = self.calculate_network_file_name(noise_level, i); self.loadnets(f); % What input to test the performance for? How to % weigh 'best' when there are different inputs? for t = 1:length(targs) targ = targs{t}; [xx, p] = self.get_performance(noise_level, targ, ... self.targets.(targ).bins); perfs{i+1, t} = {xx, p}; perf(i+1, t) = p; classify(t) = self.targets.(targ).classify; if classify(t) [~, fp, fn] = self.test_patternnet_performance(); classify(t) = true; perf_fit(i+1, t) = mean(fp+fn); end end end quals = cell(length(targs), 1); for t = 1:length(targs) targ = targs{t}; relevant = (all([perf(:, t).count]>100, 2)); if isfield(self.targets.(targ), 'relevant') relevant = relevant & (perfs{1, t}{1}>self.targets.(targ).relevant); end allperf = [perf(:, t).(safegetfield(self.targets.(targ), 'optimise_field', 'rel_medfracerr'))]; quals{t} = mean(allperf(relevant, :)); if classify(t) quals{t} = quals{t} + vec2row((perf_fit(:, t)./median(perf_fit(:, t)) > 1.2) * 100); % this means the cloud filter went horribly wrong end end [~, best] = min(mean(vertcat(quals{:}), 1)); best = best - 1; % count from 0 % load the best net self.loadnets(self.calculate_network_file_name(noise_level, best)); end function [x, y_ref, y_retr, x_cols, y_cols, y_retr_nonoise] ... = evaluate_test_data(self, varargin) % evaluates test-data for NN % % FORMAT % % [x, y_ref, y_retr, x_cols, y_cols, y_retr_nonoise] = prod.evaluate_test_data(noise_level) % % IN % % noise_level double noise, optional, defaults to % training noise % % OUT % % x % y_ref % y_retr % x_cols % y_cols ud = self.net.fit.userdata; indy_dat = self.data(self.trdat.fit.testInd, :); [y_ref, y_cols] = get_columns(indy_dat, ... self.localcols, ... 
fieldnames(ud.targets));
            [x, x_cols] = self.select_and_prepare_data(indy_dat, self.localcols, self.inputs);
            x = x.';
            %[x, x_cols] = get_columns(indy_dat, ...
            %    self.localcols, ...
            %    structfun(@(X) safegetfield(X, 'chans', -1), ud.inputs, 'UniformOutput', false));
            if isfield(ud, 'alldata')
                x_cols = ud.x_cols;
                seed = ud.randseed;
                [xx, xx_cols] = self.select_and_prepare_data(...
                    ud.alldata.', ud.x_cols, ud.inputs);
                xx = xx(:, self.trdat.fit.testInd).';
                [yy_ref, yy_cols] = self.select_and_prepare_data(...
                    ud.alldata.', ud.y_cols, ud.targets);
                yy_ref = yy_ref(:, self.trdat.fit.testInd).';
                assert(isequal(xx, x));
                % targs = fieldnames(y_cols);
                % for t=1:length(targs)
                %     targ = targs{t};
                %     y_ref(:, t) = self.targets.(targ).invtransform(y_ref(:, t));
                % end
            end
            rng('shuffle');
            noise_level = optargs(varargin, {ud.noiselevel});
            % FIXME: vertical correlation in error...
            if isempty(x)
                noise = x;
            elseif isscalar(noise_level)
                noise = randn(size(x)) * noise_level;
            elseif isempty(noise_level)
                noise = bsxfun(@times, randn(size(x)), self.get_noise_vector(x_cols));
            else
                noise = bsxfun(@times, randn(size(x)), noise_level);
            end
            y_retr_raw = sim(self.net.fit, (x + noise).').';
            y_retr_raw_nonoise = sim(self.net.fit, x.').';
            % invert for each retrieved field
            y_retr = y_retr_raw;
            y_retr_nonoise = y_retr_raw_nonoise;
            fns = fieldnames(ud.targets);
            for i = 1:length(fns)
                fn = fns{i};
                y_retr(:, y_cols.(fn)) = ud.targets.(fn).invtransform(y_retr_raw(:, y_cols.(fn)));
                y_retr_nonoise(:, y_cols.(fn)) = ud.targets.(fn).invtransform(y_retr_raw_nonoise(:, y_cols.(fn)));
            end
            %y_retr = invtrans(y_retr_raw);
            %y_retr = y_retr(:);
        end

        function dev_selftest = evaluate_and_summarise_testdata(self, varargin)
            % evaluate test-data and perform several statistical tests
            %
            % - make a scatter density plot
            %
            % IN
            %
            %   noise_level     optional, defaults to same as training.
            %                   Can be either a scalar, or a vector
            %                   corresponding to the number of channels,
            %                   or [], which means same as training.
            %
            % See also: scatter_density_plot, plot_scatter_perf

            % the networks are stored as a struct with fields 'fit' and
            % 'pat' (see make_and_train_network), so the training meta-info
            % lives in self.net.fit.userdata
            noise_simul = optargs(varargin, {self.net.fit.userdata.noiselevel});

            %% self-test

            figure('visible', 'off');
            % evaluate_test_data returns [inputs, y_ref, y_retr, ...]; for
            % the self-test scatter plot, compare reference (x) against
            % retrieval (y)
            [~, x, y] = self.evaluate_test_data(noise_simul);
            filt = all(x>0, 2) & ...
                all(y>0, 2) & ...
                all(isfinite(x), 2) & ...
                all(isfinite(y), 2);
            [h_ax, h_cb] = self.plot_scatter_perf(x(filt), y(filt));
            ylabel(h_cb, 'Number of retrievals');
            xlabel(h_ax, 'Independent IWP [g/m^2]');
            ylabel(h_ax, 'Retrieved IWP [g/m^2]');
            if isfield(self.net.fit.userdata, 'realnoise') && self.net.fit.userdata.realnoise
                train_noise_str = 'realnoise';
            elseif isscalar(self.net.fit.userdata.noiselevel)
                train_noise_str = sprintf('%.2fK', self.net.fit.userdata.noiselevel);
            else
                train_noise_str = [sprintf('%.2f,', self.net.fit.userdata.noiselevel) 'K'];
            end
            if isequal(noise_simul, self.net.fit.userdata.noiselevel)
                simul_noise_str = 'trainnoise';
            elseif isscalar(noise_simul)
                simul_noise_str = sprintf('%.2fK', noise_simul);
            else
                simul_noise_str = [sprintf('%.2f,', noise_simul) 'K'];
            end
            title(sprintf('NN-IWP self-test, Evans data, noise %s/%s\nchannels %s', train_noise_str, simul_noise_str, self.freqstr));
            print(gcf(), '-depsc', fullfile(cscol('plot_base'), sprintf('nniwp_evansdb_selftest_noise%s_%s_chans%s.eps', ...
train_noise_str, simul_noise_str, self.chanstr))); %% calculate median fractional error per bin, store results in plotdata xbins = self.opts.bins; dev_selftest = self.binned_median_fractional_error(x(filt), y(filt), xbins); M = [xbins.', dev_selftest.']; %#ok of = fullfile(cscol('plotdata_base'), sprintf('nniwp_evansdb_selftest_fracerr_noise%s_%s_chans%s.dat', ... train_noise_str, simul_noise_str, self.chanstr)); save(of, 'M', '-ascii'); end function [xx, S] = get_performance(self, noise_level, varargin) % get many performance statistics % % Default, old-style, is hard-coded logarithmically for IWP. % For new behaviour, pass appropriate variables. % % IN % % noise_level % variable (must be in self.targets) % bins [x, y_ref, y_retr, x_cols, y_cols] = self.evaluate_test_data(noise_level); filt = (y_ref>0) & (y_retr>0) & isfinite(y_ref) & isfinite(y_retr); [variable, bins] = optargs(varargin, {'IWP', []}); y_ref = y_ref(all(filt, 2), y_cols.(variable)); y_retr = y_retr(all(filt, 2), y_cols.(variable)); if isempty(bins) bins = floor(min(log10(y_ref(:)))):.2:ceil(max(log10(y_ref(:)))); end bins = vec2col(bins); func = self.targets.(variable).transform; defunc = self.targets.(variable).invtransform; % calculate different ways to get performance S.abs_medad = cellfun(@(yy) mad(yy, 1), bin(func(y_ref), y_retr, bins)); S.abs_meanad = cellfun(@(yy) mad(yy, 0), bin(func(y_ref), y_retr, bins)); S.abs_rmeanse = cellfun(@(dy) rms(dy), bin(func(y_ref), y_retr-y_ref, bins)); S.abs_rmedse = cellfun(@(dy) sqrt(median(dy.^2)), bin(func(y_ref), y_retr-y_ref, bins)); S.abs_ir = cellfun(@iqr, bin(func(y_ref), y_retr, bins)); %S.rel_medfracerr = cellfun(@median, bin(func(y_ref), abs(y_retr-y_ref)./y_ref, bins)); S.rel_medfracerr = cellfun(@median, bin(func(y_ref), defunc(abs(func(y_retr) - func(y_ref)))-1, bins)); S.abs_medfracerr = S.rel_medfracerr .* defunc(bins); S.rel_rmeanse_frac = cellfun(@(dy) rms(dy), bin(func(y_ref), abs(y_retr-y_ref)./y_ref, bins)); S.abs_rmeanse_frac = S.rel_rmeanse_frac .* defunc(bins); S.count = cellfun(@(yy) size(yy, 1), bin(func(y_ref), y_retr, bins)); xx = defunc(bins); end function [bn, fp, fn, fp_med_ref, fp_med_retr, fn_med_ref, fn_med_retr] = test_patternnet_performance(self) % Test how well the cloudy/cloudfree recognition works % %p = self.net.pat; % % Returns: % % bins, false positives, false negatives [y_class, y_class_data] = self.get_classification(self.data); ref_clear = y_class(1, self.net.pat.divideParam.testInd); ref_cloudy = y_class(2, self.net.pat.divideParam.testInd); ref_data = y_class_data(self.net.pat.divideParam.testInd); [x, x_cols] = self.select_and_prepare_data(self.data, self.localcols, self.inputs); x_pat = self.subselect_inputs(x, x_cols, self.inputs, 'pat'); x_fit = self.subselect_inputs(x, x_cols, self.inputs, 'fit'); yclrt = self.net.pat(x_pat(:, self.net.pat.divideParam.testInd)); % use the pattern testInd for the fit network here, because I'm % interested in retrieved IWP in particular where the reference % is 0 ydatrt = 10.^self.net.fit(x_fit(:, self.net.pat.divideParam.testInd)); valid = (isfinite(ref_data)&ref_data>=0).'; bn = linspace(0, 1, 70); fp = zeros(size(bn)); fn = zeros(size(bn)); fp_med_ref = zeros(size(bn)); fp_med_retr = zeros(size(bn)); fn_med_ref = zeros(size(bn)); fn_med_retr = zeros(size(bn)); for i = 1:length(bn) frc = bn(i); retr_cloudy = yclrt(2, :) > frc; retr_clear = ~retr_cloudy; fp(i) = sum(retr_cloudy & ref_clear & valid)./length(find(valid)); fn(i) = sum(retr_clear & ref_cloudy & valid)./length(find(valid)); 
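                % The medians below quantify how much IWP is at stake when
                % the classifier errs at this cutoff: fp_med_* take the
                % median reference/retrieved IWP over the false positives
                % (retrieved cloudy, reference clear), fn_med_* over the
                % false negatives (retrieved clear, reference cloudy).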
fp_med_ref(i) = median(ref_data(valid & retr_cloudy & ref_clear)); fp_med_retr(i) = median(ydatrt(valid & retr_cloudy & ref_clear)); fn_med_ref(i) = median(ref_data(valid & ref_cloudy & retr_clear)); fn_med_retr(i) = median(ydatrt(valid & ref_cloudy & retr_clear)); end end function [ptls, ptls_nonoise] = iwc_profile_performance(self, ptiles) % get profile performance, return percentiles for ratio iwc/ref % % profile_performace([1, 25, 50, 75, 99]) % % See also: prctile [x, y_ref, y_retr, x_cols, y_cols, y_retr_nonoise] = self.evaluate_test_data(); fy = y_ref(:, y_cols.IWC) ./ y_retr(:, y_cols.IWC); fy_nonoise = y_ref(:, y_cols.IWC) ./ y_retr_nonoise(:, y_cols.IWC); ptls = prctile(fy, ptiles); ptls_nonoise = prctile(fy_nonoise, ptiles); end function dy = akm(self) % get averaging kernel matrix for all testdata % % OUT % % dy matrix of size (N, p, q) with averaging kernel matrix % per measurement, perturbed per noise-level. Resulting % fractional change in IWC. % construct a N x p x p matrix where each page is n_inputs = self.net.inputs{1}.size; n_outputs = self.net.outputs{end}.size; n_ret = length(self.trdat.testInd); [x_unpert, x_cols] = get_columns(self.data(self.trdat.testInd, :), ... self.localcols, ... fieldnames(self.inputs)); dx = self.get_noise_vector(x_cols); bsxfun(@times, eye(n_inputs), dx); x_pert = bsxfun(@plus, ... repmat(x_unpert, [1, 1, n_inputs]), ... reshape(bsxfun(@times, eye(n_inputs), dx), ... [1, n_inputs, n_inputs])); % x_pert = bsxfun(@times, ... % repmat(x_unpert, [1, 1, n_inputs]), ... % reshape(1 + eye(n_inputs)*pert_frac, [1, n_inputs, n_inputs])); y_unpert = sim(self.net, x_unpert.').'; y_pert = permute(reshape(self.net(reshape(permute(x_pert, [2, 3, 1]), ... [n_inputs, n_inputs*n_ret, 1])), ... [n_outputs, n_inputs, n_ret]), ... [3, 1, 2]); dy = 10.^bsxfun(@minus, y_pert, y_unpert)-1; end function plot_akm(self, dy) % plot akm % FIXME: move to other class n_inputs = size(dy, 2); n_outputs = size(dy, 1); %pcolor(1:n_inputs, linspace(4.5, 15, n_outputs), squeeze(dy(1, :, :))); sanepcolor(1:n_inputs, linspace(4.5, 15, n_outputs), dy); shading faceted; colormap(drywet(0.1)); cb = colorbar(); ylabel(cb, '%IWC change per noise level measurement'); zerobright(); hold on; %plot([12, 12], [4.5, 15], 'LineWidth', 2.5, 'Color', 'black'); xlabel(sprintf('Channel (%s)', strjoin(vec2row(fieldnames(self.inputs)), ', '))); ylabel('Height [km]'); title(['Sensitivity analysis ' strrep(self.name, '_', ' ')]); save_figure_multi(gcf(), fullfile(cscol('plot_base'), [self.name '_mean_akm']), 'png', 'eps', 'fig'); end function nm = calculate_network_file_name(self, noise_level, i) inps = fieldnames(self.inputs); tgts = fieldnames(self.targets); inpcell = cell(size(inps)); for k = 1:length(inpcell) inp = inps{k}; if isfield(self.inputs.(inp), 'chans') inpcell{k} = sprintf('%s,%s', ... inp, ... strjoin(arrayfun(@num2str, ... self.inputs.(inp).chans, ... 'UniformOutput', false), ... ',')); else inpcell{k} = inp; end end tgtcell = cell(size(tgts)); for k = 1:length(tgtcell) tgt = tgts{k}; if isfield(self.targets.(tgt), 'regression_range') tgtcell{k} = sprintf('%s,%.4g-%.4g', ... tgt, ... self.targets.(tgt).regression_range(1), ... self.targets.(tgt).regression_range(2)); else tgtcell{k} = tgt; end end if length(unique(noise_level))==1 noise_str = sprintf('%.2f', unique(noise_level)); else noise_str = sprintf('%2f,', noise_level); end nm = sprintf('/scratch/uni/u237/user_data/gholl/neuralnets/%s_%s_to_%s_noise%s_v%s_%d.mat', ... self.name, ... 
sprintf('%s,', strjoin(vec2row(inpcell), ',')), ... sprintf('%s,', strjoin(vec2row(tgtcell), ',')), ... noise_str, ... self.version, ... i); end function res = retrieve(self, data, cols, varargin) % retrieve data % % iwp = ccniwp.retrieve(data, cols[, cutoff]) % % This method: % % - applies limitations (lat/lon) % - applies transforms and invtransforms % - applies both neural networks % - decides cloud/cloudfree with 'cutoff' % - returns retrieved IWP in g/m^2 % % IN % % data matrix with data, must have at least lat/lon and % all the inputs % cols description of columns. NB: must match 'inputs' % cutoff cloud/nocloud? Defaults to 0.5 % % OUT % % res - structure with many fields, depends on the exact % product cutoff = optargs(varargin, {0.5}); if isequal(self.net, []) logtext(atmlab('OUT'), 'No networks loaded yet, will load default one for this version\n'); self.loadnets(); end ud = self.net.fit.userdata; [x, x_cols] = self.select_and_prepare_data(data, cols, ud.inputs); flds = fieldnames(x_cols); for i = 1:length(flds) fld = flds{i}; if isfield(ud.inputs.(fld), 'lims') lims.(fld) = ud.inputs.(fld).lims; end end % FIXME still hardcoded; somtimes wrong in neural net lims.B_SZA = [0, 85]; lims.B_LAA = [-180, 180]; lims.B_SAA = [-180, 180]; %limmer = collocation_restrain(inp, limstruct2limmat(lims, inp_cols)); limmer = collocation_restrain(x.', limstruct2limmat(getfields(lims, intersect(fieldnames(lims), fieldnames(x_cols))), x_cols)); %inp = inp(limmer, :); x = x(:, limmer); data = data(limmer, :); % for lat/lon/time still ekep track lat = data(:, cols.LAT1); lon = data(:, cols.LON1); %cloudpat = self.net.pat(inp.').'; cloudpat = self.net.pat(self.subselect_inputs(x, x_cols, ud.inputs, 'pat')).'; cloudy = cloudpat(:, 2)>cutoff; %iwp = self.net.fit(inp.').'; iwp = self.net.fit(self.subselect_inputs(x, x_cols, ud.inputs, 'fit')).'; % FIXME: not so hardcoded to MEAN_IWP_2C please? 
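            % The fitting net was trained on transformed targets (for IWP
            % typically transform = @log10), so its raw output must go
            % through the stored invtransform to recover physical units.
            % Minimal sketch of the round trip, assuming the log10 pair
            % from the 'targets' documentation above:
            %
            %   % training side:   y = transform(IWP)    = log10(IWP)
            %   % retrieval side:  IWP = invtransform(y) = 10.^y   [g/m^2]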
res.raw_iwp = ud.targets.MEAN_IWP_2C.invtransform(iwp); res.iwp = res.raw_iwp; res.iwp(~cloudy) = 0; res.cloud_probability = cloudpat(:, 2); res.lat = lat; res.lon = lon; res.time = data(:, cols.TIME1); % also put inputs here inps = fieldnames(self.inputs); for i = 1:length(inps) inp = inps{i}; if isfield(cols, inp) res.(inp) = data(:, cols.(inp)); else res.(inp) = x(x_cols.(inp), :); end end % res.MEAN_AVHRR_Y = data(:, cols.MEAN_AVHRR_Y); % res.AVHRR_FLAG_3AB = data(:, cols.AVHRR_FLAG_3AB); % res.MHS = data(:, cols.B_BT(3:5)); % res.B_LZA = data(:, cols.B_LZA); % res.B_LAA = data(:, cols.B_LAA); % res.B_SZA = data(:, cols.B_SZA); % res.B_SAA = data(:, cols.B_SAA); % res.Surface_elevation = x(x_cols.Surface_elevation, :); % if isfield(x_cols, 'Surface_elevation_std') % res.Surface_elevation_std = x(x_cols.Surface_elevation_std, :); % end end function [data, info] = retrieve_granule(self, granule, spec) % res = retrieve_granule(granule, spec) % % See also: retrieve, retrieve_and_store_granule altfile = self.get_alt_file(granule, spec); if isempty(altfile) gotdata = false; else S = self.read_alt_granule(granule, spec); if isempty(S.TIME) logtext(atmlab('OUT'), 'Read no data, skipping\n') data = []; info = struct(); return else gotdata = true; % [M, cc] = self.struct2mat(getfields(S, intersect(fieldnames(S), fieldnames(self.cols)))); [M, cc] = self.struct2mat(S); end end if ~gotdata D = datasets(); [result, additional_results, ~] = D.collocation_mhs_avhrr.collocate_granule(granule, spec, spec, {D.associated_mhs_avhrr, D.collapsed_mhs_avhrr}, false); % lockfile = get_lock(atmlab('WORK_AREA'), 'collocations.lock'); % cl1 = onCleanup(@()delete(lockfile)); if isempty(result) logtext(atmlab('ERR'), 'Found no data, skipping\n'); data = []; info = struct(); return end [M, cc] = D.associated_mhs_avhrr.merge_matrix(result, D.collocation_mhs_avhrr.cols, additional_results{1}, D.associated_mhs_avhrr.cols); clear result; [M, cc] = D.collapsed_mhs_avhrr.merge_matrix(M, cc, additional_results{2}, D.collapsed_mhs_avhrr.cols); clear additional_results; % logtext(atmlab('OUT'), 'Clearing lockfile\n'); % delete(lockfile); end logtext(atmlab('OUT'), 'Retrieving %d values\n', size(M, 1)); res = self.retrieve(M, cc, 0.5); logtext(atmlab('OUT'), 'Done\n'); % FIXME: make this more flexible, for going back to older % versions etc. 
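            % The result structure is flattened into one matrix whose
            % columns are addressed through self.cols, as built by
            % setcols() from self.members. A hypothetical layout (the
            % actual numbers depend on the members defined in setmembers):
            %
            %   self.cols.LAT     == 1   % latitude
            %   self.cols.LON     == 2   % longitude
            %   self.cols.TIME    == 3   % measurement time
            %   self.cols.P_CLOUD == 4   % cloud probability
            %   self.cols.IWP_RAW == 5   % IWP ignoring cloud probability
            %   self.cols.IWP     == 6   % final IWP [g/m^2]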
data = zeros(size(res.iwp, 1), max(structfun(@(X)max(X), self.cols))); data(:, self.cols.LAT) = res.lat; data(:, self.cols.LON) = res.lon; data(:, self.cols.TIME) = res.time; data(:, self.cols.P_CLOUD) = res.cloud_probability; data(:, self.cols.IWP_RAW) = res.raw_iwp; data(:, self.cols.IWP) = res.iwp; %data(:, self.cols.MEAN_AVHRR_Y) = res.MEAN_AVHRR_Y; % and the inputs inps = fieldnames(self.inputs); for i = 1:length(inps) inp = inps{i}; if isfield(self.cols, inp) if (min(size(res.(inp))) ~= size(self.cols.(inp), 2)) && max(size(res.(inp))) > 3 data(:, self.cols.(inp)) = res.(inp)(:, self.inputs.(inp).chans); else data(:, self.cols.(inp)) = res.(inp); end end end % data(:, self.cols.AVHRR_FLAG_3AB) = res.AVHRR_FLAG_3AB; % data(:, self.cols.MHS) = res.MHS; % data(:, self.cols.B_LZA) = res.B_LZA; % data(:, self.cols.B_LAA) = res.B_LAA; % data(:, self.cols.B_SZA) = res.B_SZA; % data(:, self.cols.B_SAA) = res.B_SAA; % data(:, self.cols.Surface_elevation) = res.Surface_elevation; % if isfield(res, 'Surface_elevation_std') && isfield(self.cols, 'Surface_elevation_std') % data(:, self.cols.Surface_elevation_std) = res.Surface_elevation_std; % end % FIXME: less hard-coded please info.Conventions = 'CF-1.6'; info.title = 'AVHRR+MHS IWP trained by collocations with 2C-ICE'; info.version = self.version; %info.history = self.changelog; info.source = 'Retrieved from collocated CPR/MHS 2C-ICE-trained ANN'; info.nnet = self.net.pat.userdata.stored; info.references = 'Holl, G., S. Eliasson, J. Mendrok, and S. A. Buehler (submitted 2013), SPARE-ICE: synergistic IWP from passive operational sensors, J. Geophys. Res.'; % info.stored_lims = struct2string_compact(storelims); % FIXME: more additional info end function retrieve_and_store_granule(self, granule, spec) % ccniwp.retrieve_and_store_granule(granule, spec) % % Apply neural network retrieval to 'granule' / 'spec' and % store result according to self.find_granule_by_datetime. % If a retrieval has been performed previously for this % granule, this method reads the input data from the earlier % version. Otherwise, it will read all inputs needed, perform % collocations (this will take some minutes). Then, it applies % the neural network (loaded via self.loadnets) and stores the % result. % % IN % % granule datetime-vector describing granule start time % spec string describing satellite (e.g. 
'noaa18')
            %
            % See also: retrieve, loadnets
            %
            % Example: D.col_syn_iwp.retrieve_and_store_granule([2007, 8, 1, 1, 48], 'noaa18');

            outfile = self.find_granule_by_datetime(granule, spec);
            if exist(outfile, 'file') && ~self.overwrite
                logtext(atmlab('OUT'), 'Already exists: %s, continue\n', outfile);
                return
            end

            [data, info] = self.retrieve_granule(granule, spec);

            if isempty(data) % no data
                logtext(atmlab('OUT'), 'Got no data, therefore not storing\n');
                return
            end

            logtext(atmlab('OUT'), 'Storing\n');
            % ugly hack; I don't want to extend, but rewrite this one,
            % therefore temporarily changing self.overwrite
            if self.overwrite == 2
                self.overwrite = 1;
                self.store(granule, spec, data, info);
                self.overwrite = 2;
            else
                self.store(granule, spec, data, info);
            end
        end

        function retrieve_and_store_period(self, date1, date2, spec)
            D = datasets();
            % weirdness, see http://stackoverflow.com/q/19406551/974555
            m = D.mhs;
            allgrans = cached_evaluation(@m.find_granules_for_period, date1, date2, spec, 'EXTRA', self.name, m.name);
            for i = 1:size(allgrans, 1)
                logtext(atmlab('OUT'), 'Processing: %s\n', num2str(allgrans(i, :)));
                try
                    self.retrieve_and_store_granule(allgrans(i, :), spec);
                catch ME
                    switch ME.identifier
                        case {'atmlab:find_granule_by_datetime', 'atmlab:collocate', 'atmlab:CollocatedDataset:noother', 'MATLAB:nomem'}
                            logtext(atmlab('ERR'), 'Cannot retrieve for %s %s: %s\n', ...
                                spec, num2str(allgrans(i, :)), ME.message);
                            continue
                        otherwise
                            ME.rethrow();
                    end
                end
            end
        end

        function setcols(self)
            fs = fieldnames(self.members);
            N = 1;
            for i = 1:length(fs)
                f = fs{i};
                if isfield(self.members.(f), 'dims')
                    L = self.members.(f).dims{2};
                else
                    L = 1;
                end
                self.cols.(f) = N:(N+L-1);
                N = N + L;
            end
        end

        function setmembers(self)
            % FIXME: this should be more flexible...
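            % Besides the fixed members below, any input that carries a
            % 'stored' definition is copied into self.members by the loop
            % at the end of this method. A sketch of what a subclass might
            % define (field names hypothetical):
            %
            %   self.inputs.MHS.stored.type = 'float';
            %   self.inputs.MHS.stored.atts.units = 'K';
            %   self.inputs.MHS.chans = 3:5;  % adds a 'MHS_CHANS' dimension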
self.members.LAT.type = 'float'; self.members.LAT.atts.long_name = 'Latitude'; self.members.LAT.atts.valid_range = [-90 90]; self.members.LAT.atts.units = 'degrees_north'; self.members.LON.type = 'float'; self.members.LON.atts.long_name = 'Longitude'; self.members.LON.atts.units = 'degrees_east'; self.members.LON.atts.valid_range = [-180 180]; self.members.TIME.type = 'int'; self.members.TIME.atts.long_name = 'Measurement time'; self.members.TIME.atts.units = 'seconds since 1970-01-01 00:00:00'; self.members.P_CLOUD.type = 'float'; self.members.P_CLOUD.atts.long_name = 'Cloud probability'; self.members.P_CLOUD.atts.valid_range = [0, 1]; self.members.IWP_RAW.type = 'float'; self.members.IWP_RAW.atts.long_name = 'Raw retrieved Ice Water Path, not considering cloud probability'; self.members.IWP_RAW.atts.valid_min = 0; self.members.IWP_RAW.atts.units = 'g/m^2'; self.members.IWP_RAW.atts.standard_name = 'atmosphere_cloud_ice_content'; self.members.IWP.type = 'float'; self.members.IWP.atts.long_name = 'Ice Water Path'; self.members.IWP.atts.valid_min = 0; self.members.IWP.atts.units = 'g/m^2'; self.members.IWP.atts.standard_name = 'atmosphere_cloud_ice_content'; inp_fields = setdiff(fieldnames(self.inputs), {'LAT1'}); for i = 1:length(inp_fields) f = inp_fields{i}; if isfield(self.inputs, f) && isfield(self.inputs.(f), 'stored') self.members.(f) = self.inputs.(f).stored; if isfield(self.inputs.(f), 'chans') self.members.(f).dims = {[f, '_CHANS'], length(self.inputs.(f).chans)}; end end end % self.members.MEAN_AVHRR_Y.type = 'float'; % self.members.MEAN_AVHRR_Y.atts.long_name = 'AVHRR measurement averaged over MHS footprint'; % self.members.MEAN_AVHRR_Y.atts.valid_min = 0; % self.members.MEAN_AVHRR_Y.atts.units = 'reflectance or BT [K]'; % self.members.MEAN_AVHRR_Y.atts.origin = 'Based on AVHRR l1a data from NOAA CLASS archive'; % self.members.MEAN_AVHRR_Y.dims = {'AVHRR_CHANS', 5}; % % self.members.AVHRR_FLAG_3AB.type = 'int'; % self.members.AVHRR_FLAG_3AB.atts.long_name = 'AVHRR-3 flag: 3A/3B'; % self.members.AVHRR_FLAG_3AB.atts.valid_range = [0, 1]; % self.members.AVHRR_FLAG_3AB.atts.origin = 'Based on AVHRR L1 data from NOAA CLASS archive'; % % self.members.MHS.type = 'float'; % self.members.MHS.atts.long_name = 'MHS radiance, channels 3-5'; % self.members.MHS.atts.valid_range = [100, 400]; % self.members.MHS.atts.units = 'K'; % self.members.MHS.atts.origin = 'Copied from MHS L1 data from NOAA CLASS archive'; % self.members.MHS.dims = {'MHS_HUM_CHANS', 3}; % % self.members.B_LZA.type = 'float'; % self.members.B_LZA.atts.long_name = 'MHS local zenith angle'; % self.members.B_LZA.atts.valid_range = [0, 180]; % self.members.B_LZA.atts.units = 'degrees'; % self.members.B_LZA.atts.origin = self.members.MHS.atts.origin; % % self.members.B_LAA.type = 'float'; % self.members.B_LAA.atts.units = 'degrees'; % self.members.B_LAA.atts.long_name = 'MHS local azimuth angle'; % self.members.B_LAA.atts.valid_range = [0, 360]; % self.members.B_LAA.atts.origin = self.members.MHS.atts.origin; % % self.members.B_SZA.type = 'float'; % self.members.B_SZA.atts.units = 'degrees'; % self.members.B_SZA.atts.long_name = 'Solar zenith angle'; % self.members.B_SZA.atts.valid_range = [0, 180]; % self.members.B_SZA.atts.origin = self.members.MHS.atts.origin; % % self.members.B_SAA.type = 'float'; % self.members.B_SAA.atts.units = 'degrees'; % self.members.B_SAA.atts.long_name = 'Solar azimuth angle'; % self.members.B_SAA.atts.valid_range = [0, 360]; % self.members.B_SAA.atts.origin = self.members.MHS.atts.origin; % % 
self.members.Surface_elevation.type = 'float'; % self.members.Surface_elevation.atts.units = 'km'; % self.members.Surface_elevation.atts.long_name = 'Surface_elevation'; % self.members.Surface_elevation.atts.valid_range = [0, 10]; % self.members.Surface_elevation.atts.origin = 'NOAA ETOPO1 1 Arc-Minute Global Relief Model'; % self.members.Surface_elevation_std.type = 'float'; % self.members.Surface_elevation_std.atts.units = 'km'; % self.members.Surface_elevation_std.atts.long_name = 'Surface_elevation_std'; % self.members.Surface_elevation_std.atts.description = 'Standard deviation of 9 surface elevations in gridbox + neighbours'; % self.members.Surface_elevation_std.atts.valid_min = 0; % self.members.Surface_elevation.atts.origin = 'Based on NOAA ETOPO1 1 Arc-Minute Global Relief Model'; end function cc = fix_cols_in_postprocessing(~, cc) end function [x_new, x_new_cols] = subselect_inputs(~, x_all, x_all_cols, inps, mod) flds = fieldnames(inps); x_new = zeros(size(x_all)); k = 0; for i = 1:length(flds) fld = flds{i}; if isfield(inps.(fld), 'net') if ~isequal(inps.(fld).net, mod) continue end end % chans are already taken care of in % select_and_prepare_data! % if isfield(inps.(fld), 'chans') % chans = inps.(fld).chans; % else % chans = 1:length(x_all_cols.(fld)); % end %newslc = (k+1):(k+length(x_all_cols.(fld))); newslc = (k+1):(k+length(x_all_cols.(fld))); %oldslc = par(x_all_cols.(fld), chans); x_new(newslc, :) = x_all(x_all_cols.(fld), :); x_new_cols.(fld) = newslc; k = k + length(x_all_cols.(fld)); end x_new(k+1:end, :) = []; end % function [M, cc] = struct2mat(self, S) % % copy it all over to the matrix % flds = fieldnames(self.cols); % M = zeros(size(S.time, 1), max(cellfun(@(X)max(self.cols.(X)), fieldnames(self.cols)))); % for i = 1:length(flds) % fld = flds{i}; % M(:, self.cols.(fld)) = S.(fld); % end % cc = self.cols; % % column names are different, channels etc... % % expand this % cc = self.fix_cols_in_postprocessing(cc); % end % attempt (so far) to make this more generic % function [M, cc] = struct2mat(self, S) % copy it all over to the matrix flds = fieldnames(S); L = max(structfun(@(X) size(X, 1), S)); %M = zeros(size(S.(flds{1}), 1), sum(structfun(@(X) size(X, 2), S))); N = 1; for i = 1:length(flds) fld = flds{i}; if ~(size(S.(fld), 1)==L) continue; end nn = size(S.(fld), 2); if isfield(self.inputs, fld) && isfield(self.inputs.(fld), 'chans') && length(self.inputs.(fld).chans) < size(S.(fld), 2) nn = length(self.inputs.(fld).chans); end nc = N:(N+nn-1); N = N + nn; cc.(fld) = nc; if isfield(self.inputs, fld) && isfield(self.inputs.(fld), 'chans') && length(self.inputs.(fld).chans) < size(S.(fld), 2) M(:, nc) = S.(fld)(:, self.inputs.(fld).chans); else M(:, nc) = S.(fld); end end %cc = self.cols; % column names are different, channels etc... 
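            % Worked example of the mapping above, assuming a struct with
            % S.TIME (N x 1) and S.MHS (N x 3) and no channel
            % subselection: M becomes N x 4, with cc.TIME = 1 and
            % cc.MHS = 2:4.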
% expand this cc = self.fix_cols_in_postprocessing(cc); end function [fn, s, altfile, altbase] = get_alt_file(self, granule, spec) % return path to earlier file, if available, and version to % which it belongs cr_fn = self.filename; cr_bd = self.basedir; co1 = onCleanup(@()setfield(self, 'filename', cr_fn)); %#ok co2 = onCleanup(@()setfield(self, 'basedir', cr_bd)); %#ok if ~isempty(self.altfilename) self.filename = self.altfilename; end if ~isempty(self.altbasedir) self.basedir = self.altbasedir; end % try old versions curver = self.version; co = onCleanup(@()setfield(self, 'version', curver)); %#ok for s = {'0.7', '0.6', '0.5b', '0.5', '0.4', '0.3', '0.2', '0.1'} s = s{1}; self.version = s; fn = self.find_granule_by_datetime(granule, spec); if exist(fn, 'file') % success break else % reset fn = []; end end self.version = curver; altfile = self.filename; altbase = self.basedir; % if isempty(fn) % no luck yet % % cr_fn = self.filename; % cr_bd = self.basedir; % co1 = onCleanup(@()setfield(self, 'filename', cr_fn)); % co2 = onCleanup(@()setfield(self, 'basedir', cr_bd)); % self.filename = self.altfilename; % self.basedir = self.altbasedir; % % cand = % fn = self.find_granule_by_datetime(granule, spec); % if ~exist(fn, 'file') % fn = []; % end self.filename = cr_fn; self.basedir = cr_bd; self.pos2re(); % fix regexp that may have been messed up % s = 0; % altfile = self.altfilename; % altbase = self.altbasedir; % end % self.filename = cr_fn; % if ~b % try with old versions % curver = self.version; % co2 = onCleanup(@()setfield(self, 'version', curver)); % for s = {'0.5b', '0.5', '0.4', '0.3', '0.2', '0.1'} % self.version = s; % end % end end function S = read_alt_granule(self, granule, spec) % cr_fn = self.filename; % self.filename = self.altfilename; % co = onCleanup(@()setfield(self, 'filename', cr_fn)); %#ok % ud = self.net.fit.userdata; % fields = fieldnames(ud.inputs); % needsproc = cellfun(@(X)isfield(ud.inputs.(X), 'process'), fieldnames(ud.inputs)); [~, vers, altfile, altbase] = self.get_alt_file(granule, spec); crvers = self.version; crfile = self.filename; crbase = self.basedir; self.version = vers; self.filename = altfile; self.basedir = altbase; co1 = onCleanup(@()setfield(self, 'version', crvers)); %#ok co2 = onCleanup(@()setfield(self, 'filename', crfile)); %#ok co3 = onCleanup(@()setfield(self, 'basedir', crbase)); %#ok self.pos2re(); try S = self.read_granule(granule, spec, fieldnames(self.members)); catch ME switch ME.identifier case 'atmlab:SatDataset:cannotread' % perhaps it has additional inputs compared to % 'back then'. Try to read only the ones I really % need, obtain the rest through post-processing inpfields = fieldnames(self.inputs); % flds = [intersect(fieldnames(self.members), ... flds = cellfun(@(X) ... safegetfield(self.inputs.(X).stored, 'realname', X), ... inpfields(structfun(@(X) ~isfield(X, 'process'), self.inputs)), ... 
'UniformOutput', false); %inpfields(structfun(@(X) ~isfield(X, 'process'), self.inputs))); 'LAT']; S = self.read_granule(granule, spec, flds); S = self.reader_processor(self, S, flds); otherwise ME.rethrow(); end end %S = self.read_granule(granule, spec); self.version = crvers; self.filename = crfile; self.basedir = crbase; self.pos2re(); % fix regexp that may have been messed up %S = self.read_granule(granule, spec, fieldnames(self.members)); %S.Surface_elevation = get_surface_elevation(S.lat, S.lon); % self.filename = cr_fn; end end methods (Access = {?SatDataset}) function [S, strattr] = read_homemade_granule(self, file, varargin) fields = optargs(varargin, {{}}); fields = union(vec2row(fields), {'LAT', 'LON', 'TIME'}); [S, strattr] = read_homemade_granule@RetrievalDatabaseProduct(self, file, fields); S.lat = S.LAT; S.lon = S.LON; S.time = S.TIME; % BUGFIX 2015-03-04: Unlike other HomemadeDatasets, where I % have one file per day and seconds since the start of the % day, here I have one file per orbit and seconds since unix % epoch. This means epoch should be 0, not start of the day. S.epoch = 0; S = rmfield(S, setdiff({'LAT', 'LON', 'TIME'}, fields)); end end methods (Access = protected) function noisevec = get_noise_vector(self, cols) % get a noise-vector responding to the columns of input data % % takes a structure 'cols', each field should correspond to a % self.inputs.(field) which must have a 'noise' vector inp_fields = fieldnames(self.inputs); %noisevec = zeros(self.net.inputs{1}.size, 1); for i = 1:length(inp_fields) fn = inp_fields{i}; if isfield(self.inputs.(fn), 'chans') noisevec(cols.(fn)) = self.inputs.(fn).noise(self.inputs.(fn).chans); else noisevec(cols.(fn)) = self.inputs.(fn).noise; end end end function mask = select_for_regression(self, y, y_cols) % get a mask of those y-targets we want for regression % % If self.targets.(field).regression_range is set, select those % inside the range. Otherwise, select all that are finite. % % WARNING! Input data from y is expected to have passed % through self.select_and_prepare_data, meaning that % 'transform' has been applied! % % IN % % y % y_cols mask = all(isfinite(y), 1); fields = fieldnames(y_cols); for i = 1:length(fields) field = fields{i}; if isfield(self.targets.(field), 'regression_range') rng = self.targets.(field).regression_range; % unapply transformation dat = self.targets.(field).invtransform(y(y_cols.(field), :)); mask = mask & (dat > rng(1)); mask = mask & (dat < rng(2)); end end end end methods (Static) function dat = homog_inputs(dat, cols) % homogeonise in latitude (equal area-density) % Calculate how many entries to choose per % 1°-latitude band. bins = -89.5:1:89.5; vals = bin(dat(:, cols.LAT1), 1:size(dat, 1), bins); %[C, X] = hist(dat(:, cols.LAT1), floor(min(dat(:, cols.LAT2)))-0.5:1:ceil(max(dat(:, cols.LAT2)))+0.5); no_entries_per_lat = round(floor(par(cellfun(@length, vals), 45)/1000)*1000*cosd(bins)); inds = arrayfun(@(i) vals{i}(1:min([length(vals{i}), no_entries_per_lat(i)])), 1:180, 'UniformOutput', false); dat = dat(sort(vertcat(inds{:})), :); end end end
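% Minimal usage sketch (hypothetical workflow; the concrete products are
% subclasses such as CollocatedNNIWP, see the class help above):
%
%   prod = CollocatedNNIWP(...);          % concrete subclass instance
%   prod.getdata(...);                    % gather the training database
%   prod.make_and_train_network([], 5);   % train 5 candidate networks
%   prod.bestnet([]);                     % load the best-performing one
%   prod.evaluate_and_summarise_testdata();
%   prod.retrieve_and_store_period([2007 8 1], [2007 8 31], 'noaa18');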