classdef CollocatedDataset < SatDataset & HomemadeDataset
    % Defines a collocated dataset between two (other) datasets
    %
    % Work in progress!
    %
    % $Id$

    properties
        primary;
        secondary;
    end

    % read-only props (may be alterable with methods)
    properties (SetAccess = protected)
        % associated datasets, e.g. additional data, meandata, etc.
        associated = {};
    end

    properties (Constant)
        % http://www.unidata.ucar.edu/software/netcdf/docs/netcdf-c/NetCDF_002d3-Variable-Types.html#NetCDF_002d3-Variable-Types
        members = struct(...
            'START1', struct(...
                'type', 'int', ... % won't work after 2038-01-19 03:14:07 UTC
                'atts', struct(...
                    'long_name', 'primary granule start time', ...
                    'units', 'seconds since 1970-01-01 00:00:00')), ...
            'LINE1', struct(...
                'type', 'int', ...
                'atts', struct(...
                    'long_name', 'primary scanline number')), ...
            'POS1', struct(...
                'type', 'byte', ...
                'atts', struct(...
                    'long_name', 'primary scanline position')), ...
            'TIME1', struct(...
                'type', 'int', ...
                'atts', struct(...
                    'long_name', 'primary measurement time', ...
                    'units', 'seconds since 1970-01-01 00:00:00')), ...
            'LAT1', struct(...
                'type', 'double', ...
                'atts', struct(...
                    'long_name', 'primary latitude', ...
                    'units', 'degrees_north', ...
                    'valid_range', [-90 90])), ...
            'LON1', struct(...
                'type', 'double', ...
                'atts', struct(...
                    'long_name', 'primary longitude', ...
                    'units', 'degrees_east', ...
                    'valid_range', [-180 180])), ...
            'START2', struct(...
                'type', 'int', ...
                'atts', struct(...
                    'long_name', 'secondary granule start time', ...
                    'units', 'seconds since 1970-01-01 00:00:00')), ...
            'LINE2', struct(...
                'type', 'int', ...
                'atts', struct(...
                    'long_name', 'secondary scanline number')), ...
            'POS2', struct(...
                'type', 'byte', ...
                'atts', struct(...
                    'long_name', 'secondary scanline position')), ...
            'TIME2', struct(...
                'type', 'int', ...
                'atts', struct(...
                    'long_name', 'secondary measurement time', ...
                    'units', 'seconds since 1970-01-01 00:00:00')), ...
            'LAT2', struct(...
                'type', 'double', ...
                'atts', struct(...
                    'long_name', 'secondary latitude', ...
                    'units', 'degrees_north', ...
                    'valid_range', [-90 90])), ...
            'LON2', struct(...
                'type', 'double', ...
                'atts', struct(...
                    'long_name', 'secondary longitude', ...
                    'units', 'degrees_east', ...
                    'valid_range', [-180 180])));
    end

    methods
        function self = CollocatedDataset(prim, sec, varargin)
            self = self@SatDataset(varargin{:}); % call parent constructor

            % check that both are SatDataset
            for x = {prim, sec}
                assert(isa(x{1}, 'SatDataset'), ['atmlab:' mfilename ':TypeError'], ...
                    'Collocations must be between 2 x SatDataset, got %s instead', ...
                    class(x{1}));
                x{1}.add_collocated_dataset(self);
            end
            self.primary = prim;
            self.secondary = sec;

            if isempty(self.name)
                self.name = ['collocations_' prim.name '_' sec.name];
            end

            % make 'cols'-structure for internal data specification
            fnms = fieldnames(self.members);
            nfields = length(fnms);
            fv = 1:nfields;
            self.cols = cell2struct(num2cell(fv), fnms.', 2);
        end

        function secondary_granules = overlap_granule(self, date1, spec2)
            % secondary granules that, judging from filenames, might overlap
            %
            % FIXME DOC
            %
            % date1, spec2
            %
            % Assumes granules contain data for at most one day.
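            %
            % EXAMPLE (a sketch; the CollocatedDataset object 'cd' and the
            % satellite name are hypothetical):
            %
            %   % secondary granules that may overlap the primary granule
            %   % starting at 2007-08-22 14:32
            %   grans = cd.overlap_granule([2007 8 22 14 32], 'noaa18');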

            %% get duration of a granule of sensor1
            duration = self.primary.granule_duration;

            %% find starting time in unixsecs
            granule_start = date2unixsecs(date1(1), date1(2), date1(3), date1(4), date1(5));
            granule_end = granule_start + duration;

            %% find all granules yesterday/today/tomorrow
            today_num = datenum(date1(1), date1(2), date1(3));
            yesterday = datevec(today_num - 1);
            tomorrow = datevec(today_num + 1);

            threedays = [...
                self.secondary.find_granules_by_date(yesterday, spec2, false); ...
                self.secondary.find_granules_by_date(date1, spec2, false); ...
                self.secondary.find_granules_by_date(tomorrow, spec2, false)];
            threedays = sortrows(threedays);
            granules_unixsecs = date2unixsecs(...
                threedays(:, 1), threedays(:, 2), threedays(:, 3), threedays(:, 4), threedays(:, 5));

            % last granule starting before the tolerance window of the
            % reference granule (kept, as it may still extend into the
            % window); everything before it starts 'too early'
            b = find(granule_start - colloc_config('interval') > granules_unixsecs, 1, 'last');
            % first granule starting after the tolerance window of the
            % reference granule; everything after it starts 'too late'
            e = find(granule_end + colloc_config('interval') < granules_unixsecs, 1, 'first');

            if isempty(b) && isempty(e)
                % none are too early or too late; keep all granules
                % starting within the reference granule
                ok = (granule_start < granules_unixsecs) & (granule_end > granules_unixsecs);
            elseif ~isempty(b) && ~isempty(e)
                % anything not too early, not too late, is good
                ok = b:e;
            else
                % the non-empty one as index
                X = {b, e};
                ok = X{[~isempty(b) ~isempty(e)]};
            end

            secondary_granules = threedays(ok, :);
        end

        function [result, additional_results, also] = collocate_granule(self, date1, spec1, spec2, varargin)
            % collocate_granule Collocate granules from sat/sensor pairs
            %
            % date1: datevec for the primary granule.
            %
            % spec1: specification for 1 (e.g. satellite).
            %
            % spec2: specification for 2 (e.g. satellite).
            %
            % additionals: cell array of AssociatedDataset objects (or,
            % naturally, of its subclasses)
            %
            % oneday
            %
            % TO BE DOCUMENTED

            [additionals, oneday] = optargs(varargin, {{}, false});
            additional_results = cell(size(additionals));

            % collect all extra fields, for all additional datasets
            extra_args_primary = {};
            extra_args_secondary = {};
            for i = 1:length(additionals)
                addi = additionals{i};
                primfields = addi.primary_arguments();
                secofields = addi.secondary_arguments();
                extra_args_primary = [extra_args_primary primfields]; %#ok
                extra_args_secondary = [extra_args_secondary secofields]; %#ok
            end
            extra_args_primary = unique(extra_args_primary);
            extra_args_secondary = unique(extra_args_secondary);

            %% output
            fid = atmlab('OUT');
            eid = atmlab('ERR');

            %% find date/times for which to search for secondary granules
            % overlap check 1: primary filename, secondary filename
            secondary_granules = self.overlap_granule(date1, spec2);
            if size(secondary_granules, 1) == 0
                error(['atmlab:' mfilename ':noother'], ...
                    'No overlapping granules found for [%s] %s/%s with %s/%s', ...
                    num2str(date1), spec1, self.primary.name, spec2, self.secondary.name);
            end
            logtext(fid, 'Found %d other granules to collocate with\n', size(secondary_granules, 1));

            %%
            result = zeros(0, length(fieldnames(self.cols)));

            %% read data for sat1/sensor1
            % Here, I decide to keep the duplicates for now. Simple is better
            % than complex. My collocation output consists of row and column
            % indices, and if I already remove duplicates /before/ collocation,
            % the row indices will be incorrect / need to be corrected for.
            % This causes more problems than it solves, so I will remove the
            % duplicates in the post-processing instead.
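
            % NOTE (inferred from the usage in this method, not from the
            % SatDataset documentation): read_granule is expected to return
            % a structure with at least the fields
            %   time    measurement time per scanline [s since .epoch]
            %   lat     latitudes, scanline x position
            %   lon     longitudes, scanline x position
            %   epoch   reference time for .time [unixsecs]
            %   version data version string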
            try
                data1 = self.primary.read_granule(date1, spec1, extra_args_primary, false, false); % keep duplicates for now
            catch ME
                switch ME.identifier
                    case {'MATLAB:load:couldNotReadFile', 'atmlab:invalid_data'}
                        logtext(eid, 'Collocating failed: %s\n', ME.message);
                        return
                    otherwise
                        ME.rethrow();
                end
            end

            also.version{1} = data1.version;
            also.version{2} = '?'; % secondary version not yet available

            if isempty(data1.time)
                logtext(fid, 'Primary is empty, nothing to collocate\n');
                % self.version{2} = 'N/A';
                return
            end

            % Keep the effective range and check the time overlap before
            % actually reading the secondary data, because the primary might
            % be sparse: judging from the filenames the granules may contain
            % overlapping periods, but in reality the primary may only contain
            % data for part of that period (e.g. when the primary is itself a
            % collocation). Checking the primary data against the secondary
            % filename avoids reading files that are known not to contain
            % overlap anyway.
            %
            % self.primary and self.secondary may actually be the same
            % object, do not store any 'temporary' data there.
            data1.time_orig = data1.time; % store, because subsequent unify_time_axis can lead to errors
            data1.eff_range = double(data1.epoch) + ...
                [min(data1.time)-colloc_config('interval'), ...
                 max(data1.time)+colloc_config('interval')];

            %% loop through all the granules to collocate with
            for i = 1:size(secondary_granules, 1)
                datecell = num2cell(secondary_granules(i, 1:5));
                logtext(atmlab('OUT'), 'Collocating with ''%s'' @ %04d-%02d-%02d %02d:%02d\n', ...
                    self.secondary.name, datecell{1:5});

                % overlap check 2: primary data with secondary filename
                data2_start = date2unixsecs(datecell{1:5});
                data2_end = data2_start + self.secondary.granule_duration;
                if (data2_start > data1.eff_range(2)) || (data2_end < data1.eff_range(1))
                    % no possible time overlap; the data1 range is already
                    % widened by colloc_config('interval'); no use even
                    % reading granule2
                    logtext(atmlab('OUT'), 'Not reading, no overlap with primary\n');
                    continue
                end

                %% read secondary
                try
                    data2 = self.secondary.read_granule(secondary_granules(i, :), spec2, extra_args_secondary, false);
                catch ME
                    switch ME.identifier
                        case {'atmlab:find_datafile_by_date', 'atmlab:invalid_data', 'atmlab:find_granule_by_datetime'}
                            logtext(eid, 'Error in reading datafile %4d-%02d-%02d %02d:%02d: %s. SKIPPING\n', ...
                                datecell{:}, ME.message);
                            continue
                        otherwise
                            ME.rethrow();
                    end
                end

                also.version{2} = data2.version;
                data2.time_orig = data2.time; % store, see note at data1.time_orig

                if isempty(data2.time)
                    logtext(fid, 'Secondary is empty, skipping\n');
                    continue
                end

                %% put both time axes on the same epoch
                switch sign(data1.epoch - data2.epoch)
                    case -1 % data1 epoch is earliest
                        data2.time = data2.time + double(data2.epoch - data1.epoch);
                        data2.epoch = data1.epoch;
                    case 0
                        % do nothing
                    case 1 % data2 epoch is earliest
                        % FIXME: Bug? primary changes also for next loop
                        % iteration? Or should be ok as long as epoch/time
                        % remain consistent; time values may iteratively
                        % increase, but at least consistently
                        data1.time = data1.time + double(data1.epoch - data2.epoch);
                        data1.epoch = data2.epoch;
                    otherwise
                        error(['atmlab:' mfilename ':bug'], 'Reached impossible place. Bug.');
                end
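
                % Worked example: with data1.epoch = 1000 and
                % data2.epoch = 1060, sign(1000 - 1060) = -1, so data2.time
                % is shifted by +60 s and data2.epoch becomes 1000; both
                % time axes are then directly comparable.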

                % overlap check 3: primary data, secondary data
                [iv1, iv2] = find_common_time(data1.time, data2.time, colloc_config('interval'));

                % limit to one day (if needed)
                switch oneday
                    case 1
                        % only consider data on the same date as the start of data1
                        if data1.time(1) < 86400
                            sameday = data1.time < 86400;
                        else
                            sameday = data1.time < 2*86400;
                        end
                        if ~all(sameday)
                            logtext(fid, 'taking only %d/%d that are on the same day\n', ...
                                sum(sameday), length(data1.time));
                        end
                        iv1 = iv1 & sameday;
                    case 2
                        % only consider data on the date after the start date of data1
                        sameday = data1.time > 86400;
                        iv1 = iv1 & sameday;
                        if ~all(sameday)
                            logtext(fid, 'taking from primary only %d/%d that are on the next day\n', ...
                                sum(sameday), length(data1.time));
                        end
                end

                if ~(any(iv1) && any(iv2)) % no overlap
                    logtext(fid, 'upon checking actual data, no actual time overlap, so nothing to collocate\n');
                    continue
                end

                %% perform collocations
                collocations = collocate(data1.time(iv1), ...
                                         data1.lat(iv1, :), ...
                                         data1.lon(iv1, :), ...
                                         data2.time(iv2), ...
                                         data2.lat(iv2, :), ...
                                         data2.lon(iv2, :), ...
                                         colloc_config('distance'), colloc_config('interval'));
                if any(collocations)
                    % compensate for the fact that we passed only a subset to collocate
                    collocations(:, 1) = collocations(:, 1) + find(iv1, 1, 'first') - 1;
                    collocations(:, 3) = collocations(:, 3) + find(iv2, 1, 'first') - 1;
                else
                    logtext(fid, 'No collocations\n');
                    continue
                end

                %% process core
                % should return a matrix with info
                logtext(fid, 'Collecting info for %d collocations\n', size(collocations, 1));
                this_result = self.process(collocations, data1, date1, spec1, data2, secondary_granules(i, :), spec2);
                result = [result; this_result]; %#ok

                %% process additional
                %
                % additionals are all subclasses of AssociatedDataset, able
                % to process data and knowing how to store it, how to deal
                % with it, etc.
                % for copying, a special AssociatedDataset shall take care
                % of this
                % FIXME: associated datasets
                % FIXME: additional fields copied
                for j = 1:length(additionals)
                    additional = additionals{j};
                    % FIXME: pass more, what else?
                    % FIXME: multiple datasets from one additional, or
                    % second additional needing output from 1st additional
                    % --> dependencies
                    % FIXME: concatenation in subfunction or so, will be
                    % needed in collocate_date and date_range as well
                    this_additional_result = additional.process_granule(this_result, data1, date1, spec1, data2, secondary_granules(i, :), spec2);
                    if isempty(additional_results{j})
                        additional_results{j} = this_additional_result;
                    else
                        additional_results{j} = [additional_results{j}; this_additional_result];
                    end
                end
                logtext(fid, 'Info collected\n');
            end
        end

        function [result, additional_results, also] = collocate_date(self, date, spec1, spec2, varargin)
            % collocate_date Collect all collocations for a given date
            %
            % This method collects all collocations for the given date.
            %
            % FORMAT
            %
            %   [M, addis] = collocate_date([year, month, day], spec1, spec2)
            %
            % IN
            %
            %   date        vector [year month day]
            %   spec1
            %   spec2
            %   additionals (optional) cell array
            %
            % OUT
            %
            %   M       matrix      results...
            %   addis   cell array  results corresponding to additionals
            %
            % FIXME DOC
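            %
            % EXAMPLE (a sketch; satellite names are hypothetical):
            %
            %   [M, addis] = cd.collocate_date([2007 8 22], 'noaa18', 'noaa19', {});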
            %
            % $Id$

            additionals = optargs(varargin, {{}});

            fid = atmlab('OUT');
            year = date(1);
            month = date(2);
            day = date(3);

            % find granules for the primary dataset; if the granule length
            % equals one day, do not take the day before, as the data are
            % already sorted per day
            grans = self.primary.find_granules_by_date(date, spec1, ...
                self.primary.granule_duration ~= 86400);
            if isempty(grans)
                logtext(atmlab('ERR'), 'no granules found %s/%s %d-%d-%d\n', ...
                    self.primary.name, spec1, year, month, day);
            end
            ngrans = size(grans, 1);

            % pre-allocate; I know the right size of 'result', but do not know
            % the right size of additional_results, because I don't know the
            % sizes of the involved fields
            result = zeros(0, length(fieldnames(self.cols)));
            additional_results = cell(size(additionals));

            anysuccess = false;
            also = struct();

            for i = 1:ngrans
                % keep track, because the first granule is probably from yesterday
                thisyear = grans(i, 1);
                thismonth = grans(i, 2);
                thisday = grans(i, 3);
                hour = grans(i, 4);
                minute = grans(i, 5);

                logtext(fid, 'Collocating %s %s %04d-%02d-%02d %02d:%02d with %s %s\n', ...
                    self.primary.name, spec1, thisyear, thismonth, thisday, hour, minute, self.secondary.name, spec2);

                if ~isequal([thisyear thismonth thisday], [year month day])
                    % only take collocations happening in the part of the
                    % granule occurring on the day requested
                    oneday = 2;
                else
                    % take any collocations happening on the day requested
                    oneday = 1;
                end

                try
                    [result_granule, result_granule_addis, new_also] = self.collocate_granule(...
                        [thisyear, thismonth, thisday, hour, minute], spec1, spec2, additionals, oneday);
                    if ~isempty(fieldnames(also)) && ~isequal(also, new_also)
                        warning(['atmlab:' mfilename ':inconsistent'], ...
                            ['Additional information structures not consistent within the day. ' ...
                             'Expected: ' struct2string_compact(also) ...
                             '. Got: ' struct2string_compact(new_also)]);
                    end
                    also = new_also;
                    anysuccess = true;
                catch ME
                    switch ME.identifier
                        case {'atmlab:find_datafile_by_date', 'atmlab:find_granule_by_datetime'}
                            logtext(atmlab('ERR'), 'Error in searching for datafile %4d-%02d-%02d %02d:%02d %s %s: %s. SKIPPING\n', ...
                                thisyear, thismonth, thisday, hour, minute, self.primary.name, spec1, ME.message);
                            continue
                        case {'atmlab:collocate', 'atmlab:atovs_get_l1c:zamsu2l1c', 'atmlab:CollocatedDataset:noother', 'atmlab:collocate_granule:noother'}
                            logtext(atmlab('ERR'), 'Error in collocating with datafile at %4d-%02d-%02d %02d:%02d %s %s: %s. SKIPPING\n', ...
                                thisyear, thismonth, thisday, hour, minute, self.primary.name, spec1, ME.message);
                            continue
                        case {'MATLAB:hdfinfo:invalidFile', 'MATLAB:imagesci:hdfinfo:invalidFile'}
                            logtext(atmlab('ERR'), 'Cannot read datafile %s %s %4d-%02d-%02d %02d:%02d: %s. SKIPPING\n', ...
                                self.primary.name, spec1, thisyear, thismonth, thisday, hour, minute, ME.message);
                            continue
                        otherwise
                            ME.rethrow();
                    end
                end

                result = [result; result_granule]; %#ok

                for j = 1:length(additional_results)
                    if isempty(additional_results{j})
                        additional_results{j} = result_granule_addis{j};
                    else
                        additional_results{j} = [additional_results{j}; result_granule_addis{j}];
                    end
                end
            end

            if ~anysuccess
                error('atmlab:collocate_date:nosource', 'no source data found at all');
            end
        end

        function collocate_and_store_date(self, date, spec1, spec2, varargin)
            % collocate_and_store_date Collect collocations and store appropriately
            %
            % For a given date, check whether a collocation datafile exists.
            % If it doesn't (or colloc_config('overwrite') is set), collocate
            % the indicated satellites and sensors with each other and store
            % the result in the appropriate datafile.
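            %
            % EXAMPLE (a sketch; satellite names are hypothetical):
            %
            %   cd.collocate_and_store_date([2007 8 22], 'noaa18', 'noaa19')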
            %
            % TODO:
            %  use existing collocations, but then read additional datasets
            %
            % FORMAT
            %
            %   collocate_and_store_date(date, spec1, spec2, additionals)
            %
            % IN
            %
            %   date        datevec
            %   spec1
            %   spec2
            %   additionals (optional)
            %
            % OUT
            %
            %   none (but writes a file)

            additionals = optargs(varargin, {{}});

            % get filename
            if isempty(spec1)
                spec = spec2;
            elseif isempty(spec2)
                spec = spec1;
            else
                spec = {spec1, spec2};
            end
            fn = self.find_granule_by_datetime(date, spec);

            mainfileexists = exist(fn, 'file');
            addisexist = false(size(additionals));
            for i = 1:length(additionals)
                fna = additionals{i}.find_granule_by_datetime(date, spec);
                addisexist(i) = exist(fna, 'file');
            end

            % if everything is already there, do nothing
            if mainfileexists && all(addisexist) && ~colloc_config('overwrite')
                % FIXME TODO: should I check that the fields match?
                logtext(atmlab('OUT'), 'All output files already exist:\n');
                logtext(atmlab('OUT'), ' %s\n', fn);
                for i = 1:length(additionals)
                    logtext(atmlab('OUT'), ' %s\n', additionals{i}.find_granule_by_datetime(date, spec));
                end
                return
            end

            % so SOME work is to be done, at least
            info_addi = struct();
            info_addi.history = [datestr(now, 'YYYY-mm-dd') ' Obtained additionals from collocs'];
            info_addi.parent_dataset = self.name;
            info_addi.parent_spec = spec;

            if mainfileexists && ~colloc_config('overwrite')
                logtext(atmlab('OUT'), 'Collocations exist, but additionals incomplete\n');
                % FIXME: don't reprocess all additionals, but read those
                % that are already there?
                [result, ~, attr] = self.read_date(date, spec, fieldnames(self.cols));
                info_addi.parent_id = attr.id;
                for i = 1:length(additionals)
                    logtext(atmlab('OUT'), 'Collecting additionals for ''%s''\n', additionals{i}.name);
                    addi = additionals{i}.process_delayed(result, spec1, spec2);
                    additionals{i}.store(date, spec, addi, info_addi);
                end
            else
                % assumption:
                % if the main file does not exist, the associated files don't either
                try
                    [result, addis, also] = ...
                        self.collocate_date(date, spec1, spec2, additionals);
                catch ME
                    switch ME.identifier
                        case {'atmlab:collocate_date:nosource', ...
                              'atmlab:CollocatedDataset:noother'}
                            logtext(atmlab('ERR'), ...
                                'No successful collocations at %04d-%02d-%02d, not writing\n', ...
                                date(1), date(2), date(3));
                            return;
                        otherwise
                            ME.rethrow();
                    end
                end

                % additional attributes, main part
                info_core = struct();
                info_core.history = [datestr(now, 'YYYY-mm-dd') ' Collocations generated from scratch'];
                info_core.maxdist_km = colloc_config('distance');
                info_core.maxtime_s = colloc_config('interval');
                info_core.primary_dataset = self.primary.name;
                info_core.primary_info = spec1;
                info_core.primary_version = also.version{1};
                info_core.secondary_dataset = self.secondary.name;
                info_core.secondary_sensor = spec2;
                info_core.secondary_version = also.version{2};
                info_core.start_time = double(date2unixsecs(date(1), date(2), date(3)));

                [~, attr] = self.store(date, spec, result, info_core);
                info_addi.parent_id = attr.id;
                for i = 1:length(additionals)
                    additionals{i}.store(date, spec, addis{i}, info_addi);
                end
            end
        end

        function collocate_and_store_date_range(self, date_start, date_end, spec1, spec2, varargin)
            % FIXME DOC
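            %
            % Collocates and stores for each date in the indicated range;
            % see collocate_and_store_date for details.
            %
            % EXAMPLE (a sketch; satellite names are hypothetical):
            %
            %   cd.collocate_and_store_date_range([2007 8 1], [2007 8 31], ...
            %       'noaa18', 'noaa19')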

            logtext(atmlab('OUT'), 'Starting collocations\n');
            logtext(atmlab('OUT'), '%s %s vs. %s %s\n', self.primary.name, spec1, self.secondary.name, spec2);
            logtext(atmlab('OUT'), 'From %s to %s\n', mat2str(date_start), mat2str(date_end));
            alldates = daterange(date_start, date_end);
            i = 1;
            while i <= size(alldates, 1)
                year = alldates(i, 1);
                month = alldates(i, 2);
                day = alldates(i, 3);
                logtext(atmlab('OUT'), 'collocating %04d-%02d-%02d\n', ...
                    year, month, day);
                try
                    self.collocate_and_store_date([year, month, day], spec1, spec2, varargin{:});
                    i = i + 1;
                catch ME
                    switch ME.identifier
                        case {'atmlab:SatDataset:missing_firstline'}
                            % run the firstline detection
                            if strfind(ME.message, self.primary.name)
                                again = self.primary;
                                spec = spec1;
                            elseif strfind(ME.message, self.secondary.name)
                                again = self.secondary;
                                spec = spec2;
                            else
                                error(['atmlab:' mfilename], ...
                                    ['I received a missing_firstline error message, but ' ...
                                     'I don''t know what dataset it is about. This is a bug. ' ...
                                     'The message was: ' ME.message]);
                            end
                            logtext(atmlab('ERR'), 'Warning: I don''t have firstline-data for today for %s. I''ll try to get some! (today''s collocations will be redone)\n', again.name);
                            again.find_granule_first_line(alldates(i, :), alldates(end, :), spec);
                            again.granule_first_line(alldates(i, :), spec, true); % force reload
                        otherwise
                            ME.rethrow();
                    end
                end
            end
            logtext(atmlab('OUT'), 'Finished!\n');
        end

        function M = process(self, collocations, data1, date1, spec1, data2, date2, spec2)
            % Process core collocations
            %
            % + Process 'core' fields
            % + Copy additional fields?
            % + Concatenate to what's already there?
            %
            % NOTE: time is assumed to be 1D

            n = size(collocations, 1);
            X = mat2cell(collocations, size(collocations, 1), [1 1 1 1]);
            c = self.cols;
            nfields = length(fieldnames(c));
            M = nan(n, nfields);
            [line1, pos1, line2, pos2] = X{:};

            % index for direct addressing
            i1 = sub2ind(size(data1.lat), line1, pos1);
            i2 = sub2ind(size(data2.lat), line2, pos2);

            M(:, c.LINE1) = line1;
            M(:, c.LINE2) = line2;

            M(:, c.POS1) = pos1;
            M(:, c.POS2) = pos2;

            M(:, c.LAT1) = data1.lat(i1);
            M(:, c.LON1) = data1.lon(i1);

            M(:, c.LAT2) = data2.lat(i2);
            M(:, c.LON2) = data2.lon(i2);

            % FIXME: this is not correct, use datevec!
            X = num2cell(date1);
            % round here; a slight loss of precision causes this to be
            % slightly less than integer, which would make it floored
            % down to the /previous/ integer when storing in NetCDF later
            M(:, c.START1) = round(date2unixsecs(X{:}));

            X = num2cell(date2);
            M(:, c.START2) = round(date2unixsecs(X{:}));

            M(:, c.TIME1) = data1.epoch + data1.time(line1);
            M(:, c.TIME2) = data2.epoch + data2.time(line2);
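
            % NOTE (explanatory, based on how granule_first_line is used
            % here): granule_first_line returns the first scanline in a
            % granule that does not repeat data from the preceding granule.
            % E.g. if first1 = 120, any collocation with LINE1 < 120 was
            % already covered when the preceding primary granule was
            % processed, so it is removed below as a duplicate.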

            %% remove duplicates
            first1 = self.primary.granule_first_line(date1, spec1);
            first2 = self.secondary.granule_first_line(date2, spec2);
            wrongrows = (M(:, c.LINE1) < first1) | ...
                        (M(:, c.LINE2) < first2);
            logtext(atmlab('OUT'), ['Removing %d scanlines primary before %d, ', ...
                'or secondary before %d\n'], ...
                sum(wrongrows), first1, first2);
            M(wrongrows, :) = [];
        end

        function [M, M_cols, addis, addis_cols, additionals] = read(self, date_start, date_end, spec, all_fields, varargin)
            % read Read collocations for the indicated period
            %
            % This method reads all, or a selection of, the collocations for
            % the indicated period, between date_start and date_end.
            %
            % FORMAT
            %
            %   [M, M_cols, addis, addis_cols, additionals] = ...
            %       col.read(date_start, date_end, spec, fields[, limits[, filters]]);
            %
            % IN
            %
            %   date_start  datevec     First date to read
            %   date_end    datevec     Last date to read (inclusive)
            %   spec        various     Specification. Details depend on
            %                           the dataset, but usually a string
            %                           with a satellite or a cell array
            %                           of strings with two satellites.
            %   fields      cell-str    Cell array of strings with all
            %                           the fields to be read. Can be
            %                           from the core dataset or from
            %                           the additional datasets.
            %
            % OPT
            %
            %   limits      structure   Structure describing acceptable
            %                           ranges for zero or more fields.
            %                           E.g. struct('LAT1', [-30 30])
            %                           will limit collocations to those
            %                           where the value of 'LAT1' is
            %                           between -30 and +30, inclusive
            %                           on both ends.
            %   filters     cell-array  Cell array of cell arrays:
            %                           {filter1, filter2, ...}. Each
            %                           filter is itself a cell array
            %                           with 2 or 3 elements:
            %                           {handle, fieldnames, extra},
            %                           where handle is a function
            %                           handle, fieldnames a cell array
            %                           of strings with fields whose
            %                           values will be passed on to the
            %                           filter, and extra a cell array
            %                           of arbitrary values that will be
            %                           passed on to the handle as-is.
            %
            % OUT
            %
            %   M           matrix      with values corresponding to fields
            %                           stored in the core collocations
            %   M_cols      structure   describing what field ended up in
            %                           what column of M
            %   addis       cell-array  of matrices similar to 'M', one for
            %                           each additional dataset for which at
            %                           least one field was found
            %   addis_cols  structure   like M_cols, corresponding to each
            %                           of addis
            %   additionals cell-array  of the AssociatedDataset objects for
            %                           which at least one field ended up in
            %                           addis
            %
            % TODO:
            %  - read back from original data. Currently,
            %    col.collocate_and_store_date can already take care of this,
            %    if collocations exist.
            %  - so far, limits and filters work only for core fields. They
            %    should work for any field.
            %  - add a check that associated datasets match
            %
            % EXAMPLE
            %
            %   [M, c, MM, cc, aa] = ...
            %       col.read([2007 1 1], [2007 1 10], 'n18', ...
            %           {'LAT1', 'LON1', 'LAT2', 'LON2', 'RO_ice_water_path', 'cld_reff_vis', 'cld_opd_vis'}, ...
            %           struct('LAT1', [-30 30]), ...
            %           {{@(x, y) x>y, {'LAT1', 'LON1'}}});

            %% prepare configuration things
            [limits_all, filters_by_name] = optargs(varargin, {struct(), {}});

            % distribute fields, limits, etc. over core, additionals
            [fields_core, additionals, additional_fields, deal_index] = self.deal_fields(all_fields);
            additionals_limits = cell(size(additionals));
            % use deal_index to sort limits-fields into categories
            for i = 1:length(all_fields)
                field = all_fields{i};
                if any(strcmp(field, fieldnames(limits_all)))
                    if deal_index(i) == 0
                        limits_core.(field) = limits_all.(field);
                    else
                        additionals_limits{deal_index(i)}.(field) = limits_all.(field);
                    end
                end
            end

            additionals_day = cell(size(additional_fields));
            addis_cols = cell(size(additional_fields));
            addis = cell(size(additional_fields));
            for i = 1:length(addis)
                addis{i} = struct();
            end

            if nargout < 3 && ~isempty(additionals)
                warning(['atmlab:' mfilename], ...
                    '%s was called with %d (<3) output arguments, additionals are lost', ...
                    mfilename, nargout);
            end

            % Not filtering duplicates; this should be done when processing
            % the collocations initially
            M = [];

            %% loop through all the dates
            dates = daterange(date_start, date_end);
            M_cols = struct();
            filters_by_no = cell(size(filters_by_name));
            hitno = 0; % count no. of days with collocations
            for i = 1:size(dates, 1)
                date = dates(i, :);

                %% read collocations for date
                try
                    [collocations_day, M_cols_here] = self.read_single_day(date, spec, fields_core);
                    if max(size(collocations_day)) == 0
                        logtext(atmlab('OUT'), 'no collocations\n');
                        continue
                    end
                    M_cols = M_cols_here;
                    hitno = hitno + 1;
                    for j = 1:length(additionals)
                        [additionals_day{j}, addis_cols{j}] = additionals{j}.read_single_day(date, spec, additional_fields{j});
                    end
                catch ME
                    switch (ME.identifier)
                        case {'MATLAB:load:couldNotReadFile', 'MATLAB:nonExistentField', ...
                              'MATLAB:gunzip:invalidFilename', 'MATLAB:netcdf:open:noSuchFile', ...
                              'atmlab:exec_system_cmd:shell'}
                            logtext(atmlab('ERR'), 'Problem for %04d-%02d-%02d: %s\n', ...
                                date(1), date(2), date(3), ME.message);
                            continue
                        otherwise
                            ME.rethrow();
                    end
                end
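
                % Illustration of the translation below: with
                % M_cols.LAT1 = 5 and M_cols.LON1 = 6 (column numbers are
                % hypothetical), a name-based filter
                % {@(lat, lon) lat > lon, {'LAT1', 'LON1'}} becomes a
                % column-based filter operating on columns 5 and 6 of the
                % day's collocation matrix.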

                %% apply limitations
                if hitno == 1
                    % convert the limits-structure to a limits-matrix. This is
                    % only done after the first time collocations are found,
                    % because only then do I know for sure what sizes the
                    % fields have.
                    limmat = limstruct2limmat(limits_all, M_cols);
                    for k = 1:length(filters_by_name)
                        filters_by_no{k} = {...
                            filters_by_name{k}{1}, ...
                            cellfun(@(s) M_cols.(s)', filters_by_name{k}{2}, 'UniformOutput', false), ...
                            filters_by_name{k}{3:end}};
                    end
                end

                lim = collocation_restrain(collocations_day, limmat, filters_by_no);
                collocations_day = collocations_day(lim, :);
                for j = 1:length(additionals)
                    additionals_day{j} = additionals{j}.limit_to(additionals_day{j}, lim);
                end

                % if dolimits
                %     lim = collocation_restrain(collocations_day, limmat, filters);
                %     collocations_day = collocations_day(lim, :);
                % end

                %% add to total
                if isempty(M) % should mean additionals are empty, too
                    M = collocations_day;
                    addis = additionals_day;
                else
                    L = size(M, 1);
                    N = size(collocations_day, 1);
                    M((L+1):(L+N), :) = collocations_day;
                    for j = 1:length(additional_fields)
                        N_a = size(additionals_day{j}, 1);
                        addis{j}((L+1):(L+N_a), :) = additionals_day{j};
                    end
                end
            end

            if hitno == 0
                warning(['atmlab:' mfilename], ...
                    'No collocations found at all. Do not trust the column descriptions.');
            end
        end

        % semi-internal use
        function add_associated(cd, ad)
            % INTERNAL USE: adds an associated dataset
            cd.associated{end+1} = ad;
        end
    end

    methods (Access = private)
        function [fields, additionals, additional_fields, deal_index] = deal_fields(self, all_fields)
            % deal fields: core or additionals?
            n_f = 0;
            n_add = zeros(size(self.associated));
            additionals = {};
            additional_fields = {};
            deal_index = nan(size(all_fields));
            for i = 1:length(all_fields)
                field = all_fields{i};
                if any(strcmp(fieldnames(self.members), field))
                    n_f = n_f + 1;
                    fields{n_f} = field; %#ok
                    deal_index(i) = 0;
                else
                    found = false;
                    for j = 1:length(self.associated)
                        asso = self.associated{j};
                        if any(strcmp(fieldnames(asso.members), field))
                            n_add(j) = n_add(j) + 1;
                            additional_fields{j}{n_add(j)} = field; %#ok
                            % check whether this additional dataset is already
                            % known; if yes, find its index among the existing names
                            existing_names = cellfun(@(X) X.name, additionals, 'UniformOutput', false);
                            if ~any(strcmp(existing_names, asso.name))
                                additionals{end+1} = asso; %#ok
                            end
                            samename = strcmp(cellfun(@(X) X.name, additionals, 'UniformOutput', false), asso.name);
                            assert(sum(samename) <= 1, ['atmlab:' mfilename], ...
                                ['More than one additional dataset named ' asso.name]);
                            deal_index(i) = find(samename);
                            found = true;
                        end
                    end
                    if ~found
                        error(['atmlab:' mfilename], ...
                            'Field %s was not found in the core or in any additional dataset', ...
                            field);
                    end
                end
            end
        end
    end
end
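
% Typical usage (a sketch; the dataset objects and satellite names are
% hypothetical; the core field names below are defined in 'members' above):
%
%   % cpr and mhs are SatDataset objects defined elsewhere
%   cd = CollocatedDataset(cpr, mhs);
%   cd.collocate_and_store_date_range([2007 8 1], [2007 8 31], '', 'noaa18');
%   [M, c] = cd.read([2007 8 1], [2007 8 31], 'noaa18', {'LAT1', 'LON1', 'TIME1'});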