function [M, cols_out, limmat, filters] = collocation_read(sat1, sensor1, sat2, sensor2, ...
start_date, end_date, cols_in, limits, filters)
% collocation_read Read collocations for indicated period
%
% This m-file reads all the collocations for the indicated period, between
% start_date and end_date, for the indicated collocations type (like
% collocations between CloudSat and MHS or between MHS and MHS).
%
% FORMAT
%
% [M, cols] = collocation_read(...
% sat1, sensor1, sat2, sensor2, ...
% start_date, end_date, cols_in[, limits[, filters]])
%
% IN
%
% sat1 string primary satellite
% sensor1 string sensor on satellite; passed on to reader
% function which can still return data from
% other sensors; for example,
% cloudsat/cpr+noaa18/mhs can also return
% data from amsua or hirs.
% sat2 string secondary satellite
% sensor2 string sensor on satellite
% start_date 3x1 array Starting date for which to read collocations
% end_date 3x1 array Ending date for which to read collocations
% cols_in cell array Column names. This is passed on to the
% reader function.
% limits structure Describes acceptable ranges for the
% different columns requested. Data outside
% these ranges will not be returned. Note:
% [-inf, inf] will still get rid of nans.
% Note2: if limits=nan, no filtering is done
% AT ALL.
% filters cell array Cell array of cell arrays. Each member of
% the cell array is {@filter, {COL1, ...,
% COLN}, {'foo', 'bar', ..., 'baz'}}.
% The filter is called according to
% filter(M(COL1, :), ..., M(COLN, :), 'foo',
% 'bar', ..., 'baz') and shall return logical
% true wherever the filter is passed. See
% examples on website (see below).
%
% OUT
%
% M NxM matrix Matrix containing collected info
% cols structure Structure describing the names of the columns
%
% For examples, see online help on the satpage
%
% $Id$
%% prepare configuration things
no = number_sats_in_dataset(['collocation_' sensor1 '_' sensor2]);
cols = colloc_constants(['cols_' sensor1 '_' sensor2]);
if ~exist('filters', 'var')
filters = {};
end
switch no
case 1
s = sat2;
case 2
s = {sat1, sat2};
end
%% check legacy format
if iscell(cols_in{1})
warning('atmlab:collocation_read', ...
['fields should be {''a'', ''b'', ''c'', ''d'', ...}, ' ...
'not {{''a'', ''b'', ...}, {''c'', ''d'', ...}}']);
cols_in = [cols_in{:}];
end
%% ALWAYS get rid of doubles
% FIXME: at some point in the future, this should be redundant
if isfield(cols.overlap, 'filter_double')
filters = [filters ...
{{@colloc_select_good_lines, cols.overlap.filter_double{1}, {sat1, sensor1}}, ...
{@colloc_select_good_lines, cols.overlap.filter_double{2}, {sat2, sensor2}}}];
% but only where filter_double has a value
filters(cellfun(@(c) isempty(c{2}), filters)) = [];
% make sure the fields are asked for
for c = horzcat(cols.overlap.filter_double{:})
% cannot use union here, because it wants a cellstr :(
if ~any(cellfun(@(d) isequal(c, d), cols_in))
cols_in = [cols_in c]; %#ok
end
end
else
logtext(atmlab('OUT'), 'Not filtering doubles, hopefully this was done when processing\n');
end
%% get name_struct
name_struct = cols_cell_to_cols_struct(cols, cols_in);
% ncols: highest value
C = struct2cell(name_struct);
ncols = max(horzcat(C{:}));
%% convert limits structure to matrix as wanted by restrain_collocations
dolimits = true;
if exist('limits', 'var')
if isequalwithequalnans(limits, nan)
dolimits = false;
limmat = zeros(0, 3);
else
limmat = limstruct2limmat(limits, name_struct);
end
else
limmat = zeros(0, 3);
end
% for the filters, convert column names to column numbers, using the
% earlier obtained name_struct
if ~isempty(filters)
for i = 1:length(filters)
filters{i}{2} = cellfun(@(s) name_struct.(s)', filters{i}{2}, 'UniformOutput', false);
end
end
%% check if this can be done via hdf5 (much faster!)
years = start_date(1):end_date(1);
paths_hdf5 = cell(size(years));
for i = 1:length(years)
year = years(i);
try
paths_hdf5{i} = find_datafile_by_date([year 1 1], s, ['collocation_' sensor1 '_' sensor2 '_hdf5']);
assert(~~exist(paths_hdf5{i}, 'file'), ...
'atmlab:collocation_read', 'HDF5-file does not exist: %s', ...
paths_hdf5{i});
catch ME
switch (ME.identifier)
case {'atmlab:find_datadir_by_date', 'atmlab:collocation_read', 'atmlab:find_granules_by_date', 'atmlab:input:undefined'}
if iscell(s)
s_p = horzcat(s{:});
else
s_p = s;
end
logtext(atmlab('OUT'), ...
'Note: no HDF5 @ %d %s %s %s\n', ...
year, s_p, sensor1, sensor2)
continue
otherwise
ME.rethrow()
end
end
end
% if any(cellfun(@length, paths_hdf5))
% %% do this stuff via HDF5
% if ~all(cellfun(@length, C))
% error('atmlab:collocation_read', ...
% 'Period PARTIALLY covered by HDF-5, this is not supported yet');
% end
% logtext(atmlab('OUT'), 'Found HDF5\n');
% % Convert limits-matrix to limits-strings
% baselimstrs = {'', ''};
% fieldnames_both = {union(fieldnames(cols.overlap), fieldnames(cols.data)), fieldnames(cols.meandata)};
% for i = 1:2
%
% limcellstr = cellfun(@(v) ...
% ['(' v ' >= ' num2str(limits.(v)(1)) ') & (' ...
% v ' <= ' num2str(limits.(v)(2)) ') & '], ...
% intersect(fieldnames_both{i}, fieldnames(limits)), ...
% 'UniformOutput', false);
% baselimstrs{i} = horzcat(limcellstr{:});
% end
% for i = 1:length(years)
% year = years(i);
% hdf5file = paths_hdf5{i};
% % and limit dates
% global_start = date2unixsecs(start_date(1), start_date(2), start_date(3));
% this_year_start = date2unixsecs(year, 1, 1);
% this_start = max(global_start, this_year_start);
% global_end = date2unixsecs(end_date(1), end_date(2), end_date(3));
% this_year_end = date2unixsecs(year, 12, 31, 24);
% this_end = min(global_end, this_year_end);
% % add datelimits to limstr
% limstr = [baselimstrs{1} sprintf(' (B_TIME >= %d) & (B_TIME <= %d)', ...
% this_start, this_end) ' MEANSEP ' baselimstrs{2}];
% limstr = deblank(limstr);
% if strcmp(limstr(end), '&')
% limstr = deblank(limstr(1:end-2));
% end
% M = collocation_read_frompipe(hdf5file, cols_in, limstr);
% cols_out = name_struct;
% return
% end
% end
%% pre-allocate at least zero rows
M = zeros(0, ncols);
%% loop through all the dates
dates = daterange(start_date, end_date);
for i = 1:size(dates, 1)
date = dates(i, :);
%% read collocations for date
path = find_datafile_by_date(date, s, ...
['collocation_' sensor1 '_' sensor2]);
try
collocations_day = read_collocs_data_mean(path, cols_in, cols);
catch ME
switch (ME.identifier)
case {'MATLAB:load:couldNotReadFile', 'MATLAB:nonExistentField', ...
'MATLAB:gunzip:invalidFilename','MATLAB:netcdf:open:noSuchFile', ...
'atmlab:exec_system_cmd:shell'}
logtext(atmlab('ERR'), 'Problem for %04d-%02d-%02d: %s\n', ...
date(1), date(2), date(3), ME.message);
continue
otherwise
ME.rethrow();
end
end
if isempty(collocations_day)
logtext(atmlab('OUT'), 'no collocations\n');
continue
end
%% apply limitations
if dolimits
lim = collocation_restrain(collocations_day, limmat, filters);
collocations_day = collocations_day(lim, :);
end
%% add to total
L = size(M, 1);
N = size(collocations_day, 1);
M((L+1):(L+N), :) = collocations_day;
end
cols_out = name_struct;