classdef SatDataset < handle % subclass from handle to pass-by-reference
% Class to represent a dataset.
%
% Objects from this class lie at the basis of everything that the
% Collocation Toolkit does. For example, a CollocatedDataset
% consists of two SatDatasets. The SatDataset describes where the data
% are stored on the disk, how the data are read, and contains some more
% information needed for collocations. However, a SatDataset can
% certainly also be used stand-alone, for example, to loop through all
% the granules in a particular time period.
%
% When instantiating an object from this class, (or, for that matter,
% any class in the toolkit!), you should define a number of its properties
% (see below). This can be done either when creating the class (see
% constructor info), or later, in the same way as setting a member
% of a structure. It's probably best to define overall properties, such
% as name, granule_duration, etc., when creating, and to define local
% properties later. For example:
%
% >> spam = SatDataset('name', 'MyDataset', 'granule_duration', 3600, ...
% 'satname', 'SpamSat');
%
% and later:
%
% >> spam.basedir = '/here/are/the/data';
%
% When a dataset is created, it is registered in a global structure
% that can be obtained by calling datasets. For this reason, it
% is recommended to always at least give a dataset a name when it is
% first created. Not doing so will result in a warning and an
% automatically calculated name, which is probably neither helpful nor
% descriptive.
%
% A number of datasets are pre-defined in define_datasets.
% All registered datasets can be listed with datasets.
%
%
% SatDataset properties:
%
% Properties to describe the location of the granules:
%
% basedir - Specify root for all datasets
% subdir - Specify directory for individual granule
% re - Regular expression matching individual granule
% filename - Template for calculating exact path of individual granule
%
% Property describing how to read the granule:
%
% reader - Reading routine for one granule for this dataset
%
% Other properties:
%
% name - Name with which this dataset is registered
% granule_duration - Duration of a single granule in seconds
% satname - Name for a single-satellite dataset
% collocated_datasets - Read-only prop. listing CollocatedDatasets.
% starttimesfile - Name for filename containing start times if not in filename
% (Use properties for a complete listing)
%
% SatDataset methods:
%
% A selection of the most important methods is shown below.
% For a full listing, see doc SatDataset.
%
% Class instantiation:
%
% SatDataset - Info on creating a SatDataset object
%
% Methods for finding granules:
%
% find_datadir_by_date - Find dir. containing granules for date
% find_granules_by_date - Find all granules for date
% find_granules_for_period - Find all granules for multi-day period
% find_granule_by_datetime - Find granule starting at exact datetime
% find_granule_by_unixsecs - Similar, but in unixsecs
% find_extreme_granule - Find first or last granule for dataset
% get_starttime - Get starttime from db (if not in file)
% set_starttimes - Update starttimes-db
%
% Methods for reading granules:
%
% read_granule - Read granule starting at exact datetime
%
% Methods for detecting duplicates:
%
% find_granule_first_line - Create hashtable cataloguing duplicates
% granule_first_line - Look up entry from this hashtable
%
% Other useful methods:
%
% find_info_from_granule - Get information from granule filename
%
% See also: CollocatedDataset, HomemadeDataset (child), AssociatedDataset, FieldCopier, Collapser
%
% Don't forget the Collocation User's Guide.
% $Id$
% read/write props
properties
% Name of the dataset (pseudo-required).
%
% If not given, a name will be automatically calculated. This name
% is not necessarily descriptive, so it's usually good to give a
% name upon creating the dataset.
name;
% Basedir for the dataset (required).
%
% The base directory in which all granules reside. For example,
% mhs.basedir = '/storage3/data/mhs'.
basedir;
% Subdir for a particular day of data (required).
%
% In many datasets, data are stored per date, in subdirectories
% such as year/month/day/ or year/doy/. With this property, the user
% tells the dataset how the data are sorted. In locating granules,
% certain strings are replaced: $SAT, $YEAR4, $YEAR2, $MONTH, $DAY,
% $DOY. Example: mhs.subdir = '$SAT_mhs_$YEAR4/$MONTH/$DAY'.
subdir;
% Regular expression to find individual granule (conditionally optional).
%
% This is required for datasets where the full filename cannot be
% exactly calculated from available information, for example, due
% to each filename containing orbit numbers, downlink stations, or
% other information not known or relevant. When searching
% for granules, the regular expression described by this field is
% used for all files in a particular directory, expecting exactly
% one match. Therefore, the regular expression (a named one, passed
% to regexp) must match exactly one file.
%
% The regular expression may be applied either to the basename, or
% to the full path, or to anything in-between.
%
% Fields used for matching are:
% year02, year04, year, month, day, doy, hour, minute, orbitno
%
% For datasets that are L1 or L2, data is usually sorted at least
% per day, but often per granule, which may be specified either by
% hour/minute or by orbitno, some unique orbit number. Hopefully
% each filename specifies either hour/minute uniquely within a
% day, or an orbit number uniquely. See 'kiruna_init' for examples
% of what to write in re.
%
% See also: filename, find_granule_by_datetime
%
% Example:
% mhs.re = '(?<satname>[a-z0-9]{6})_(?<sensor>[a-z0-9]+)_(?<year>\d{4})/(?<month>\d{2})/(?<day>\d{2})/[A-Z0-9.]+\.S(?<hour>\d{2})(?<minute>\d{2})'
re;
% String describing basename (conditionally optional).
%
% This is used for datasets where the full filename can be exactly
% calculated from available information, and no regular expression
% is needed. This includes only the basename (no directory).
%
% See also: re, find_granule_by_datetime
%
% Example:
% mhs.collocated_datasets(1).filename = 'collocations_$SAT.nc.gz'
filename;
% Function handle to read a single granule (required)
%
% This property points to a function handle used to read a single
% granule (so it becomes in effect a static method). The documentation
% below is a prescription of the interface that the function handle
% assigned to this property must have in order for methods like
% read_granule and CollocatedDataset.collocate_granule to work.
% It is not the documentation for any method that already exists!
%
% FORMAT
%
% data = cd.reader(fullpath, fields)
%
% IN
%
% fullpath string Full path to granule to be read.
% fields cellstr
%
% Any other fields to be read. This is used by the FieldCopier
% class to copy fields from the original datasets.
%
% OUT
%
% The function shall return a single output. This output is a
% structure with at least the following fields:
%
% lat double
%
% Latitude in degrees. This must be a 2-D matrix, although
% the dimensions may be 1 (so it can be a row-vector, a
% column-vector, a scalar, or a matrix). It contains the
% latitude for every single measurement in the data.
%
% lon double
%
% Longitude in degrees. Like the latitude. The sizes of the
% fields 'lat' and 'lon' must be exactly equal.
%
% time double
%
% Time in seconds since 00:00 UT. This must be a column
% vector where the number of elements is equal to the number
% of rows in the 'lat' and 'lon' fields (one scanline can
% only have a single time).
%
% THROWS
%
% If the data cannot be read, throws an error with
% error identifier `atmlab:invalid_data`.
%
% EXAMPLE
%
% This function is not normally called directly by the user, but
% since it must often be implemented by the user, examples consist
% of example implementations. The datasets directory
% contains a number of functions with names starting with
% 'common_read'. Those are all implementations for 'reader' used for
% various datasets that come with atmlab. For example, for data
% stored in the same format as AMSU, MHS, HIRS, one can use:
%
% mhs.reader = @satreaders.poes_radiometer
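%
% As a rough sketch, a custom reader for a hypothetical NetCDF
% dataset (the variable names 'lat', 'lon' and 'time' inside the
% file are assumptions) might look like:
%
%   function data = my_reader(fullpath, fields)
%       % geolocation and time as required by the interface above
%       data.lat  = ncread(fullpath, 'lat');
%       data.lon  = ncread(fullpath, 'lon');
%       data.time = ncread(fullpath, 'time');   % seconds since 00:00 UT
%       % read_granule additionally expects 'epoch' (granule start in
%       % seconds since 1970-01-01) and 'version'; see read_granule
%       for i = 1:length(fields)                % any extra fields asked for
%           data.(fields{i}) = ncread(fullpath, fields{i});
%       end
%   end
%
%   mydataset.reader = @my_reader;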
reader = @(varargin)[];
% Function to post-process a single granule
%
% After a granule is read (see attribute reader), there might be
% dataset-specific post-processing to be done immediately after the
% reading. For example, one may want to calculate pseudo-fields
% to get IWP when the data only contain IWC, which also saves
% memory later on. By default, this property is set to a no-op.
%
% Note that since this is a function implemented as an attribute
% and not as a method (so that redefining does not require
% subclassing), one needs to explicitly pass the dataset as a first
% argument (see below) when calling the function. However, calling
% this function is taken care of by read_granule, and normally the
% user does not need to call this function explicitly.
%
% The method read_granule takes care of removing ballast as
% indicated by the 'dependencies' fields.
%
% FORMAT
%
% data = self.processor(self, data, fields)
%
% IN
%
% self SatDataset object
% data data as returned by self.reader
% fields fields as passed to self.reader
%
% OUT
% data altered data struct
%
% See also: SatDataset/pseudo_fields
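%
% Example (a hypothetical sketch; the field names 'IWC', 'z' and the
% pseudo-field 'IWP' are assumptions, not part of any particular
% dataset):
%
%   function data = my_processor(self, data, fields)
%       if any(strcmp(fields, 'IWP'))
%           % vertically integrate IWC over height to obtain IWP
%           data.IWP = trapz(data.z, data.IWC, 2);
%       end
%   end
%
%   ds.reader_processor = @my_processor;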
reader_processor = @(self, data, fields)data;
% Structure with pseudo-field information
%
% For some datasets, the user might want to acquire fields not
% actually in the data. For example, to save memory, one might want
% to get IWP when the data actually contain IWC. For this,
% read_granule needs some information on pseudo_fields,
% specifically, for each pseudo field a cell string with
% dependencies. The dependencies cellstr lists the real fields upon
% which the pseudo fields depend. These will be read, passed to
% reader_processor, and then thrown away.
%
% You may wish to add some attributes for user documentation.
%
% Note: every pseudo field MUST have a dependencies member that is
% a cellstr, but this cellstr may be empty.
%
% Note: Users choosing pseudo-field names equal to fields in the
% actual data will be sentenced to the Usenet Death Penalty.
%
% See also: SatDataset/reader_processor
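%
% Example (hypothetical; the field names are assumptions):
%
%   ds.pseudo_fields.IWP.dependencies = {'IWC', 'z'};
%   ds.pseudo_fields.FLAG.dependencies = {};   % empty cellstr is allowed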
pseudo_fields = struct();
% Granule duration in seconds (required)
%
% Describes the duration in seconds for a single granule. In case
% this varies, choose an upper limit. This is used by
% CollocatedDataset.overlap_granule.
%
% Example:
% mhs.granule_duration = 6130
granule_duration;
% Satellite name for single-satellite datasets (conditionally required)
%
% Some datasets, such as cpr, are contained only on a single satellite.
% However, the database generated by find_granule_first_line
% and used by granule_first_line still needs a satellite to
% generate the string for the filename to store the data. The
% satellite name is also used in some verbose output. It is not
% needed or used for multi-satellite datasets, where instead the
% 'spec' specifies the satellite (such as in MHS).
%
% Example:
% cpr.satname = 'cloudsat'
satname;
% Name for first-line database (predefined)
%
% Predefined property describing where firstline-db is stored
%
% The first-line database (see find_granule_first_line
% and granule_first_line)
% is needed to prevent duplicates. This contains the filename, but
% not the directory. Currently unused? FIXME
%
% firstline_filename = 'firstline_$NAME_$SPEC';
% Cache for reading granules
cache;
% Make this dataset visible. Defaults to true.
%
% If you're creating a dataset for fully internal use, you might
% want to set this to false to prevent automatic searchers from
% detecting it.
visible = true;
% List of satellites
%
% For multi-satellite datasets, this may contain a list of
% satellites for this dataset. This is only for information and
% does not need to be complete. It may be used e.g. for looping
% through all the possible datasets. Not to be confused with
% satname, which is for single-satellite datasets.
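%
% Example (hypothetical list, need not be complete):
%
%   mhs.sats = {'noaa18', 'noaa19', 'metopa'};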
sats;
% Try to use the regular expression for finding granules. If false,
% a regular expression may still be needed for extracting
% information with find_info_from_granule, but it is not used for
% finding the filename in the first place.
tryre = true;
% Flag if it needs an external starttimes-file
%
% Defaults to false
needs_starttimesfile = false;
% Some datasets do not contain a starting time in the filename,
% even if there are multiple granules per day. To resolve this, in
% the root of the dataset (e.g. directly in basedir) we store a
% hashtable (containers.Map) that records the starting time based
% on an id that is unique per filename (e.g. orbit number). One
% example of such a dataset is Saphir.
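%
% Example (a hedged sketch for such a dataset):
%
%   saphir.needs_starttimesfile = true;
%   saphir.starttimesfile = 'granule_start_times.mat';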
starttimesfile;
% Database of starting times for datasets where those are not
% clear from the filename.
starttimes
% Field that can be a structure for arbitrary metadata, such as
% height, channel definitions, or anything. This is not used within
% the framework of atmlab, but can be used by the user for
% anything.
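%
% Example (arbitrary, user-defined content):
%
%   ds.metadata.height = 817e3; % approximate orbit altitude [m]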
metadata;
% defaults cannot be set in any subclass, because re-defining a
% property is not allowed except under special circumstances that
% cannot be met here. Therefore, defaults should be encompassed in
% a property, a structure, 'defaults', that is /not/ defined here,
% but only by the class at the end of the hierarchy
end
% read-only props (alterable with methods)
properties (SetAccess = protected)
% list of CollocatedDatasets using this SatDataset (read-only)
%
% This property, which is read-only and automatically filled,
% contains a list of CollocatedDatasets
% that use this SatDataset.
collocated_datasets = [];
% Full path for db containing start-times where applicable.
%
% This is simply basedir/starttimesfile
starttimes_fullpath
end
% flags
properties (Constant, GetAccess = protected)
FL_INITIAL = -5;
FL_NOTFOUND = -2;
FL_ALLDUPL = typecast(int32(-1), 'uint32');
FL_NONEXT = -3;
FL_NOCUR = -4;
end
% internally used properties
properties (Transient, GetAccess = private, SetAccess = private)
firstlines;
cachedstarttimes;
end
methods
%% constructor
function self = SatDataset(varargin)
% Creates a SatDataset object.
%
% For full information on the class, see doc SatDataset.
%
% FORMAT
%
% ds = SatDataset('name', 'MyDataset', ...
% 'basedir', '/some/where', ...
% ...);
%
% IN
%
% Any pair of key/value arguments, as when creating a
% structure. Valid keys are properties. To get a list of
% valid properties, call properties SatDataset
% and/or doc SatDataset.
%
% OUT
%
% Valid SatDataset object.
% this may be defined in subclasses
if isprop(self, 'defaults')
def_fields = fieldnames(self.defaults); %#ok<*MCNPN>
for i = 1:length(def_fields)
fldnm = def_fields{i};
self.(fldnm) = self.defaults.(fldnm);
end
end
for i = 1:2:nargin
self.(varargin{i}) = varargin{i+1};
end
if isempty(self.name)
% give a name
nm = self.calculate_name();
warning(['atmlab:' mfilename], ...
'You didn''t name me! I''ll name myself %s (for now)', nm);
self.name = nm;
end
if isempty(self.starttimesfile)
self.starttimesfile = 'granule_start_times.mat';
end
self.cache = CachedData();
end
%% things that can be done with datasets
function fulldir = find_datadir_by_date(self, date, varargin)
% find_datadir_by_date Find directory containing granules
%
% For the given datevec and specification, return a string with
% the path to the directory that contains the granules for this particular
% datevec.
%
% FORMAT
%
% fulldir = ds.find_datadir_by_date(datevec, spec)
%
% IN
%
% datevec vector [year month day] etc.
% spec any name of sat or cellstr {sat1 sat2}
%
% OUT
%
% fulldir string path to directory
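%
% EXAMPLE
%
% Assuming the basedir/subdir examples given above (the path is
% therefore hypothetical):
%
% >> fulldir = mhs.find_datadir_by_date([2010 9 8], 'noaa18');
% fulldir = '/storage3/data/mhs/noaa18_mhs_2010/09/08'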
% verify basedir is defined and exists
spec = optargs(varargin, {''});
errid = ['atmlab:' mfilename]; % find_datadir_by_date
assert(any(self.basedir), errid, 'No basedir initialised for %s %s', class(self), char(self.name));
assert(exist(self.basedir, 'dir')~=0, errid, ...
['Configured data directory for %s is %s, ' ...
'but this does not exist or is not a directory. ' ...
'Please define basedir for %s correctly ' ...
'or create the directory.'], ...
self.name, self.basedir, self.name);
fulldir = fullfile(self.basedir, self.subdir);
fulldir = self.repvars(fulldir, date, spec);
end
function S = find_info_from_granule(self, fname)
% find_info_from_granule Extract info from a granule filename
%
% Thin shell around regexp.
%
% FORMAT
%
% S = ds.find_info_from_granule(fname)
%
% IN
%
% fname string (full) path to granule
%
% OUT
%
% - structure containing whatever info could be obtained
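%
% EXAMPLE
%
% The fields returned depend on the named tokens in self.re:
%
% >> S = mhs.find_info_from_granule(path_to_granule);
% >> S.hour, S.minute   % for example, if 'hour'/'minute' are matched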
if isempty(self.re) % try to convert it
self.pos2re();
end
S = regexp(fname, self.re, 'names');
end
function [M, paths] = find_granules_by_date(self, date, varargin)
% find_granules_by_date Find all granule start times for date/sat/dataset
%
% This function finds all granule start times for granules with any coverage
% of the indicated date for the indicated satellite/dataset pair. This
% includes the last granule of the date before, unless this is explicitly
% prohibited. Note that the last granule of the date before may or may not
% actually contain information for the day requested. The
% function assumes at most one granule for the previous date
% contains information for the current date. If two outputs are
% requested, paths to the granules are also output, as a cell
% array of strings.
%
% FORMAT
%
% [M, paths] = find_granules_by_date(date[, spec[, with_yesterday]])
%
% OUT
%
% M N x 6 matrix where each row corresponds to one granule.
% The first five columns are year,
% month, day, hour, minute. The last
% column may contain a granule number
% for some datasets, or -1 otherwise.
%
% paths cell of strings Cell array of strings, corresponding full paths
%
% IN
%
% date vector [year month day]
% spec string or cellstr, sat or pair of sats
% with_yesterday (optional) logical, include yesterdays last
% granule or not. Defaults to 'true', as this
% is needed to cover all of the date; but if
% looping over dates, one might not want it.
%
% EXAMPLE
%
% >> [grans, paths] = mhs.find_granules_by_date([2010, 9, 8], 'noaa18');
%
% $Id$
% errid = 'atmlab:find_granules_by_date';
[spec, with_yesterday] = optargs(varargin, {'', true});
datadir = self.find_datadir_by_date(date, spec);
if self.tryre && ~isequaln(self.re, nan) && ~isempty(self.re)
matchy = self.re;
else
matchy = self.filename; % exact match only
end
matchy = self.repvars(matchy, date, spec);
files = dir(datadir);
nfiles = length(files);
M = zeros(nfiles, 6);
for i = 1:nfiles
fname = files(i).name;
%nam = regexp(fname, matchy, 'names');
nam = self.find_info_from_granule(fname);
if ~isempty(nam)
% if present, year/month/day should be the same
if ~self.infofit(nam, date, spec)
continue;
end
M(i, 1:3) = date(1:3);
if isfield(nam, 'hour')
M(i, 4) = str2double(nam.hour);
end
if isfield(nam, 'minute')
M(i, 5) = str2double(nam.minute);
end
if self.needs_starttimesfile
if isfield(nam, 'orbitno2') % need both numbers
% specifies a range, so the product should be
% unique
M(i, 6) = str2double(nam.orbitno) * ...
str2double(nam.orbitno2);
else
M(i, 6) = str2double(nam.orbitno);
end
else
M(i, 6) = -1;
end
end
end
% all paths
paths = cellfun(@(f) fullfile(datadir, f), {files.name}, 'UniformOutput', false);
% remove lines with zeroes (those are not granules)
nogran = M(:, 1)==0;
M(nogran, :) = [];
paths(nogran) = [];
% sort
[M, I] = sortrows(M);
paths = paths(I);
% add yesterday
if with_yesterday
yesterday = datevec(datenum(date)-1);
[M_yesterday, paths_yesterday] = self.find_granules_by_date(yesterday, spec, false);
if ~isempty(M_yesterday) % maybe today is genesis/big bang/epoch/birth/1970-01-01
M = [M_yesterday(end, :); M];
paths = [paths_yesterday(end) paths];
end
end
end
function datafile = find_granule_by_datetime(self, datevec, varargin)
% find_granule_by_datetime Return full path of datafile by date/time
%
% Returns the full path of the datafile corresponding to the input
% arguments. There are two modes:
%
% - If dataset.filename is defined, the path is
% calculated directly from dataset properties and the arguments
% passed on. The file may or may not actually exist, so in this case,
% you can use it to calculate the path when planning to create it.
%
% - If this is not defined, then dataset.re has to
% be defined. It will search the filesystem for a file matching the
% regular expression and the indicated datevec/spec for this dataset.
%
% FORMAT
%
% datafile = dataset.find_granule_by_datetime(datevec[, spec])
%
% IN
%
% datevec vector Date/time to find datafile for
% spec string/ Name of satellite. For datasets belonging to one
% cellstr satellite, a simple string. For datasets belonging
% to two satellites (such as collocations), a cell
% array of two strings. For
% single-satellite datasets, not needed.
%
% OUT
%
% datafile string Path to the looked-for datafile.
%
% EXAMPLE
%
% >> path = mhs.find_granule_by_datetime([2010 9 8 8 8], 'noaa18');
errid = 'atmlab:find_granule_by_datetime';
spec = optargs(varargin, {''});
% implementation:
% 1. find the directory containing the files from basedir/subdir
% 2. if possible, calculate the filename directly
% 3. otherwise, list all files and match with regexp+tolerance
datevec(end+1:5) = 0;
fulldir = self.find_datadir_by_date(datevec, spec);
if (nargin() <= 3 && ~isequaln(self.filename, nan) && ~isempty(self.filename)) || ~self.tryre % calculate directly
fn = self.repvars(self.filename, datevec, spec);
datafile = fullfile(fulldir, fn);
else % will search through all granules with find_granules_by_date
[granules, paths] = self.find_granules_by_date(datevec, spec, false);
found = granules(:, 4)==datevec(4) & granules(:, 5)==datevec(5);
nfound = sum(found);
if iscell(spec) % short-string for error message
sp = horzcat(spec{:});
else
sp = spec;
end
if nfound==0
error(errid, 'No datafile found for %s %s [%s]', self.name, sp, num2str(datevec));
elseif nfound > 1
error(errid, 'Multiple datafiles found for %s %s', self.name, sp);
else
datafile = paths{found};
end
end
end
function datafile = find_granule_by_unixsecs(self, unixsecs, varargin)
% find_granule_by_unixsecs Find granule starting at indicated time
%
% IN
%
% unixsecs time in seconds since 1970-01-01T00:00:00Z
%
% ...remaining arguments passed on to find_granule_by_datetime
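%
% EXAMPLE
%
% >> datafile = mhs.find_granule_by_unixsecs(date2unixsecs(2010, 9, 8, 8, 8), 'noaa18');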
[year, month, day, hour, minute, second] = unixsecs2date(unixsecs);
datafile = self.find_granule_by_datetime([year month day hour minute second], varargin{:});
end
function [allgrans, allpaths] = find_granules_for_period(self, date1, date2, spec)
% find_granules_for_period List all granules for sat/dataset for period
%
% For the period between date1 and date2, list all granules (as vectors
% indicating the starting date/time) available.
%
% FORMAT
%
% [allgrans, allpaths] = find_granules_for_period(date1, date2, spec)
%
% IN
%
% date1 datevec starting date [y,m,d]
% date2 datevec ending date [y,m,d]
% spec various (optional)
%
% OUT
%
% allgrans matrix all granules in daterange
% allpaths cellstr all paths to those granules
%
% EXAMPLE
%
% >> [allgrans, allpaths] = mhs.find_granules_for_period([2010, 9, 5], [2010 9 7], 'noaa18');
narginchk(3, 4);
if ~exist('spec', 'var')
spec = [];
end
dates = daterange(date1(1:3), date2(1:3));
ndates = size(dates, 1);
allgrans = nan*zeros(ndates*15, 6);
allpaths = cell(size(allgrans));
n = 0;
for i = 1:ndates
date = dates(i, :);
[grans, paths] = self.find_granules_by_date(date, spec, false);
ngrans = size(grans, 1);
allgrans(n+1:n+ngrans, :) = grans;
allpaths(n+1:n+ngrans) = paths;
n = n + ngrans;
end
to_remove = isnan(allgrans(:, 1));
allgrans(to_remove, :) = [];
allpaths(to_remove) = [];
[allgrans, I] = sortrows(allgrans);
allpaths = allpaths(I);
end
function [gran, path] = find_granule_covering_instant(self, datevec, varargin)
% For datevec/spec, find granule covering this moment
%
% For a combination of a date-vector and a specification
% (satellite), get the granule that covers this instant. Note
% that this method does NOT verify that the instant is really
% covered, but rather returns the granule that started most
% recently before the considered moment.
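%
% EXAMPLE
%
% >> [gran, path] = mhs.find_granule_covering_instant([2010 9 8 8 20 0], 'noaa18');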
spec = optargs(varargin, {''});
[grans, paths] = self.find_granules_by_date(datevec(1:3), spec, true);
grans_unisecs = date2unixsecs(grans(:, 1), grans(:, 2), grans(:, 3), grans(:, 4), grans(:, 5));
dv_unisecs = date2unixsecs(datevec(1), datevec(2), datevec(3), datevec(4), datevec(5));
last_i = find(dv_unisecs >= grans_unisecs, 1, 'last');
gran = grans(last_i, :);
path = paths(last_i);
% (Verifying that the instant is actually covered is not implemented yet.)
end
function data = read_granule(self, date, varargin)
% read_granule Read any granule
%
% High-level function to read a particular granule for a
% satellite/sensor-pair, if one doesn't know what function is suitable to
% use to read it. It also gets rid of ballast (duplicates).
%
% Uses member reader to find out what function to use to
% read this particular data. Uses self.granule_first_line
% to find out the first scanline to use (earlier lines are duplicates
% of the previous granule).
%
% FORMAT
%
% data = read_granule(date[, spec[, extra[, remdouble[, force[, reload]]]]])
%
% IN
%
% date vector Date-vector indicating the starting time.
% spec various (optional) E.g. satellite (noaa18, cloudsat, etc.)
% extra cellstr (optional)
%
% Extra args to reader. Usually, those are extra fields
% that are to be read. Two of those extra fields are a
% special-case and taken care of by read_granule, and not
% passed on to the reader: 'scanline_position' and
% 'scanline_number' are assigned according to the
% position in the scanline and the scanline number,
% respectively.
%
% remdouble logical (optional) Remove duplicates or not? Default true.
% One reason to pass 'false' might be
% when searching for data according
% to line/pos.
% force logical (optional) rather than throwing an error,
% return [] in case of error. Default false.
% reload logical (optional) Do not use caching, but
% reload in any case.
%
% OUT
%
% data struct Contains at least:
%
% - lat Latitude in degrees
% - lon Longitude in degrees [-180, 180]
% - time Time in seconds since 'epoch'
% - epoch Time in seconds since 1970-01-01
% - version String or float.
%
% Further fields should be returned depending on
% arguments passed to 'extra' and the particular dataset.
%
% EXAMPLE
%
% >> data = mhs.read_granule([2010 9 8 8 8], 'noaa18', {}, true, false);
%
% $Id$
% optional arguments
[spec, fields_asked, remdouble, force, reload] = optargs(varargin, {'', {}, true, false, false});
% checks for earlier error detection
rqre_datatype(date, @isnumeric);
rqre_datatype(spec, {@ischar, @iscellstr});
rqre_datatype(fields_asked, @iscell);
rqre_datatype(remdouble, @islogical);
rqre_datatype(force, @islogical);
rqre_datatype(reload, @islogical);
% special case: remove 'scanline_position' and
% 'scanline_number' from 'extra'. Those are in reality always
% returned, but readers do not necessarily understand them.
fields_asked(strcmp(fields_asked, 'scanline_number')) = [];
fields_asked(strcmp(fields_asked, 'scanline_position')) = [];
% these must always be present in reader return
fields_always = {'time', 'lat', 'lon', 'epoch', 'version'};
%% generate varname for caching
vn = genvarname(DataHash([self.name, date, varargin]));
%% read cached if appropriate
if ~reload
if ~isempty(self.cache) && self.cache.has_entry(vn)
logtext(atmlab('OUT'), 'Reading %s from cache (entry %s)\n', ...
self.name, vn);
data = self.cache.get_entry(vn);
return
end
if isfield(self,'pcd') && ~isempty(self.pcd) && self.pcd.has_entry(vn)
% logtext(atmlab('OUT'), 'Reading %s from cache (entry %s)\n', ...
% self.name, vn);
data = self.pcd.get_entry(vn);
return
end
end
%% find datafile and function to read it
datafile = self.find_granule_by_datetime(date, spec);
%% Split extras in fields in data and pseudo-fields
% pseudos = fieldnames(self.pseudo_fields);
fields_in_data = {};
fields_in_pseudo = {};
fields_in_deps = {}; % only in dependencies
for i = 1:length(fields_asked)
field = fields_asked{i};
if isfield(self.pseudo_fields, field)
fields_in_pseudo = [fields_in_pseudo field]; %#ok
deps = self.pseudo_fields.(field).dependencies;
for j = 1:length(deps)
dep = deps{j};
if ~any(strcmp(dep, [fields_always(:)' fields_asked(:)' fields_in_deps(:)']))
fields_in_deps = [fields_in_deps dep]; %#ok
end
end
else
fields_in_data = [fields_in_data field]; %#ok
end
end
%% read datafile
%logtext(colloc_config('stdout'), '%s(''%s'')\n', ...
% func2str(reader), datafile);
logtext(atmlab('OUT'), 'Reading %s\n', datafile);
try
data = self.reader(datafile, [fields_in_data fields_in_deps]);
catch ME
if force
switch ME.identifier
case {'atmlab:invalid_data', 'atmlab:exec_system_cmd:shell'}
logtext(atmlab('ERR'), ...
'Unable to read: %s\n', ME.message);
return
otherwise
ME.rethrow();
end
else
% create a new exception so that callers further up can catch
% all reading failures with a single identifier
ME2 = MException('atmlab:SatDataset:cannotread', ...
['Failure while %s %s was reading %s. Original error had ' ...
'id ''%s'', message ''%s''. See ''cause'' for details.'], ...
class(self), self.name, datafile, ME.identifier, ME.message);
ME2 = ME2.addCause(ME);
% keyboard
ME2.throw();
end
end
%% do further processing
data = self.reader_processor(self, data, fields_in_pseudo);
%% set version from info if not already set
if ~isfield(data, 'version')
info = self.find_info_from_granule(datafile);
if isfield(info, 'version')
data.version = info.version;
end
end
%% check/convert datafile contents
data = rmfield(data, fields_in_deps);
fields_expected = [fields_always fields_in_data fields_in_pseudo];
ffound = isfield(data, fields_expected);
if ~all(ffound)
error(['atmlab:' mfilename ':missingfield'], ...
['After reading with %s and processing with %s, ' ...
'the following fields were expected but not found: %s'], ...
func2str(self.reader), func2str(self.reader_processor), ...
strjoin(fields_expected(~ffound), ', '));
end
if isinteger(data.time)
% later functions will suffocate on non-double time
data.time = double(data.time);
end
nlines = size(data.time, 1);
npos = size(data.lat, 2);
% add scanline_number and scanline_position (before removing
% duplicates!)
data.scanline_number = uint32(repmat((1:nlines).', [1 npos]));
data.scanline_position = uint16(repmat(1:npos, [nlines 1]));
if ~isfield(data, 'version') % try from filename
fileinfo = self.find_info_from_granule(datafile);
if isfield(fileinfo, 'version')
data.version = fileinfo.version;
end
end
%% remove duplicates
if remdouble
% find the first scanline not present in the previous granule
firstline = self.granule_first_line(date(1:5), spec);
if firstline > 0 && firstline < intmax(class(firstline))
% get rid of ballast in all fields
for f = fieldnames(data)'
fn = f{1};
if isnumeric(data.(fn)) && size(data.(fn), 1) == nlines
% magic in next lines explained at
% http://www.mathworks.de/matlabcentral/newsreader/view_thread/290890
sz = size(data.(fn));
data.(fn) = reshape(data.(fn)(firstline:nlines, :), [nlines-firstline+1, sz(2:end)]);
end
end
end
end
%% cache results
if ~isempty(self.cache)
self.cache.set_entry(vn, data);
elseif ~isempty(self.pcd)
self.pcd.set_entry(vn, data);
end
end
function first = granule_first_line(self, d, varargin)
% granule_first_line Returns first scanline not present in previous granule
%
% For a certain granule, return the number of the first scanline that
% is not in the previous granule. This m-file uses a previously created
% database (a hashtable). This hash-table is cached between subsequent
% calls of the function.
%
% There should exist an entry for each satellite/sensor granule. If it's
% not found, an error is raised, and there is probably a bug somewhere. If
% the satellite/sensor granule exists, but there is no (unique) previous
% granule, line -2 is returned.
%
% FORMAT
%
% first = granule_first_line(datevec[, spec[, reload[, force]]])
%
% IN
%
% d various Starting date/time for granule,
% datevec or unixsecs
% spec various (optional) Might be indicating satellite(s)
% reload logical (optional) Reload scanline data (i.e. not
% cached). Defaults to false.
% force logical (optional) If nothing found, don't
% throw an error, but return empty.
%
% OUT
%
% first number First scanline not in previous granule.
% Special values: -1 (no data found), -2 (no
% previous granule found)
%
% EXAMPLE
%
% >> first = mhs.granule_first_line([2010 9 8 8 8], 'noaa18', false);
% if isequal(self.firstlines, []) % first run
% self.firstlines = containers.Map(
% S = struct;
% end
[spec, reload, force] = optargs(varargin, {'', false, false});
assert(ischar(spec), ['atmlab:' mfilename], ...
'function works only for single-satellite datasets');
if isempty(spec)
sat = self.satname;
else
sat = spec;
end
if isequal(self.firstlines, []) || ~isfield(self.firstlines, sat) || reload % load first
scanfile = datasets_config('firstline_data');
scanfile = strrep(scanfile, '$SAT', sat);
scanfile = strrep(scanfile, '$SENSOR', self.name);
logtext(atmlab('OUT'), ...
['Reading %s. ' ...
'(If this fails, run self.find_granule_first_line ' ...
'(SatDataset.find_granule_first_line)).\n'], scanfile);
try
t = load(scanfile);
catch ME
switch (ME.identifier)
case {'MATLAB:load:couldNotReadFile'}
newME = MException(['atmlab:' mfilename ':failed'], ...
['Unable to locate duplicates for %s. Failed to read file: %s.\n', ...
'You might want to run the ''find_granule_first_line'' method?'], ...
self.name, scanfile);
newME.addCause(ME);
newME.throw();
otherwise
ME.rethrow();
end
end
self.firstlines.(sat) = t.first;
end
if isscalar(d)
unisecs = uint32(d);
else
unisecs = uint32(self.get_starttime(d));
end
if self.firstlines.(sat).isKey(unisecs)
first = self.firstlines.(sat)(unisecs);
elseif force
first = [];
else
error(['atmlab:' mfilename ':missing_firstline'], ['no data found for %s @ %d. ', ...
'You may want to run self.find_granule_first_line(...) ', ...
'for a sufficiently long period.'], ...
self.name, unisecs);
end
end
function find_granule_first_line(self, startdate, enddate, varargin)
% find_granule_first_line Create hashtable with scanline overlaps
%
% Creates a map (such as used by granule_first_line) that maps for
% each granule the first scanline not occurring in the previous
% granule.
%
% The resulting hashtable is written to a file according to
% datasets_config('firstline_data'), which is also where
% granule_first_line is looking for it.
%
% FORMAT
%
% cd.find_granule_first_line(startdate, enddate, spec)
%
% IN
%
% startdate datevec start here
% enddate datevec end here
% spec various specification
%
% OUT
%
% none, but writes a file
%
% EXAMPLE
%
% >> mhs.find_granule_first_line([2008 1 1], [2008 12 31], 'noaa18');
spec = optargs(varargin, {''});
if isempty(spec)
sat = self.satname;
else
sat = spec;
end
scanfile = datasets_config('firstline_data');
scanfile = strrep(scanfile, '$SAT', sat);
scanfile = strrep(scanfile, '$SENSOR', self.name);
logtext(atmlab('OUT'), 'Locating granules\n');
allgrans = self.find_granules_for_period(startdate, enddate, spec);
ngrans = size(allgrans, 1);
logtext(atmlab('OUT'), 'Found %d granules\n', ngrans);
if ngrans == 0
return
end
if exist(scanfile, 'file')
tm = load(scanfile);
self.firstlines.(sat) = tm.first;
else
self.firstlines.(sat) = containers.Map('KeyType', 'uint32', 'ValueType', 'int32');
end
uni_first = self.get_starttime(allgrans(1, :));
if ~self.firstlines.(sat).isKey(uni_first)
logtext(atmlab('OUT'), 'Setting very first to flag: %d:%d\n', ...
uni_first, self.FL_INITIAL);
self.firstlines.(sat)(uni_first) = self.FL_INITIAL;
end
next = 0;
for i = 1:ngrans-1
uni = self.get_starttime(allgrans(i+1, :));
uni = uint32(uni);
[ddv{1:5}] = unixsecs2date(double(uni));
logtext(atmlab('OUT'), 'granule %d/%d: %d-%02d-%02d %02d:%02d\n', i, ngrans-1, ...
ddv{1}, ddv{2}, ddv{3}, ddv{4}, ddv{5});
if self.firstlines.(sat).isKey(uni) && ...
~any(self.firstlines.(sat)(uni) == ...
cellfun(@int64, {self.FL_INITIAL self.FL_NOTFOUND self.FL_ALLDUPL self.FL_NONEXT self.FL_NOCUR}))
logtext(atmlab('OUT'), 'Already exists (%d:%d)\n', uint32(uni), self.firstlines.(sat)(uni));
continue
end
try
couldreadcur = false;
if isequal(next, 0)
cur = self.read_granule(allgrans(i, :), spec, {}, false, false);
else
cur = next;
end
couldreadcur = true;
couldreadnext = false;
next = self.read_granule(allgrans(i+1, :), spec, {}, false, false);
couldreadnext = true;
catch ME
switch ME.identifier
case {'atmlab:find_datafile_by_date', 'atmlab:atovs_get_l1c:zamsu2l1c',...
'atmlab:invalid_data','atmlab:find_granule_by_datetime',...
'MATLAB:imagesci:hdfinfo:invalidFile', 'MATLAB:imagesci:validate:fileOpen', 'atmlab:exec_system_cmd:shell', 'atmlab:SatDataset:cannotread'}
logtext(atmlab('ERR'), 'Problem: %s\n', ME.message);
otherwise
ME.rethrow();
end
end
%uni = date2unixsecs(allgrans(i+1, 1), allgrans(i+1, 2), allgrans(i+1, 3), allgrans(i+1, 4), allgrans(i+1, 5));
if couldreadcur && couldreadnext
t_cur = cur.epoch + cur.time;
t_next = next.epoch + next.time;
%[t_cur, t_next] = unify_time_axis(cur.time, next.time);
if isempty(t_next) || isempty(t_cur)
firstline = 0;
else
% Replaced former by latter because modis L2 can
% have nans for time/lat/lon due to the flagging in
% the reading routine
%firstline = find(t_next > t_cur(end), 1, 'first');
firstline = find(t_next > t_cur(find(~isnan(t_cur), 1, 'last')), 1, 'first');
end
if ~isempty(firstline)
logtext(atmlab('OUT'), 'First line: %d\n', firstline);
self.firstlines.(sat)(uni) = firstline;
else
logtext(atmlab('OUT'), 'No first line, setting to flag\n');
self.firstlines.(sat)(uni) = self.FL_ALLDUPL;
end
elseif couldreadcur
logtext(atmlab('OUT'), 'Could not read next, flagging \n');
self.firstlines.(sat)(uni) = self.FL_NONEXT;
else
logtext(atmlab('OUT'), 'Could not read current. flagging\n');
self.firstlines.(sat)(uni) = self.FL_NOCUR;
end
if mod(i, 10)==0 % store
self.store_firstline(scanfile, sat);
end
end
self.store_firstline(scanfile, sat);
end
%{
function outer = find_extreme_granule(self, spec, mode, varargin)
% find the earliest or latest granule for this dataset
%
% Must always be between Unix Genesis (1970-01-01) and current
% date. Might provide incorrect results if not every day has
% collocations. Use with caution. A safe but slow alternative
% is to run .find_granules_for_period for a very long date interval.
%
% Second argument is either 'first' or 'last'.
%
% EXAMPLE
%
% gran = d.find_extreme_granule('noaa18', 'first')
%
% Uses bisection. Works poorly, misses granules if total period
% is short.
switch mode
case 'first'
a = 1;
b = 2;
extrema = {[1970 1 1], datevec(now)};
case 'last'
a = 2;
b = 1;
extrema = {[self.find_extreme_granule(spec, 'first') 0], datevec(now)};
otherwise
error(['atmlab:' mfilename ':InvalidUse', ...
'''mode'' must be ''first'' or ''last'', not %s'], ...
mode);
end
MAXSTEPS = 100;
%early = [1970 1 1];
%late = datevec(now);
mid = [0 0 0];
for i = 1:MAXSTEPS
oldmid = mid;
mid = datevec((datenum(extrema{1})+datenum(extrema{2}))/2);
% ceil to just next date
mid = mid(1:3);
if all(oldmid == mid)
outer = extrema{b};
break
end
if any(self.find_granules_by_date(mid, spec, false))
extrema{b} = mid;
else
extrema{a} = mid;
end
end
grans = self.find_granules_by_date(outer, spec, false);
if isempty(grans)
if iscell(spec)
tt = [spec{:}];
else
tt = spec;
end
error(['atmlab:' mfilename ':nogranule'], ...
'No granules found for %s %s', self.name, tt);
end
switch mode
case 'first'
outer = grans(1, :);
case 'last'
outer = grans(end, :);
end
end
%}
function start_unixsecs = get_starttime(self, datevec)
% get starttime for granule/granules
%
% This is either taken directly from the datevec, or it is
% taken from a database.
%
% based on datevec, last elem is granule-number
%
% FIXME DOC
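%
% EXAMPLE
%
% For a dataset where the start time is in the filename, this is a
% plain datevec-to-unixsecs conversion (the trailing granule number,
% -1 here, is ignored):
%
% >> secs = mhs.get_starttime([2010 9 8 8 8 -1]);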
if ~isvector(datevec) % pseudo-broadcast with recursion
start_unixsecs = zeros(size(datevec, 1), 1);
for i = 1:size(datevec, 1)
start_unixsecs(i) = self.get_starttime(datevec(i, :));
end
else
if self.needs_starttimesfile
gran_no = datevec(6);
if ~self.starttimes.isKey(gran_no)
self.set_starttime(datevec);
end
start_unixsecs = self.starttimes(gran_no);
else
start_datevec = datevec(1:5);
X = num2cell(start_datevec);
start_unixsecs = date2unixsecs(X{:});
end
start_unixsecs = round(start_unixsecs);
end
end
function set_starttime(self, datevec)
% Determine the starttime by reading the granule and store it in the db
%
% needs datevec including granule number
gran_no = datevec(6);
fullpath = self.find_granule_by_datetime(datevec);
logtext(atmlab('OUT'), 'Reading %s to get start-time\n', ...
fullpath);
% data = read_saphir_l1(fullpath, 'ScanTimestart');
% Don't remove duplicates, as I need to run this function
% before I can even begin to /detect/ duplicates!
success = false;
try
S = self.read_granule(datevec, [], {}, false);
success = true;
catch ME
switch ME.identifier
case 'atmlab:rqre_in_range:invalid'
logtext(atmlab('ERR'), ...
'Cannot determine start time, setting to 0: %s\n', ...
ME.message);
otherwise
ME.rethrow();
end
end
% X = textscan(data.data.ScanTimestart(1).Data, '%4d%2d%2d %2d%2d%2d%6d');
% [year, month, day, hour, minute, second, ~] = deal(X{:});
if success
self.starttimes(gran_no) = S.epoch;
tm = self.starttimes; %#ok
save(self.starttimes_fullpath, 'tm');
end
% since storing a small hashtable is so much quicker than
% reading a large granule-file, doing this every time is a
% small price to pay for easier programming
% might add a conditional here?
end
function [gridmean, lat, lon] = level3(self, date1, date2, spec, fields, gridsize, varargin)
% Calculate level3 (gridded mean) for period
%
%
% IN
%
% date1
% date2
% spec
% fields
% Cell array. Names of fields to be considered.
% gridsize
% (optional) limiters
% Cell array. Each element is either a function handle
% (passed directly to binned_statistics), or a two-element
% cell array with {{fields}, limiter} where gridded
% data will be limited by calling limiter on {fields}.
% Can be used e.g. to grid only day-time data with the help
% of sun_angles. In the latter case the limiter must take a
% struct with {fields}.
% (optional) struct with options to binning_fast
%
% Returns:
%
% - gridmean: 3-D matrix, [nlat nlon nfield]
%
% FIXME DOC
%
% TODO: think of a way to process
% multi-dimensional fields, i.e. tb [2457x90x5]
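%
% EXAMPLE
%
% A hedged sketch (the field name 'LWP' is an assumption; use any
% field your reader returns):
%
% >> [gridmean, lat, lon] = mhs.level3([2010 9 1], [2010 9 30], 'noaa18', {'LWP'}, 2.5);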
[limiters, data] = optargs(varargin, {{}, struct()});
%data.gridsize = gridsize;
allgrans = self.find_granules_for_period(date1, date2, spec);
allstats = {};
lat = -90:gridsize:90;
lon = -180:gridsize:180;
% sort limiters
if ~iscell(limiters)
limiters = {limiters};
end
limit_binnedstats = {};
limit_other = {};
also_fields = {};
j = 1;
k = 1;
for i = 1:length(limiters)
limiter = limiters{i};
if isfunction_handle(limiter)
limit_binnedstats{j} = limiter;
j = j + 1;
elseif iscell(limiter)
limit_other{k} = limiter;
k = k + 1;
also_fields = union(also_fields, setdiff(limiter{1}, {'lat', 'lon', 'time', 'epoch'}));
end
end
i = 0;
while i < size(allgrans, 1)
data = struct();
data.lat = zeros(1e6, 1, 'single');
data.lon = zeros(1e6, 1, 'single');
% data.data = zeros(1e6, ...
% sum(cellfun(@(f)size(grandata.(f), 3), fields)), ...
% 'single');
N = 1;
X = whos('data');
while X.bytes < 2e8 && i < size(allgrans, 1)
i = i + 1;
try
grandata = self.read_granule(allgrans(i, :), spec, union(fields, also_fields));
catch ME
switch ME.identifier
case {'atmlab:SatDataset:cannotread',...
'atmlab:find_granule_by_datetime'}
logtext(atmlab('ERR'), ...
'Cannot read granule: %s', ...
ME.identifier);
continue
otherwise
ME.rethrow();
end
end
% first time, allocate data. width depends on fields
% found = false; nonGeoDim = 1;
nonGeoDim = find(size(grandata.lat)>1,1, 'last') + 1;
% while ~found
% if numel(data.lat)==size(data.lat,nonGeoDim), found = true; end
% nonGeoDim = nonGeoDim + 1;
% end
% for all fields, sum first dimension AFTER lat/lon
% dimensions
if ~isfield(data, 'data')
data.data = zeros(1e6, ...
sum(cellfun(@(f)size(grandata.(f), nonGeoDim), fields)), ...
'single');
end
% apply limiters
mask = true(size(grandata.lat));
for k = 1:length(limit_other)
lim = limit_other{k};
mask = mask & lim{2}(getfields(grandata, lim{1}{:}));
end
grandata.lat = grandata.lat(mask);
grandata.lon = grandata.lon(mask);
for k = 1:length(fields)
field = fields{k};
% if ndims(grandata.(field)) <= nonGeoDim-1
if numel(grandata.(field)) == numel(mask)
grandata.(field) = grandata.(field)(mask);
else
% process multi-dimensional fields, i.e. tb [2457x90x5]
% if ndims(grandata.(field)) > nonGeoDim
% error(['atmlab:' mfilename ':NotImplemented'], ...
% 'I cannot handle %d dimensions for %s!', ...
% ndims(grandata.(field)), field);
% end
if ndims(grandata.(field)) == 3
grandata.(field) = arrayfun(@(i)(par(grandata.(field)(:, :, i), ...
mask)), 1:size(grandata.(field), 3), ...
'UniformOutput', false);
% elseif nonGeoDim == 2
% grandata.(field) = arrayfun(@(i)(par(grandata.(field)(:, i), ...
% mask)), 1:size(grandata.(field), nonGeoDim), ...
% 'UniformOutput', false);
else
error(['atmlab:' mfilename ':NotImplemented'], ...
'Sorry, can''t handle %d dimensions for %s!', ...
ndims(grandata.(field)), field);
end
grandata.(field) = horzcat(grandata.(field){:});
end
end
nnew = size(grandata.lat, 1);
new = N:(N+nnew-1);
if nnew==0 % no data
continue
end
if new(end) > size(data.lat, 1)
% double allocation
data.lat = [data.lat; zeros(size(data.lat, 1), 1, 'single')];
data.lon = [data.lon; zeros(size(data.lon, 1), 1, 'single')];
data.data = [data.data; ...
zeros(size(data.data, 1), size(data.data, 2), 'single')];
end
data.lat(new) = grandata.lat;
data.lon(new) = grandata.lon;
C = cellfun(@(f) single(grandata.(f)), fields, 'UniformOutput', false);
data.data(new, :) = horzcat(C{:});
N = N + nnew;
X = whos('data');
end
data.lat = data.lat(1:N-1);
data.lon = data.lon(1:N-1);
data.data = data.data(1:N-1, :);
% data.lat = grandata.lat;
% data.lon = grandata.lon;
% C = cellfun(@(f) double(grandata.(f)), fields, 'UniformOutput', false);
% data.data = horzcat(C{:});
logtext(atmlab('OUT'), 'Binning\n');
%[binned, lat, lon] = binning_fast(data);
indces = bin_nd({data.lat, data.lon}, {lat, lon});
binned = cellfun(@(ii) data.data(ii, :), indces, 'UniformOutput', false);
% SE: added replace = zeros(1,size(data.data,nonGeoDim)),
% because cell2mat doesn't like cells being empty, and doesn't
% like the cells containing different dimensions
stats = binned_statistics(binned, {@(x)(size(x,1)), @(x)(sum(x,1))},...
limit_binnedstats,...
{0,zeros(1,size(data.data,2))},... %replace
struct('dataClass',class(binned{1,1})));
logtext(atmlab('OUT'), 'Calculating statistics\n');
stats.statistic1 = cell2mat(stats.statistic1);
% cell2mat on multi-dim will flatten 2nd dimension,
% unflatten and put this dimension at the end again
stats.statistic2 = permute(reshape(cell2mat(stats.statistic2), [size(stats.statistic2, 1) size(stats.statistic2{1, 1}, 2), size(stats.statistic2, 2)]), [1 3 2]);
allstats = [allstats {stats}];
end
logtext(atmlab('OUT'), 'Finalising\n');
% calculate total count
allcount = cellfun(@(x) x.statistic1, allstats, 'UniformOutput', false);
allcount = cat(3, allcount{:});
totalcount = sum(allcount, 3);
% calculate total sum
%allsums = cellfun(@(x) bsxfun(@times, x.statistic1, x.statistic2), allstats, 'UniformOutput', false);
allsums = cellfun(@(x) x.statistic2, allstats, 'UniformOutput', false);
allsums = cat(4, allsums{:});
totalsum = sum(allsums, 4);
% calculate overall mean
gridmean = bsxfun(@ldivide, totalcount, totalsum);
end
%% getters and setters for properties
function path = get.starttimes_fullpath(self)
path = fullfile(self.basedir, self.starttimesfile);
end
function times = get.starttimes(self)
if ~self.needs_starttimesfile
times = [];
else
if isequaln(self.cachedstarttimes, []) % not initialised
try
self.cachedstarttimes = loadvar(self.starttimes_fullpath, 'tm');
catch ME
switch ME.identifier
case {'MATLAB:load:couldNotReadFile', 'MATLAB:load:notBinaryFile'}
logtext(atmlab('OUT'), ...
['Unable to read starttimes-file at %s ' ...
'(%s). Initialising new.\n'], ...
self.starttimes_fullpath, ME.message);
self.cachedstarttimes = containers.Map('KeyType', 'uint32', 'ValueType', 'int64');
end
end
end
times = self.cachedstarttimes;
end
end
function set.starttimes(self, val)
if ~isa(val, class(containers.Map))
error(['atmlab:' mfilename ':invalid'], ...
'starttimes should be a containers.Map, found %s', ...
class(val));
end
logtext(atmlab('OUT'), 'Writing %s\n', self.starttimes_fullpath);
save(self.starttimes_fullpath, 'val');
end
function set.name(self, value)
% setter for 'name' property.
if ~isempty(self.name)
% deregister old name
datasets('delete', self);
end
self.name = value;
datasets(self);
end
%% overloading stuff
function sobj = saveobj(self)
% sobj = saveobj@handle(self);
warning(['atmlab:' mfilename ':nostoring'], ...
['You tried to store %s %s, but serialisation is not implemented. ' ...
'Attempts to load will not yield a valid object. Sorry.'], ...
class(self), self.name);
sobj = [];
end
end
methods (Access = protected)
function pos2re(~)
error(['atmlab:' mfilename ':cannotconvert'], ...
['The regular expression self.re was not defined, so ' ...
'I tried to convert it automagically from location '...
'information. However, I don''t know how to do this. ' ...
'I give up. Please help me: either implement ' ...
'a pos2re() method, or define a re member.']);
end
function nm = calculate_name(self)
% calculate_name Return automated name if name is not given
%
% This function is called by the constructor if no name is
% given. You may wish to override it in a subclass.
existing_names = fieldnames(datasets);
while true
nm = sprintf([class(self) '_%d_%d'], uint64(date2unixsecs()),round(rand(1, 1)*10000));
if ~any(strcmp(nm, existing_names))
break
end
end
end
function add_collocated_dataset(ds, cd)
% Adds a collocated dataset. INTERNAL USE ONLY!
%
% This is called by the CollocatedDataset constructor.
ds.collocated_datasets = [ds.collocated_datasets cd];
end
function s_out = repvars(self, s, datevec, spec)
% repvars Replace 'magic' variables
%
% Replace the 'magic' variables:
%
% $YEAR4
% $YEAR2
% $MONTH
% $DAY
% $DOY
% $SAT or $SAT1 and $SAT2
% $HOUR
% $MINUTE
%
% For more replacements, overload this function.
%
% FORMAT
%
% s_out = self.repvars(s, datevec, satname)
%
% IN
%
% s string where magic is replaced
% datevec vector date-vector [year month day]
% spec string/ name of satellite/satellites
% cellstr
%
% OUT
%
% s_out string with magic replaced
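%
% EXAMPLE
%
% >> self.repvars('$SAT_mhs_$YEAR4/$MONTH/$DAY', [2010 9 8], 'noaa18')
% ans =
% noaa18_mhs_2010/09/08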
year = num2str(datevec(1), '%04d');
month = num2str(datevec(2), '%02d');
day = num2str(datevec(3), '%02d');
if length(datevec)>3
hour = num2str(datevec(4), '%02d');
minute = num2str(datevec(5), '%02d');
else
hour = '0';
minute = '0';
end
year02 = year(3:4);
doy = num2str(round(dayofyear(datevec(1), datevec(2), datevec(3))), '%03d');
to_replace = {'$YEAR4', year, '$MONTH', month, '$DAY', day, ...
'$YEAR2', year02, '$DOY', doy, '$HOUR', hour, '$MINUTE', minute};
if iscellstr(spec)
to_replace = [to_replace {'$SAT1', spec{1}, '$SAT2', spec{2}}];
else
if strfind(s, '$SAT1')
warning('atmlab:strrep_variables', ...
['replacing $SAT, but having $SAT1; are you sure you don''t ' ...
'want to pass TWO satellites?']);
end
to_replace = [to_replace {'$SAT', spec}];
end
s_out = strrep_multi(s, to_replace{:});
end
function matches = infofit(self, is, datevec, spec)
yearstr = num2str(datevec(1), '%04d'); % for comparing easily with 2digit years
if (isfield(is, 'year02') && ~isempty(is.year02) && ~strcmp(is.year02, yearstr(3:4))) || ...
(isfield(is, 'year04') && ~isempty(is.year04) && str2double(is.year04)~=datevec(1)) || ...
(isfield(is, 'year') && ~isempty(is.year) && str2double(is.year)~=datevec(1)) || ...
(isfield(is, 'month') && ~isempty(is.month) && str2double(is.month)~=datevec(2)) || ...
(isfield(is, 'day') && ~isempty(is.day) && str2double(is.day)~=datevec(3)) || ...
(isfield(is, 'doy') && ~isempty(is.doy) && str2double(is.doy)~=dayofyear(datevec(1), datevec(2), datevec(3))) || ...
(isfield(is, 'satname') && ~isempty(is.satname) && ~isequal(is.satname, spec))
matches = false;
else
matches = true;
end
end
end
methods (Access = private)
function store_firstline(self, scanfile, sat)
logtext(atmlab('OUT'), ...
'Writing %s\n', scanfile);
first = self.firstlines.(sat); %#ok
save(scanfile, 'first');
end
end
end