%------------------------------------------------------------------------------ % NAME: qpcls % % Performs an inversion by constrained least squares. % % However, the only method handled so far is OEM. % % The function can also be used to calculate the contribution % function matrix and the weighting function matrix (the second % format below). The number of input argument shall then be 1. % % The fields of X and G are preliminary and can be changed. A % short description of X and G: % % X is array of structures. There is one structure for each % retrieval identity (species profile etc.). The structure fields % depend on the retrieval quantity. For species, the retrieved % profile is both given in VMR (field x) and in fraction to the % a priori profile (field xfrac). % % G gives some information about the sucess and progress of the % inversion. It is a structure with the fields: % converged 1 if convergence was reached, else 0. % failure Description why iteration was stoped (only if % converged=0). % cost Final total cost (chi sqaure). % cost_x Final value of cost corresponding to x % cost_y Final value of cost corresponding to y % All cost values are normalised to the length of y. % niter Number of iterations performed. % start_ga Start value of the ML regularisation parameter. % stop_ga Stop value of the ML regularisation parameter. % % S is a structure giving error estimates, the measurement % response etc. The errors are estimated by the used Se and Sx. % That is, only error sources flagged as level 2 are included. % For a more detailed error estimation, use qpcls_invchar. % The structure has the fields: % si_tot Standard deviation for total error. % si_obs Standard deviation for observation error. % si_smo Standard deviation for smoothing error. % mresp Measurement response. % A Averaging kernel matrix % S_obs Covariance matrix for the observation error. % S_smo Covariance matrix for the smoothing error. % % FORMAT: [X,G,y_corrected,y_fitted,bl,Xiter,S] = qpcls( Q, y ) % or % [Dy,Kx,kx_names,kx_index,kx_aux] = qpcls( Q ) % % OUT: X See above. % G See above. % y_corrected The measurement spectrum (Y) corrected according % to retrieved calibration and baseline parameters. % y_fitted Fitted (to Y_CORRECTED) spectrum. % bl Fitted baseline. % Xiter The state vector for each teration. No unit % conversions have been performed and e.g. species % profiles are in fraction of a priori profile. % S Error estimates and measurement response. See above. % IN: Q Qpack structure. % y Measurement spectrum. %------------------------------------------------------------------------------ % HISTORY: 2001.01.04 Started by by Patrick Eriksson. function [X,G,y_corrected,y_fitted,bl,Xiter,S] = qpcls( Q, y ) % Remember to adjust the checks of nargin and nargout below if argument % lists are changed. if ~qpcls_methods(Q.RETRIEVAL_METHOD) error( sprintf('You have selected an unknown retrieval method (%s)', ... Q.RETRIEVAL_METHOD)); end out(1,1); %=== CALCULATION OPTIONS % %= Set 0 as first guess for the calculation flags. nonlin = 0; return_Dy = 0; do_post = 0; logspecies = 0; do_xiter = 0; do_errorchar = 0; recalc_abs = 0; % %= Set the number of iterations between WFs recalculation to 1. recalc_wfs_niter = 1; % if (nargin<2) | isempty(y) % return_Dy = 1; % out(2,'ONLY CALCULATING Dy'); % else % nonlin = Q.CLS_NONLIN_ON; % if nargout > 1 do_post = 1; end % if nonlin % if Q.CLS_SPECIES_POS_ON logspecies = 1; end % if Q.CLS_RECALC_ABS_ON | Q.TEMPERATURE_DO == 3 recalc_abs = 1; end % if Q.CLS_RECALC_WFS_NITER > 1 recalc_wfs_niter = Q.CLS_RECALC_WFS_NITER; end % end % out(1,'RETRIEVAL BY CONSTRAINED LEAST SQUARES'); end % if nargout > 5 do_xiter = 1; end % if nargout > 6 do_errorchar = 1; end %=== SOME PRINTING % out(2,0); % if nonlin if Q.CLS_GA_START_VALUE == 0 out(2,sprintf('Method : %s, Gauss-Newton', ... upper(Q.RETRIEVAL_METHOD) )); else out(2,sprintf('Method : %s, Marquardt-Levenberg', ... upper(Q.RETRIEVAL_METHOD))); end out(2,sprintf('Start gamma : %.1f', Q.CLS_GA_START_VALUE )); out(2,sprintf('Factor OK : %.1f', Q.CLS_GA_FAC_WHEN_OK )); out(2,sprintf('Factor not OK : %.1f', Q.CLS_GA_FAC_WHEN_NOT_OK )); out(2,sprintf('Stop value : %.3f', Q.CLS_STOP )); out(2,sprintf('Max iterations : %.0f', Q.CLS_MAX_ITER )); % if recalc_abs out(2,' Absorption will be calculated for each iteration.'); else out(2,' Absorption will be scaled by species abundance.'); end % if recalc_wfs_niter > 1 out(2,sprintf(... ' There will %d iterations between recalculations of WFs.',... recalc_wfs_niter)); end % if logspecies out(2,' Positive constraint for species profiles is turned on.'); else out(2,' No constraint for species profiles.'); end else out(2,sprintf('Method : Linear %s', upper(Q.RETRIEVAL_METHOD) )); end % if ~return_Dy if do_post out(2,' Spectrum, cost etc. will be calculated after retrieval.'); else out(2,' No post-retrieval calculations will be performed.'); end end %=== LOAD H MATRICES % load( [Q.OUT,'.h'], '-mat' ); load( [Q.OUT,'.hd'], '-mat' ); % if ~return_Dy & size(H,1)>1 & length(y) ~= size(H,1) error('Wrong length of input measurement vector.'); end %=== CREATE TEMPORARY DIRECTORY AND RUN ARTS FOR FIRST ITERATION % tmpdir = temporary_directory( Q.TMP_AREA ); % do_nonlin = nonlin | do_post; % [ fy, Kx, kx_names, kx_index, kx_aux, cfile, basename ] = ... qp_iter1( Q, H, Hd, tmpdir, 3, do_nonlin, recalc_abs ); % wfs_done = 1; % Flag to indicate that WFs are (re)calculated % if isempty( fy ) [X,G,y_corrected,y_fitted,bl,Xiter,S] = arts_failure; return else yfilename = [ basename, '.y.ab' ]; end % if ~return_Dy & length(y) ~= length(fy) error('Wrong length of input measurement vector.'); end % %= Some sizes xa = kx_aux(:,2); nx = length( xa ); ny = size( Kx, 1 ); %=== LOAD COVARIANCE MATRICES % load( [Q.OUT,'.dxinv'], '-mat' ); load( [Q.OUT,'.seinv'], '-mat' ); % if strcmp( lower( Q.RETRIEVAL_METHOD ), 'oem' ) load( [Q.OUT,'.sxinv'], '-mat' ); RM = Sxinv; clear Sxinv end % if size(Kx,2) ~= size(RM,1) error(['Sizes of Kx and regularization matrix (Sx) do not match', ... ' (recalculation needed?).'] ); end %=== RETURN HERE IF ONLY Dy AND Kx SHALL BE CALCULATED % if return_Dy % KtSeinv = Kx'*Seinv; X = inv( RM + KtSeinv*Kx ) * KtSeinv; G = Kx; y_corrected = kx_names; y_fitted = kx_index; bl = kx_aux; % out(1,-1); if out(1) fprintf('\n'); % NOTE NOTE NOTE end % return %>>>>> Exit >>>>> end % %=== SOME SPECIAL STUFF FOR NON-LINEAR % if nonlin gamma = Q.CLS_GA_START_VALUE; %= Build cfile for calculation of Kx template = which( 'cls_iter2_kx.tmplt' ); % QE = []; QE.RECALC_ABS = recalc_abs; % qtool( Q, tmpdir, template, QE ); cfile_kx = [ cfile, '-FOR_KX' ]; copyfile( cfile, cfile_kx ); %= Index for species profiles % ispecies = []; for i = 1 : size( kx_names, 1 ) group = qp_kinfo( kx_names{i} ); if strcmp( group, 'Species' ) ispecies = [ ispecies; (kx_index(i,1):kx_index(i,2))' ]; end end %= Take log of positive constraint for species % if ~isempty(ispecies) & logspecies xa(ispecies) = log( xa(ispecies) ); end %=== SOME SPECIAL STUFF FOR NON-LINEAR % else gamma = 0; end %=== SOME SPECIAL STUFF FOR NON-LINEAR AND/OR POST CALCULATIONS % if nonlin | do_post %= Build cfile for calculation of FY template = which( 'cls_iter2_y.tmplt' ); % QE = []; QE.RECALC_ABS = recalc_abs; % qtool( Q, tmpdir, template, QE ); cfile_y = [ cfile, '-FOR_Y' ]; copyfile( cfile, cfile_y ); %= Start cost % cost_x = 0; a = y - fy; cost_y = (a' * Seinv * a) / ny; cost = cost_x + cost_y; %= Printing out(2,0); out(1,sprintf(... ' Iter Total Profile Spectrum Conver- WFs')); out(1,sprintf(... ' nr Gamma cost cost cost gence calc.')); out(1,0); out(1,sprintf('%4.0f - %.2e 0.0 %.2e - -',... 0, cost_y, cost_y )); %= Load needed variables % %= Absorption if ~recalc_abs Abs = read_artsvar( basename, 'abs'); Absp = read_artsvar( basename, 'abs_per_tg'); else Abs = []; Absp = []; end % %= Zenith angles if Q.POINTING_DO == 3 za_pencil = read_artsvar( basename, 'za_pencil'); else za_pencil = []; end end %=== READ SOME ARTS VARIABLES THAT ALWAYS ARE NEEDED % p_abs = read_artsvar( basename, 'p_abs' ); f_mono = read_artsvar( basename, 'f_mono' ); vmrs = read_artsvar( basename, 'vmrs' ); %=== INIT ITERATION VARIABLES % iteration = 0; if nonlin max_iter = Q.CLS_MAX_ITER; else max_iter = 1; end % x = xa; y_corrected = y; % if do_xiter Xiter = []; end % G = []; G.converged = 0; if nonlin G.failure = 'max iterations reached'; % default assumption G.start_ga = gamma; else G.failure = '- (linear inversion)'; G.start_ga = NaN; G.stop_ga = NaN; end %############################################################################## %### Start of retrieval loop %############################################################################## % while ~G.converged & ( iteration < max_iter ) iteration = iteration + 1; G.niter = iteration; %=== Calculate new Kx ? % if iteration > 1 & ~rem( iteration-1, recalc_wfs_niter ) % copyfile( cfile_kx, cfile ); qp_arts( Q, cfile ); % Kx = qp_assemble_K( Q, basename, H, Hd, y, bl, 3, x ); % wfs_done = 1; % % The WFs shall corresponds to fractions of the a priori state, but the % WFs are calculated for present retrieved state and a rescaling is % required if logspecies=0. % The Kx to apply for the the log-retrieval of species is Kx*x (see % Eq. 45 of Paper A in my thesis), but this is achieved automatically as % Kx is calculated in fractions of x (and not xa). % if ~isempty(ispecies) & ~logspecies Kx(:,ispecies) = Kx(:,ispecies) ./ (ones(ny,1)*x(ispecies)'); end end %=== Make inversion % % Gaussian elimination (\) is used as this is faster than calculating the % inverse explicitely (by inv()). % KtSeinv = Kx'*Seinv; AAAA = RM + KtSeinv*Kx; % ok_iter = 0; % while ~ok_iter %= If linear, set ok flag for iteration to true if ~nonlin ok_iter = 1; end %= A vector needed in different places if iteration == 1 wdy = KtSeinv * ( y_corrected - fy ); else wdy = KtSeinv * ( y_corrected - fy ) - RM * ( x - xa ); end %= New x if gamma == 0 x_new = x + AAAA \ wdy; else x_new = x + (AAAA + gamma*Dxinv) \ wdy; end %= Calculate spectrum corresponding to X_NEW and calculate cost function if nonlin | do_post %= Map X onto absorption etc. qp_x2arts( Q, x_new, basename, Kx, kx_names, kx_index, kx_aux, ... recalc_abs, logspecies, p_abs, f_mono, vmrs, Abs, Absp, za_pencil ); %= New fy (delete first the old file to detect ARTS crasches) delete( yfilename ); copyfile( cfile_y, cfile ); qp_arts( Q, cfile ); if exist( yfilename, 'file' ) fy_new = read_artsvar( basename, 'y' ); fy_new = H*fy_new; else [X,G,y_corrected,y_fitted,bl,Xiter,S] = arts_failure; return end %= Correct measurement spectrum according to retrieved calibration and %= baseline variables [y_corrected,bl] = qp_x2y( Q, x_new, Kx, kx_names, kx_index, y ); %= New cost a = x_new - xa; cost_x = (a' * RM * a) / ny; % Yes, it shall be ny here a = y_corrected - fy_new; cost_y = (a' * Seinv * a) / ny; cost_new = cost_x + cost_y; end %= If non-linear, check if cost has decreased if nonlin if cost_new < cost ok_iter = 1; else out(1,sprintf('FAILED %.1e %.2e', gamma, cost_new )); if gamma > Q.CLS_GA_UPPER_LIMIT ok_iter = 1; iteration = 9999; G.failure = 'runaway gamma'; elseif gamma > 0 gamma = gamma * Q.CLS_GA_FAC_WHEN_NOT_OK; else gamma = Q.CLS_GA_FAC_WHEN_NOT_OK; end end end end % while if nonlin | do_post %= Calculate convergence number % % (Equation 5.31 in the Rodgers book was used here but it didn't work % well for some harder inversion cases) % conv = ( ( x_new - x )' * AAAA * ( x_new - x ) ) / nx; % if wfs_done wfs_symbol = 'y'; wfs_done = 0; else wfs_symbol = 'n'; end % out(1,sprintf('%4.0f %.1e %.2e %.2e %.2e %.2e %s',... iteration, gamma, cost_new, cost_x, cost_y, conv, wfs_symbol )); %= Fill G if ready with non-linear inversion if nonlin & ( conv < Q.CLS_STOP ) G.converged = 1; G.failure = []; end %= Update spectrum and cost fy = fy_new; cost = cost_new; %= Update spectrum and cost, if not ready if ~G.converged & nonlin gamma = gamma / Q.CLS_GA_FAC_WHEN_OK; end end %= Move X_NEW to X x = x_new; if do_xiter Xiter = [Xiter, x]; end end %############################################################################## %### End of retrieval loop %############################################################################## %=== Clear some variables to save some memory % clear RM Seinv H Hd % if ~do_errorchar clear AAAA KtSeinv end %=== IF LOG-SPECIES, CONVERT BACK % if logspecies & ~isempty(ispecies) x(ispecies) = exp( x(ispecies) ); end %=== FILL X STRUCTURE % X = qp_x2X( x, Q, tmpdir, basename, kx_names, kx_index, kx_aux, p_abs, vmrs ); %=== FIX OTHER RETURN VARIABLES % if do_post % y_fitted = fy; % G.cost = cost; G.cost_x = cost_x; G.cost_y = cost_y; G.stop_ga = gamma; end %=== DELETE THE TEMPORARY DIRECTORY % delete_tmp_dir( Q.TMP_AREA, tmpdir ); %=== Make error characterisation % if do_errorchar % Contribution function matrix Dy = inv(AAAA) * KtSeinv; clear AAAA KtSeinv % Observation error load( [Q.OUT,'.se'], '-mat' ); S.S_obs = Dy * Se * Dy'; clear Se % Averaging kernel matrix A = Dy*Kx; S.A = A; % Measurement response S.mresp = zeros(nx,1); for i = 1 : size( kx_index, 1 ) ind = kx_index(i,1) : kx_index(i,2); S.mresp(ind) = sum( A(ind,ind)' )'; end % Smoothing error load( [Q.OUT,'.sx'], '-mat' ); A = A - eye(nx,nx); S.S_smo = A * Sx * A'; clear Sx A Dy % Standard deviations S.si_obs = sqrt( diag( S.S_obs )); S.si_smo = sqrt( diag( S.S_smo )); S.si_tot = sqrt( S.si_obs.^2 + S.si_smo.^2 ); % Store kx_index in S S.kx_index = kx_index; end %=== END MESSAGES out(1,-1); if out(1) fprintf('\n'); end %--- Sub-functions ----------------------------------------------------------- function [X,G,y_corrected,y_fitted,bl,Xiter,S] = arts_failure % G.failure = 'ARTS crash'; G.cost = NaN; G.cost_x = NaN; G.cost_y = NaN; % X = []; y_corrected = []; y_fitted = []; bl = []; Xiter = []; S = []; % return